| { | |
| "best_global_step": 12904, | |
| "best_metric": 0.3503767491926803, | |
| "best_model_checkpoint": "models/combined_finetuned\\checkpoint-12904", | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 12904, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0015499070055796653, | |
| "grad_norm": 18.449249267578125, | |
| "learning_rate": 1.9986050836949784e-05, | |
| "loss": 2.3857, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0030998140111593306, | |
| "grad_norm": 17.223068237304688, | |
| "learning_rate": 1.997055176689399e-05, | |
| "loss": 1.8583, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0046497210167389955, | |
| "grad_norm": 6.169708251953125, | |
| "learning_rate": 1.995505269683819e-05, | |
| "loss": 1.4451, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.006199628022318661, | |
| "grad_norm": 14.2859525680542, | |
| "learning_rate": 1.9939553626782395e-05, | |
| "loss": 1.3793, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.007749535027898326, | |
| "grad_norm": 10.35885238647461, | |
| "learning_rate": 1.9924054556726596e-05, | |
| "loss": 1.3316, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.009299442033477991, | |
| "grad_norm": 15.786426544189453, | |
| "learning_rate": 1.99085554866708e-05, | |
| "loss": 1.5881, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.010849349039057656, | |
| "grad_norm": 8.541648864746094, | |
| "learning_rate": 1.9893056416615006e-05, | |
| "loss": 1.1178, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.012399256044637322, | |
| "grad_norm": 12.486669540405273, | |
| "learning_rate": 1.9877557346559207e-05, | |
| "loss": 1.2329, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.013949163050216987, | |
| "grad_norm": 18.372262954711914, | |
| "learning_rate": 1.9862058276503412e-05, | |
| "loss": 1.1821, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.015499070055796652, | |
| "grad_norm": 14.894574165344238, | |
| "learning_rate": 1.9846559206447617e-05, | |
| "loss": 1.154, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.017048977061376317, | |
| "grad_norm": 10.745121002197266, | |
| "learning_rate": 1.9831060136391818e-05, | |
| "loss": 1.2915, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.018598884066955982, | |
| "grad_norm": 9.22031307220459, | |
| "learning_rate": 1.9815561066336023e-05, | |
| "loss": 1.3442, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.020148791072535647, | |
| "grad_norm": 7.619173526763916, | |
| "learning_rate": 1.9800061996280224e-05, | |
| "loss": 1.2266, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02169869807811531, | |
| "grad_norm": 7.492969512939453, | |
| "learning_rate": 1.978456292622443e-05, | |
| "loss": 1.2723, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.023248605083694977, | |
| "grad_norm": 8.811915397644043, | |
| "learning_rate": 1.976906385616863e-05, | |
| "loss": 1.0317, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.024798512089274645, | |
| "grad_norm": 8.983158111572266, | |
| "learning_rate": 1.9753564786112835e-05, | |
| "loss": 0.9404, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.02634841909485431, | |
| "grad_norm": 7.216524600982666, | |
| "learning_rate": 1.9738065716057036e-05, | |
| "loss": 1.1478, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.027898326100433975, | |
| "grad_norm": 7.921091079711914, | |
| "learning_rate": 1.972256664600124e-05, | |
| "loss": 1.3151, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.02944823310601364, | |
| "grad_norm": 9.79519271850586, | |
| "learning_rate": 1.9707067575945446e-05, | |
| "loss": 1.0722, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.030998140111593304, | |
| "grad_norm": 6.854232311248779, | |
| "learning_rate": 1.9691568505889647e-05, | |
| "loss": 1.021, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03254804711717297, | |
| "grad_norm": 5.456936836242676, | |
| "learning_rate": 1.9676069435833852e-05, | |
| "loss": 0.9743, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.034097954122752634, | |
| "grad_norm": 9.073031425476074, | |
| "learning_rate": 1.9660570365778057e-05, | |
| "loss": 1.1088, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0356478611283323, | |
| "grad_norm": 11.155451774597168, | |
| "learning_rate": 1.9645071295722258e-05, | |
| "loss": 1.1379, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.037197768133911964, | |
| "grad_norm": 6.259587287902832, | |
| "learning_rate": 1.9629572225666463e-05, | |
| "loss": 1.2134, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.03874767513949163, | |
| "grad_norm": 9.289825439453125, | |
| "learning_rate": 1.9614073155610664e-05, | |
| "loss": 0.8888, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.040297582145071294, | |
| "grad_norm": 10.56016731262207, | |
| "learning_rate": 1.959857408555487e-05, | |
| "loss": 0.866, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.04184748915065096, | |
| "grad_norm": 9.807143211364746, | |
| "learning_rate": 1.958307501549907e-05, | |
| "loss": 1.0504, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04339739615623062, | |
| "grad_norm": 8.206954956054688, | |
| "learning_rate": 1.9567575945443275e-05, | |
| "loss": 0.9964, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04494730316181029, | |
| "grad_norm": 11.40170955657959, | |
| "learning_rate": 1.9552076875387477e-05, | |
| "loss": 0.9297, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.04649721016738995, | |
| "grad_norm": 12.768455505371094, | |
| "learning_rate": 1.953657780533168e-05, | |
| "loss": 0.9266, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.048047117172969625, | |
| "grad_norm": 7.376617908477783, | |
| "learning_rate": 1.9521078735275886e-05, | |
| "loss": 0.9087, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.04959702417854929, | |
| "grad_norm": 7.627091884613037, | |
| "learning_rate": 1.9505579665220087e-05, | |
| "loss": 0.8817, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.051146931184128955, | |
| "grad_norm": 8.248144149780273, | |
| "learning_rate": 1.9490080595164292e-05, | |
| "loss": 0.8539, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.05269683818970862, | |
| "grad_norm": 8.01970386505127, | |
| "learning_rate": 1.9474581525108497e-05, | |
| "loss": 1.1062, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.054246745195288285, | |
| "grad_norm": 6.326109409332275, | |
| "learning_rate": 1.94590824550527e-05, | |
| "loss": 0.9872, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.05579665220086795, | |
| "grad_norm": 6.3588128089904785, | |
| "learning_rate": 1.9443583384996903e-05, | |
| "loss": 1.0147, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.057346559206447614, | |
| "grad_norm": 5.58894157409668, | |
| "learning_rate": 1.9428084314941104e-05, | |
| "loss": 0.9058, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.05889646621202728, | |
| "grad_norm": 7.436977863311768, | |
| "learning_rate": 1.941258524488531e-05, | |
| "loss": 1.0007, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.060446373217606944, | |
| "grad_norm": 9.496243476867676, | |
| "learning_rate": 1.939708617482951e-05, | |
| "loss": 1.0589, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.06199628022318661, | |
| "grad_norm": 8.498618125915527, | |
| "learning_rate": 1.9381587104773715e-05, | |
| "loss": 1.0345, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06354618722876627, | |
| "grad_norm": 9.962313652038574, | |
| "learning_rate": 1.9366088034717917e-05, | |
| "loss": 0.889, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.06509609423434594, | |
| "grad_norm": 10.519235610961914, | |
| "learning_rate": 1.935058896466212e-05, | |
| "loss": 1.1431, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.0666460012399256, | |
| "grad_norm": 9.293874740600586, | |
| "learning_rate": 1.9335089894606326e-05, | |
| "loss": 0.842, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.06819590824550527, | |
| "grad_norm": 13.305548667907715, | |
| "learning_rate": 1.9319590824550528e-05, | |
| "loss": 1.0341, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.06974581525108493, | |
| "grad_norm": 10.887805938720703, | |
| "learning_rate": 1.9304091754494732e-05, | |
| "loss": 0.9853, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.0712957222566646, | |
| "grad_norm": 9.665596008300781, | |
| "learning_rate": 1.9288592684438937e-05, | |
| "loss": 0.8416, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.07284562926224426, | |
| "grad_norm": 7.394255638122559, | |
| "learning_rate": 1.927309361438314e-05, | |
| "loss": 0.8716, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07439553626782393, | |
| "grad_norm": 9.392470359802246, | |
| "learning_rate": 1.9257594544327343e-05, | |
| "loss": 0.8584, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.07594544327340359, | |
| "grad_norm": 7.491386413574219, | |
| "learning_rate": 1.9242095474271545e-05, | |
| "loss": 0.8534, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.07749535027898326, | |
| "grad_norm": 9.354204177856445, | |
| "learning_rate": 1.922659640421575e-05, | |
| "loss": 0.9783, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07904525728456292, | |
| "grad_norm": 7.941482067108154, | |
| "learning_rate": 1.921109733415995e-05, | |
| "loss": 0.9655, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08059516429014259, | |
| "grad_norm": 7.988227367401123, | |
| "learning_rate": 1.9195598264104156e-05, | |
| "loss": 0.818, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.08214507129572225, | |
| "grad_norm": 10.701931953430176, | |
| "learning_rate": 1.9180099194048357e-05, | |
| "loss": 0.9184, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.08369497830130192, | |
| "grad_norm": 7.293822765350342, | |
| "learning_rate": 1.916460012399256e-05, | |
| "loss": 0.5997, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.08524488530688158, | |
| "grad_norm": 6.722202301025391, | |
| "learning_rate": 1.9149101053936766e-05, | |
| "loss": 0.8596, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.08679479231246125, | |
| "grad_norm": 12.55391788482666, | |
| "learning_rate": 1.9133601983880968e-05, | |
| "loss": 1.0486, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.08834469931804091, | |
| "grad_norm": 4.803437232971191, | |
| "learning_rate": 1.9118102913825173e-05, | |
| "loss": 0.7763, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.08989460632362058, | |
| "grad_norm": 4.649928569793701, | |
| "learning_rate": 1.9102603843769377e-05, | |
| "loss": 1.0312, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.09144451332920024, | |
| "grad_norm": 10.55102825164795, | |
| "learning_rate": 1.908710477371358e-05, | |
| "loss": 0.9301, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.0929944203347799, | |
| "grad_norm": 5.860504150390625, | |
| "learning_rate": 1.9071605703657783e-05, | |
| "loss": 0.8281, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09454432734035959, | |
| "grad_norm": 4.4910101890563965, | |
| "learning_rate": 1.9056106633601985e-05, | |
| "loss": 0.8093, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.09609423434593925, | |
| "grad_norm": 16.84994888305664, | |
| "learning_rate": 1.904060756354619e-05, | |
| "loss": 0.9299, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.09764414135151891, | |
| "grad_norm": 12.170949935913086, | |
| "learning_rate": 1.902510849349039e-05, | |
| "loss": 0.6854, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.09919404835709858, | |
| "grad_norm": 10.96451473236084, | |
| "learning_rate": 1.9009609423434596e-05, | |
| "loss": 0.9306, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.10074395536267824, | |
| "grad_norm": 10.7017822265625, | |
| "learning_rate": 1.8994110353378797e-05, | |
| "loss": 0.9565, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.10229386236825791, | |
| "grad_norm": 7.391804218292236, | |
| "learning_rate": 1.8978611283323002e-05, | |
| "loss": 0.7833, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.10384376937383757, | |
| "grad_norm": 12.865453720092773, | |
| "learning_rate": 1.8963112213267207e-05, | |
| "loss": 0.9807, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.10539367637941724, | |
| "grad_norm": 10.577310562133789, | |
| "learning_rate": 1.8947613143211408e-05, | |
| "loss": 0.8834, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.1069435833849969, | |
| "grad_norm": 11.489950180053711, | |
| "learning_rate": 1.8932114073155613e-05, | |
| "loss": 1.0228, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.10849349039057657, | |
| "grad_norm": 2.870493173599243, | |
| "learning_rate": 1.8916615003099818e-05, | |
| "loss": 0.7706, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.11004339739615623, | |
| "grad_norm": 5.9892706871032715, | |
| "learning_rate": 1.890111593304402e-05, | |
| "loss": 0.8552, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.1115933044017359, | |
| "grad_norm": 3.6785945892333984, | |
| "learning_rate": 1.8885616862988224e-05, | |
| "loss": 0.7112, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.11314321140731556, | |
| "grad_norm": 6.439176559448242, | |
| "learning_rate": 1.8870117792932425e-05, | |
| "loss": 0.8313, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.11469311841289523, | |
| "grad_norm": 15.108148574829102, | |
| "learning_rate": 1.885461872287663e-05, | |
| "loss": 0.8033, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.1162430254184749, | |
| "grad_norm": 6.084428310394287, | |
| "learning_rate": 1.883911965282083e-05, | |
| "loss": 0.7643, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.11779293242405456, | |
| "grad_norm": 10.377224922180176, | |
| "learning_rate": 1.8823620582765036e-05, | |
| "loss": 1.0162, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.11934283942963422, | |
| "grad_norm": 9.730420112609863, | |
| "learning_rate": 1.8808121512709237e-05, | |
| "loss": 0.8065, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.12089274643521389, | |
| "grad_norm": 9.038290023803711, | |
| "learning_rate": 1.8792622442653442e-05, | |
| "loss": 0.8185, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.12244265344079355, | |
| "grad_norm": 9.10308837890625, | |
| "learning_rate": 1.8777123372597647e-05, | |
| "loss": 0.9643, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.12399256044637322, | |
| "grad_norm": 7.978387832641602, | |
| "learning_rate": 1.8761624302541848e-05, | |
| "loss": 0.9773, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12554246745195288, | |
| "grad_norm": 9.240801811218262, | |
| "learning_rate": 1.8746125232486053e-05, | |
| "loss": 1.0035, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.12709237445753255, | |
| "grad_norm": 7.459136962890625, | |
| "learning_rate": 1.8730626162430258e-05, | |
| "loss": 0.898, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.1286422814631122, | |
| "grad_norm": 7.9399027824401855, | |
| "learning_rate": 1.871512709237446e-05, | |
| "loss": 1.061, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.13019218846869188, | |
| "grad_norm": 5.291181564331055, | |
| "learning_rate": 1.8699628022318664e-05, | |
| "loss": 0.8294, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.13174209547427154, | |
| "grad_norm": 5.9574079513549805, | |
| "learning_rate": 1.8684128952262865e-05, | |
| "loss": 1.1741, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1332920024798512, | |
| "grad_norm": 8.97977352142334, | |
| "learning_rate": 1.866862988220707e-05, | |
| "loss": 1.024, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.13484190948543087, | |
| "grad_norm": 7.710363388061523, | |
| "learning_rate": 1.865313081215127e-05, | |
| "loss": 0.862, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.13639181649101054, | |
| "grad_norm": 12.134010314941406, | |
| "learning_rate": 1.8637631742095476e-05, | |
| "loss": 1.0665, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.1379417234965902, | |
| "grad_norm": 7.517919063568115, | |
| "learning_rate": 1.8622132672039677e-05, | |
| "loss": 0.9495, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.13949163050216987, | |
| "grad_norm": 7.245954990386963, | |
| "learning_rate": 1.8606633601983882e-05, | |
| "loss": 0.7532, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.14104153750774953, | |
| "grad_norm": 7.372646808624268, | |
| "learning_rate": 1.8591134531928087e-05, | |
| "loss": 0.8079, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.1425914445133292, | |
| "grad_norm": 7.447389125823975, | |
| "learning_rate": 1.857563546187229e-05, | |
| "loss": 0.7802, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.14414135151890886, | |
| "grad_norm": 5.043920993804932, | |
| "learning_rate": 1.8560136391816493e-05, | |
| "loss": 0.8062, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.14569125852448853, | |
| "grad_norm": 9.097290992736816, | |
| "learning_rate": 1.8544637321760698e-05, | |
| "loss": 0.8234, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.1472411655300682, | |
| "grad_norm": 6.955564498901367, | |
| "learning_rate": 1.85291382517049e-05, | |
| "loss": 0.951, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.14879107253564786, | |
| "grad_norm": 8.295125961303711, | |
| "learning_rate": 1.8513639181649104e-05, | |
| "loss": 0.6106, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.15034097954122752, | |
| "grad_norm": 6.4834513664245605, | |
| "learning_rate": 1.8498140111593305e-05, | |
| "loss": 0.8087, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.15189088654680719, | |
| "grad_norm": 8.002385139465332, | |
| "learning_rate": 1.848264104153751e-05, | |
| "loss": 0.7515, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.15344079355238685, | |
| "grad_norm": 8.667396545410156, | |
| "learning_rate": 1.846714197148171e-05, | |
| "loss": 0.8659, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.15499070055796652, | |
| "grad_norm": 7.546538352966309, | |
| "learning_rate": 1.8451642901425916e-05, | |
| "loss": 0.9391, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.15654060756354618, | |
| "grad_norm": 10.719348907470703, | |
| "learning_rate": 1.8436143831370118e-05, | |
| "loss": 1.0736, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.15809051456912585, | |
| "grad_norm": 4.889281749725342, | |
| "learning_rate": 1.8420644761314322e-05, | |
| "loss": 0.6875, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.1596404215747055, | |
| "grad_norm": 7.394768238067627, | |
| "learning_rate": 1.8405145691258527e-05, | |
| "loss": 0.8181, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.16119032858028517, | |
| "grad_norm": 8.632201194763184, | |
| "learning_rate": 1.8389646621202732e-05, | |
| "loss": 1.0494, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.16274023558586484, | |
| "grad_norm": 7.193940162658691, | |
| "learning_rate": 1.8374147551146933e-05, | |
| "loss": 0.7158, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.1642901425914445, | |
| "grad_norm": 7.70432710647583, | |
| "learning_rate": 1.8358648481091138e-05, | |
| "loss": 1.0215, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.16584004959702417, | |
| "grad_norm": 5.057873249053955, | |
| "learning_rate": 1.834314941103534e-05, | |
| "loss": 0.6548, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.16738995660260383, | |
| "grad_norm": 6.86895751953125, | |
| "learning_rate": 1.8327650340979544e-05, | |
| "loss": 0.8226, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.1689398636081835, | |
| "grad_norm": 3.7493245601654053, | |
| "learning_rate": 1.8312151270923746e-05, | |
| "loss": 0.8125, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.17048977061376316, | |
| "grad_norm": 6.053397178649902, | |
| "learning_rate": 1.829665220086795e-05, | |
| "loss": 0.7817, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.17203967761934283, | |
| "grad_norm": 7.619532585144043, | |
| "learning_rate": 1.8281153130812152e-05, | |
| "loss": 0.6368, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.1735895846249225, | |
| "grad_norm": 10.370380401611328, | |
| "learning_rate": 1.8265654060756356e-05, | |
| "loss": 0.899, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.17513949163050216, | |
| "grad_norm": 3.4213171005249023, | |
| "learning_rate": 1.8250154990700558e-05, | |
| "loss": 0.5833, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.17668939863608182, | |
| "grad_norm": 8.931166648864746, | |
| "learning_rate": 1.8234655920644763e-05, | |
| "loss": 0.7682, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.1782393056416615, | |
| "grad_norm": 4.954645156860352, | |
| "learning_rate": 1.8219156850588967e-05, | |
| "loss": 0.8337, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.17978921264724115, | |
| "grad_norm": 3.9182803630828857, | |
| "learning_rate": 1.8203657780533172e-05, | |
| "loss": 0.9461, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.18133911965282082, | |
| "grad_norm": 10.957509994506836, | |
| "learning_rate": 1.8188158710477373e-05, | |
| "loss": 1.0823, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.18288902665840048, | |
| "grad_norm": 5.406250476837158, | |
| "learning_rate": 1.8172659640421578e-05, | |
| "loss": 0.8319, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.18443893366398015, | |
| "grad_norm": 9.316621780395508, | |
| "learning_rate": 1.815716057036578e-05, | |
| "loss": 0.8848, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.1859888406695598, | |
| "grad_norm": 7.41352653503418, | |
| "learning_rate": 1.8141661500309984e-05, | |
| "loss": 0.8053, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.1875387476751395, | |
| "grad_norm": 8.243196487426758, | |
| "learning_rate": 1.8126162430254186e-05, | |
| "loss": 0.9792, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.18908865468071917, | |
| "grad_norm": 6.000314712524414, | |
| "learning_rate": 1.811066336019839e-05, | |
| "loss": 0.7051, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.19063856168629884, | |
| "grad_norm": 9.19047737121582, | |
| "learning_rate": 1.8095164290142592e-05, | |
| "loss": 0.7739, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.1921884686918785, | |
| "grad_norm": 4.234779357910156, | |
| "learning_rate": 1.8079665220086797e-05, | |
| "loss": 1.0071, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.19373837569745817, | |
| "grad_norm": 5.595962047576904, | |
| "learning_rate": 1.8064166150030998e-05, | |
| "loss": 0.9207, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.19528828270303783, | |
| "grad_norm": 9.883673667907715, | |
| "learning_rate": 1.8048667079975203e-05, | |
| "loss": 0.7329, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.1968381897086175, | |
| "grad_norm": 4.593166828155518, | |
| "learning_rate": 1.8033168009919408e-05, | |
| "loss": 0.6768, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.19838809671419716, | |
| "grad_norm": 11.609228134155273, | |
| "learning_rate": 1.8017668939863612e-05, | |
| "loss": 1.1065, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.19993800371977682, | |
| "grad_norm": 4.635692596435547, | |
| "learning_rate": 1.8002169869807814e-05, | |
| "loss": 0.568, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.2014879107253565, | |
| "grad_norm": 10.727137565612793, | |
| "learning_rate": 1.798667079975202e-05, | |
| "loss": 0.8751, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.20303781773093615, | |
| "grad_norm": 6.74473237991333, | |
| "learning_rate": 1.797117172969622e-05, | |
| "loss": 0.7213, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.20458772473651582, | |
| "grad_norm": 0.40561366081237793, | |
| "learning_rate": 1.7955672659640425e-05, | |
| "loss": 0.7377, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.20613763174209548, | |
| "grad_norm": 9.337224006652832, | |
| "learning_rate": 1.7940173589584626e-05, | |
| "loss": 0.8402, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.20768753874767515, | |
| "grad_norm": 5.331075668334961, | |
| "learning_rate": 1.792467451952883e-05, | |
| "loss": 0.7356, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.2092374457532548, | |
| "grad_norm": 10.293356895446777, | |
| "learning_rate": 1.7909175449473032e-05, | |
| "loss": 0.8442, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.21078735275883448, | |
| "grad_norm": 3.94999098777771, | |
| "learning_rate": 1.7893676379417237e-05, | |
| "loss": 0.7805, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.21233725976441414, | |
| "grad_norm": 13.504864692687988, | |
| "learning_rate": 1.7878177309361438e-05, | |
| "loss": 0.7697, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.2138871667699938, | |
| "grad_norm": 8.761556625366211, | |
| "learning_rate": 1.7862678239305643e-05, | |
| "loss": 0.8553, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.21543707377557347, | |
| "grad_norm": 9.55759334564209, | |
| "learning_rate": 1.7847179169249848e-05, | |
| "loss": 0.7005, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.21698698078115314, | |
| "grad_norm": 9.412562370300293, | |
| "learning_rate": 1.7831680099194053e-05, | |
| "loss": 0.8092, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.2185368877867328, | |
| "grad_norm": 3.6494250297546387, | |
| "learning_rate": 1.7816181029138254e-05, | |
| "loss": 0.7754, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.22008679479231247, | |
| "grad_norm": 7.303593158721924, | |
| "learning_rate": 1.780068195908246e-05, | |
| "loss": 0.6011, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.22163670179789213, | |
| "grad_norm": 4.854985237121582, | |
| "learning_rate": 1.778518288902666e-05, | |
| "loss": 0.8983, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.2231866088034718, | |
| "grad_norm": 4.855501174926758, | |
| "learning_rate": 1.7769683818970865e-05, | |
| "loss": 0.6847, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.22473651580905146, | |
| "grad_norm": 8.637642860412598, | |
| "learning_rate": 1.7754184748915066e-05, | |
| "loss": 0.6741, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.22628642281463113, | |
| "grad_norm": 11.863778114318848, | |
| "learning_rate": 1.773868567885927e-05, | |
| "loss": 0.9925, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.2278363298202108, | |
| "grad_norm": 7.329986095428467, | |
| "learning_rate": 1.7723186608803472e-05, | |
| "loss": 0.9337, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.22938623682579046, | |
| "grad_norm": 10.348973274230957, | |
| "learning_rate": 1.7707687538747677e-05, | |
| "loss": 0.7293, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.23093614383137012, | |
| "grad_norm": 10.60584831237793, | |
| "learning_rate": 1.769218846869188e-05, | |
| "loss": 0.9099, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.2324860508369498, | |
| "grad_norm": 10.066960334777832, | |
| "learning_rate": 1.7676689398636083e-05, | |
| "loss": 0.8515, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.23403595784252945, | |
| "grad_norm": 8.389772415161133, | |
| "learning_rate": 1.7661190328580288e-05, | |
| "loss": 0.8219, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.23558586484810912, | |
| "grad_norm": 9.143502235412598, | |
| "learning_rate": 1.7645691258524493e-05, | |
| "loss": 0.7401, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.23713577185368878, | |
| "grad_norm": 3.5140132904052734, | |
| "learning_rate": 1.7630192188468694e-05, | |
| "loss": 0.8184, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.23868567885926845, | |
| "grad_norm": 7.025918483734131, | |
| "learning_rate": 1.76146931184129e-05, | |
| "loss": 0.5758, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.2402355858648481, | |
| "grad_norm": 4.971466541290283, | |
| "learning_rate": 1.75991940483571e-05, | |
| "loss": 0.974, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.24178549287042778, | |
| "grad_norm": 3.7477047443389893, | |
| "learning_rate": 1.7583694978301305e-05, | |
| "loss": 0.5627, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.24333539987600744, | |
| "grad_norm": 8.450775146484375, | |
| "learning_rate": 1.7568195908245506e-05, | |
| "loss": 0.5772, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.2448853068815871, | |
| "grad_norm": 5.886590480804443, | |
| "learning_rate": 1.755269683818971e-05, | |
| "loss": 0.6206, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.24643521388716677, | |
| "grad_norm": 8.515731811523438, | |
| "learning_rate": 1.7537197768133912e-05, | |
| "loss": 0.8716, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.24798512089274644, | |
| "grad_norm": 6.910757541656494, | |
| "learning_rate": 1.7521698698078117e-05, | |
| "loss": 0.8909, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.2495350278983261, | |
| "grad_norm": 5.422664642333984, | |
| "learning_rate": 1.750619962802232e-05, | |
| "loss": 0.7547, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.25108493490390577, | |
| "grad_norm": 9.602860450744629, | |
| "learning_rate": 1.7490700557966523e-05, | |
| "loss": 0.8456, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.25263484190948543, | |
| "grad_norm": 6.122979164123535, | |
| "learning_rate": 1.7475201487910728e-05, | |
| "loss": 0.6478, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.2541847489150651, | |
| "grad_norm": 9.506874084472656, | |
| "learning_rate": 1.745970241785493e-05, | |
| "loss": 0.8572, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.25573465592064476, | |
| "grad_norm": 4.676219940185547, | |
| "learning_rate": 1.7444203347799134e-05, | |
| "loss": 0.9653, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.2572845629262244, | |
| "grad_norm": 7.991693496704102, | |
| "learning_rate": 1.7428704277743336e-05, | |
| "loss": 0.5923, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.2588344699318041, | |
| "grad_norm": 4.9383063316345215, | |
| "learning_rate": 1.741320520768754e-05, | |
| "loss": 0.7942, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.26038437693738375, | |
| "grad_norm": 5.993865489959717, | |
| "learning_rate": 1.7397706137631742e-05, | |
| "loss": 0.7397, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.2619342839429634, | |
| "grad_norm": 8.85261344909668, | |
| "learning_rate": 1.7382207067575947e-05, | |
| "loss": 0.8507, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.2634841909485431, | |
| "grad_norm": 7.463364601135254, | |
| "learning_rate": 1.7366707997520148e-05, | |
| "loss": 0.7555, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.26503409795412275, | |
| "grad_norm": 5.885605812072754, | |
| "learning_rate": 1.7351208927464353e-05, | |
| "loss": 0.6855, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.2665840049597024, | |
| "grad_norm": 5.121423244476318, | |
| "learning_rate": 1.7335709857408557e-05, | |
| "loss": 0.6555, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.2681339119652821, | |
| "grad_norm": 10.616006851196289, | |
| "learning_rate": 1.732021078735276e-05, | |
| "loss": 0.7157, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.26968381897086174, | |
| "grad_norm": 6.397147178649902, | |
| "learning_rate": 1.7304711717296964e-05, | |
| "loss": 0.5973, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.2712337259764414, | |
| "grad_norm": 6.465205669403076, | |
| "learning_rate": 1.7289212647241168e-05, | |
| "loss": 0.7021, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.2727836329820211, | |
| "grad_norm": 12.990324974060059, | |
| "learning_rate": 1.727371357718537e-05, | |
| "loss": 1.0169, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.27433353998760074, | |
| "grad_norm": 7.413804054260254, | |
| "learning_rate": 1.7258214507129574e-05, | |
| "loss": 0.67, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.2758834469931804, | |
| "grad_norm": 9.471087455749512, | |
| "learning_rate": 1.7242715437073776e-05, | |
| "loss": 0.6722, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.27743335399876007, | |
| "grad_norm": 4.511841297149658, | |
| "learning_rate": 1.722721636701798e-05, | |
| "loss": 0.6935, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.27898326100433973, | |
| "grad_norm": 3.657198429107666, | |
| "learning_rate": 1.7211717296962182e-05, | |
| "loss": 0.6261, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.2805331680099194, | |
| "grad_norm": 6.290339946746826, | |
| "learning_rate": 1.7196218226906387e-05, | |
| "loss": 0.7307, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.28208307501549906, | |
| "grad_norm": 3.8453187942504883, | |
| "learning_rate": 1.7180719156850588e-05, | |
| "loss": 0.6748, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.28363298202107873, | |
| "grad_norm": 8.411739349365234, | |
| "learning_rate": 1.7165220086794793e-05, | |
| "loss": 0.9805, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.2851828890266584, | |
| "grad_norm": 7.1236653327941895, | |
| "learning_rate": 1.7149721016738998e-05, | |
| "loss": 0.8837, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.28673279603223806, | |
| "grad_norm": 8.25506591796875, | |
| "learning_rate": 1.71342219466832e-05, | |
| "loss": 0.658, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2882827030378177, | |
| "grad_norm": 6.947543144226074, | |
| "learning_rate": 1.7118722876627404e-05, | |
| "loss": 0.6613, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.2898326100433974, | |
| "grad_norm": 9.180550575256348, | |
| "learning_rate": 1.710322380657161e-05, | |
| "loss": 0.6232, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.29138251704897705, | |
| "grad_norm": 2.687608003616333, | |
| "learning_rate": 1.708772473651581e-05, | |
| "loss": 0.5759, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.2929324240545567, | |
| "grad_norm": 5.574268341064453, | |
| "learning_rate": 1.7072225666460015e-05, | |
| "loss": 0.686, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.2944823310601364, | |
| "grad_norm": 5.5589070320129395, | |
| "learning_rate": 1.7056726596404216e-05, | |
| "loss": 0.7817, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.29603223806571605, | |
| "grad_norm": 8.487772941589355, | |
| "learning_rate": 1.704122752634842e-05, | |
| "loss": 0.9106, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.2975821450712957, | |
| "grad_norm": 5.565657615661621, | |
| "learning_rate": 1.7025728456292622e-05, | |
| "loss": 0.5631, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.2991320520768754, | |
| "grad_norm": 3.6714425086975098, | |
| "learning_rate": 1.7010229386236827e-05, | |
| "loss": 0.6945, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.30068195908245504, | |
| "grad_norm": 3.192857265472412, | |
| "learning_rate": 1.6994730316181028e-05, | |
| "loss": 0.6953, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.3022318660880347, | |
| "grad_norm": 6.31390905380249, | |
| "learning_rate": 1.6979231246125233e-05, | |
| "loss": 0.9058, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.30378177309361437, | |
| "grad_norm": 3.161252498626709, | |
| "learning_rate": 1.6963732176069438e-05, | |
| "loss": 0.9399, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.30533168009919404, | |
| "grad_norm": 6.567419528961182, | |
| "learning_rate": 1.694823310601364e-05, | |
| "loss": 0.7066, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.3068815871047737, | |
| "grad_norm": 10.786821365356445, | |
| "learning_rate": 1.6932734035957844e-05, | |
| "loss": 0.58, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.30843149411035337, | |
| "grad_norm": 4.073357105255127, | |
| "learning_rate": 1.691723496590205e-05, | |
| "loss": 0.8259, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.30998140111593303, | |
| "grad_norm": 5.362609386444092, | |
| "learning_rate": 1.690173589584625e-05, | |
| "loss": 0.7092, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3115313081215127, | |
| "grad_norm": 4.431297302246094, | |
| "learning_rate": 1.6886236825790455e-05, | |
| "loss": 0.8349, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.31308121512709236, | |
| "grad_norm": 4.878475666046143, | |
| "learning_rate": 1.6870737755734656e-05, | |
| "loss": 0.6687, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.314631122132672, | |
| "grad_norm": 7.905298233032227, | |
| "learning_rate": 1.685523868567886e-05, | |
| "loss": 0.7769, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.3161810291382517, | |
| "grad_norm": 7.189042568206787, | |
| "learning_rate": 1.6839739615623062e-05, | |
| "loss": 0.7288, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.31773093614383136, | |
| "grad_norm": 9.089221000671387, | |
| "learning_rate": 1.6824240545567267e-05, | |
| "loss": 0.6814, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.319280843149411, | |
| "grad_norm": 8.003725051879883, | |
| "learning_rate": 1.680874147551147e-05, | |
| "loss": 0.8992, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.3208307501549907, | |
| "grad_norm": 5.8466410636901855, | |
| "learning_rate": 1.6793242405455673e-05, | |
| "loss": 0.7694, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.32238065716057035, | |
| "grad_norm": 5.350661277770996, | |
| "learning_rate": 1.6777743335399878e-05, | |
| "loss": 0.8208, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.32393056416615, | |
| "grad_norm": 6.502786636352539, | |
| "learning_rate": 1.676224426534408e-05, | |
| "loss": 0.6959, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.3254804711717297, | |
| "grad_norm": 11.890753746032715, | |
| "learning_rate": 1.6746745195288284e-05, | |
| "loss": 1.0159, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.32703037817730934, | |
| "grad_norm": 6.462416648864746, | |
| "learning_rate": 1.673124612523249e-05, | |
| "loss": 0.8057, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.328580285182889, | |
| "grad_norm": 8.766998291015625, | |
| "learning_rate": 1.671574705517669e-05, | |
| "loss": 0.9381, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.3301301921884687, | |
| "grad_norm": 4.663023471832275, | |
| "learning_rate": 1.6700247985120895e-05, | |
| "loss": 0.9093, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.33168009919404834, | |
| "grad_norm": 8.117950439453125, | |
| "learning_rate": 1.6684748915065096e-05, | |
| "loss": 0.6841, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.333230006199628, | |
| "grad_norm": 3.851203441619873, | |
| "learning_rate": 1.66692498450093e-05, | |
| "loss": 0.6287, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.33477991320520767, | |
| "grad_norm": 10.521380424499512, | |
| "learning_rate": 1.6653750774953503e-05, | |
| "loss": 0.6752, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.33632982021078733, | |
| "grad_norm": 3.793071746826172, | |
| "learning_rate": 1.6638251704897707e-05, | |
| "loss": 0.6795, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.337879727216367, | |
| "grad_norm": 8.640475273132324, | |
| "learning_rate": 1.662275263484191e-05, | |
| "loss": 0.7289, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.33942963422194666, | |
| "grad_norm": 7.897937774658203, | |
| "learning_rate": 1.6607253564786113e-05, | |
| "loss": 0.5091, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.34097954122752633, | |
| "grad_norm": 6.605180740356445, | |
| "learning_rate": 1.6591754494730318e-05, | |
| "loss": 0.7188, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.342529448233106, | |
| "grad_norm": 10.198129653930664, | |
| "learning_rate": 1.657625542467452e-05, | |
| "loss": 0.7225, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.34407935523868566, | |
| "grad_norm": 3.2186694145202637, | |
| "learning_rate": 1.6560756354618724e-05, | |
| "loss": 0.5876, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.3456292622442653, | |
| "grad_norm": 11.494028091430664, | |
| "learning_rate": 1.654525728456293e-05, | |
| "loss": 0.7043, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.347179169249845, | |
| "grad_norm": 9.092860221862793, | |
| "learning_rate": 1.652975821450713e-05, | |
| "loss": 0.8232, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.34872907625542465, | |
| "grad_norm": 5.422155857086182, | |
| "learning_rate": 1.6514259144451335e-05, | |
| "loss": 0.6459, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3502789832610043, | |
| "grad_norm": 8.467617988586426, | |
| "learning_rate": 1.6498760074395537e-05, | |
| "loss": 0.6452, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.351828890266584, | |
| "grad_norm": 8.067217826843262, | |
| "learning_rate": 1.648326100433974e-05, | |
| "loss": 0.8663, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.35337879727216365, | |
| "grad_norm": 6.379934310913086, | |
| "learning_rate": 1.6467761934283943e-05, | |
| "loss": 0.6867, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.3549287042777433, | |
| "grad_norm": 4.482844352722168, | |
| "learning_rate": 1.6452262864228147e-05, | |
| "loss": 0.6077, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.356478611283323, | |
| "grad_norm": 5.696823596954346, | |
| "learning_rate": 1.643676379417235e-05, | |
| "loss": 0.7574, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.35802851828890264, | |
| "grad_norm": 8.586214065551758, | |
| "learning_rate": 1.6421264724116554e-05, | |
| "loss": 0.8223, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.3595784252944823, | |
| "grad_norm": 4.69325065612793, | |
| "learning_rate": 1.640576565406076e-05, | |
| "loss": 0.9626, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.36112833230006197, | |
| "grad_norm": 7.331809043884277, | |
| "learning_rate": 1.639026658400496e-05, | |
| "loss": 0.6907, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.36267823930564164, | |
| "grad_norm": 6.298605442047119, | |
| "learning_rate": 1.6374767513949164e-05, | |
| "loss": 0.8718, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3642281463112213, | |
| "grad_norm": 4.11399507522583, | |
| "learning_rate": 1.635926844389337e-05, | |
| "loss": 0.7102, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.36577805331680097, | |
| "grad_norm": 4.657794952392578, | |
| "learning_rate": 1.634376937383757e-05, | |
| "loss": 0.6324, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.36732796032238063, | |
| "grad_norm": 9.156194686889648, | |
| "learning_rate": 1.6328270303781775e-05, | |
| "loss": 0.7338, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.3688778673279603, | |
| "grad_norm": 16.996049880981445, | |
| "learning_rate": 1.6312771233725977e-05, | |
| "loss": 0.8453, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.37042777433353996, | |
| "grad_norm": 8.539373397827148, | |
| "learning_rate": 1.629727216367018e-05, | |
| "loss": 0.8521, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.3719776813391196, | |
| "grad_norm": 3.439394474029541, | |
| "learning_rate": 1.6281773093614383e-05, | |
| "loss": 0.7142, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3735275883446993, | |
| "grad_norm": 2.998128652572632, | |
| "learning_rate": 1.6266274023558588e-05, | |
| "loss": 0.7817, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.375077495350279, | |
| "grad_norm": 7.721405506134033, | |
| "learning_rate": 1.625077495350279e-05, | |
| "loss": 0.7564, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.3766274023558587, | |
| "grad_norm": 4.268898963928223, | |
| "learning_rate": 1.6235275883446994e-05, | |
| "loss": 0.6392, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.37817730936143834, | |
| "grad_norm": 8.477879524230957, | |
| "learning_rate": 1.62197768133912e-05, | |
| "loss": 0.7102, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.379727216367018, | |
| "grad_norm": 6.268753528594971, | |
| "learning_rate": 1.62042777433354e-05, | |
| "loss": 0.8036, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.38127712337259767, | |
| "grad_norm": 8.156237602233887, | |
| "learning_rate": 1.6188778673279605e-05, | |
| "loss": 0.798, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.38282703037817734, | |
| "grad_norm": 8.959247589111328, | |
| "learning_rate": 1.617327960322381e-05, | |
| "loss": 0.8801, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.384376937383757, | |
| "grad_norm": 4.165592670440674, | |
| "learning_rate": 1.615778053316801e-05, | |
| "loss": 0.7829, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.38592684438933667, | |
| "grad_norm": 6.727826118469238, | |
| "learning_rate": 1.6142281463112216e-05, | |
| "loss": 0.9334, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.38747675139491633, | |
| "grad_norm": 5.1142497062683105, | |
| "learning_rate": 1.6126782393056417e-05, | |
| "loss": 0.9171, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.389026658400496, | |
| "grad_norm": 4.020603179931641, | |
| "learning_rate": 1.6111283323000622e-05, | |
| "loss": 0.5681, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.39057656540607566, | |
| "grad_norm": 5.856823921203613, | |
| "learning_rate": 1.6095784252944823e-05, | |
| "loss": 0.6705, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.3921264724116553, | |
| "grad_norm": 8.811408042907715, | |
| "learning_rate": 1.6080285182889028e-05, | |
| "loss": 0.8041, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.393676379417235, | |
| "grad_norm": 2.1637704372406006, | |
| "learning_rate": 1.606478611283323e-05, | |
| "loss": 0.7588, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.39522628642281465, | |
| "grad_norm": 5.179090976715088, | |
| "learning_rate": 1.6049287042777434e-05, | |
| "loss": 0.5119, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.3967761934283943, | |
| "grad_norm": 9.014410018920898, | |
| "learning_rate": 1.603378797272164e-05, | |
| "loss": 0.743, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.398326100433974, | |
| "grad_norm": 5.311403751373291, | |
| "learning_rate": 1.601828890266584e-05, | |
| "loss": 0.7806, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.39987600743955365, | |
| "grad_norm": 5.875848770141602, | |
| "learning_rate": 1.6002789832610045e-05, | |
| "loss": 0.866, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.4014259144451333, | |
| "grad_norm": 5.602693557739258, | |
| "learning_rate": 1.598729076255425e-05, | |
| "loss": 0.7083, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.402975821450713, | |
| "grad_norm": 10.609829902648926, | |
| "learning_rate": 1.597179169249845e-05, | |
| "loss": 0.7104, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.40452572845629264, | |
| "grad_norm": 5.588577747344971, | |
| "learning_rate": 1.5956292622442656e-05, | |
| "loss": 0.8263, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.4060756354618723, | |
| "grad_norm": 3.9749042987823486, | |
| "learning_rate": 1.5940793552386857e-05, | |
| "loss": 0.7349, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.407625542467452, | |
| "grad_norm": 5.295260906219482, | |
| "learning_rate": 1.5925294482331062e-05, | |
| "loss": 0.6873, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.40917544947303164, | |
| "grad_norm": 3.9901695251464844, | |
| "learning_rate": 1.5909795412275263e-05, | |
| "loss": 0.6693, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.4107253564786113, | |
| "grad_norm": 6.513910293579102, | |
| "learning_rate": 1.5894296342219468e-05, | |
| "loss": 0.7173, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.41227526348419097, | |
| "grad_norm": 5.145935535430908, | |
| "learning_rate": 1.587879727216367e-05, | |
| "loss": 0.7877, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.41382517048977063, | |
| "grad_norm": 8.055877685546875, | |
| "learning_rate": 1.5863298202107874e-05, | |
| "loss": 0.7365, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.4153750774953503, | |
| "grad_norm": 14.172343254089355, | |
| "learning_rate": 1.584779913205208e-05, | |
| "loss": 1.0304, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.41692498450092996, | |
| "grad_norm": 5.074191093444824, | |
| "learning_rate": 1.583230006199628e-05, | |
| "loss": 0.83, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.4184748915065096, | |
| "grad_norm": 6.958271026611328, | |
| "learning_rate": 1.5816800991940485e-05, | |
| "loss": 0.5557, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.4200247985120893, | |
| "grad_norm": 5.674747943878174, | |
| "learning_rate": 1.580130192188469e-05, | |
| "loss": 0.546, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.42157470551766896, | |
| "grad_norm": 6.318327903747559, | |
| "learning_rate": 1.578580285182889e-05, | |
| "loss": 0.7078, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.4231246125232486, | |
| "grad_norm": 8.800183296203613, | |
| "learning_rate": 1.5770303781773096e-05, | |
| "loss": 0.7918, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.4246745195288283, | |
| "grad_norm": 7.619904518127441, | |
| "learning_rate": 1.5754804711717297e-05, | |
| "loss": 0.9327, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.42622442653440795, | |
| "grad_norm": 8.662442207336426, | |
| "learning_rate": 1.5739305641661502e-05, | |
| "loss": 0.816, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.4277743335399876, | |
| "grad_norm": 7.784162998199463, | |
| "learning_rate": 1.5723806571605703e-05, | |
| "loss": 0.8073, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.4293242405455673, | |
| "grad_norm": 5.800148963928223, | |
| "learning_rate": 1.5708307501549908e-05, | |
| "loss": 0.7166, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.43087414755114695, | |
| "grad_norm": 6.138567924499512, | |
| "learning_rate": 1.569280843149411e-05, | |
| "loss": 0.8677, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.4324240545567266, | |
| "grad_norm": 8.85258674621582, | |
| "learning_rate": 1.5677309361438314e-05, | |
| "loss": 0.7602, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.4339739615623063, | |
| "grad_norm": 5.599133014678955, | |
| "learning_rate": 1.566181029138252e-05, | |
| "loss": 0.8947, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.43552386856788594, | |
| "grad_norm": 7.680168628692627, | |
| "learning_rate": 1.564631122132672e-05, | |
| "loss": 0.6632, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.4370737755734656, | |
| "grad_norm": 9.464559555053711, | |
| "learning_rate": 1.5630812151270925e-05, | |
| "loss": 0.6559, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.43862368257904527, | |
| "grad_norm": 1.4966700077056885, | |
| "learning_rate": 1.561531308121513e-05, | |
| "loss": 0.5048, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.44017358958462494, | |
| "grad_norm": 10.32955551147461, | |
| "learning_rate": 1.559981401115933e-05, | |
| "loss": 0.627, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.4417234965902046, | |
| "grad_norm": 8.718482971191406, | |
| "learning_rate": 1.5584314941103536e-05, | |
| "loss": 0.7869, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.44327340359578427, | |
| "grad_norm": 7.256460666656494, | |
| "learning_rate": 1.5568815871047738e-05, | |
| "loss": 0.6825, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.44482331060136393, | |
| "grad_norm": 6.733129978179932, | |
| "learning_rate": 1.5553316800991942e-05, | |
| "loss": 0.7388, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.4463732176069436, | |
| "grad_norm": 10.719423294067383, | |
| "learning_rate": 1.5537817730936144e-05, | |
| "loss": 0.6301, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.44792312461252326, | |
| "grad_norm": 5.897913932800293, | |
| "learning_rate": 1.552231866088035e-05, | |
| "loss": 0.9147, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.4494730316181029, | |
| "grad_norm": 3.1488702297210693, | |
| "learning_rate": 1.550681959082455e-05, | |
| "loss": 0.5445, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.4510229386236826, | |
| "grad_norm": 10.640909194946289, | |
| "learning_rate": 1.5491320520768755e-05, | |
| "loss": 0.7616, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.45257284562926225, | |
| "grad_norm": 7.557347297668457, | |
| "learning_rate": 1.547582145071296e-05, | |
| "loss": 0.6965, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.4541227526348419, | |
| "grad_norm": 5.1408843994140625, | |
| "learning_rate": 1.546032238065716e-05, | |
| "loss": 0.6548, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.4556726596404216, | |
| "grad_norm": 8.21218204498291, | |
| "learning_rate": 1.5444823310601365e-05, | |
| "loss": 1.013, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.45722256664600125, | |
| "grad_norm": 11.295351028442383, | |
| "learning_rate": 1.542932424054557e-05, | |
| "loss": 0.9081, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.4587724736515809, | |
| "grad_norm": 7.3191046714782715, | |
| "learning_rate": 1.541382517048977e-05, | |
| "loss": 0.6628, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.4603223806571606, | |
| "grad_norm": 2.974071741104126, | |
| "learning_rate": 1.5398326100433976e-05, | |
| "loss": 0.8033, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.46187228766274024, | |
| "grad_norm": 8.998639106750488, | |
| "learning_rate": 1.5382827030378178e-05, | |
| "loss": 0.6918, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.4634221946683199, | |
| "grad_norm": 7.389767169952393, | |
| "learning_rate": 1.5367327960322382e-05, | |
| "loss": 0.713, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.4649721016738996, | |
| "grad_norm": 3.8665876388549805, | |
| "learning_rate": 1.5351828890266584e-05, | |
| "loss": 0.6958, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.46652200867947924, | |
| "grad_norm": 7.674843788146973, | |
| "learning_rate": 1.533632982021079e-05, | |
| "loss": 1.0091, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.4680719156850589, | |
| "grad_norm": 12.13735294342041, | |
| "learning_rate": 1.532083075015499e-05, | |
| "loss": 0.9314, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.46962182269063857, | |
| "grad_norm": 4.446971416473389, | |
| "learning_rate": 1.5305331680099195e-05, | |
| "loss": 0.4494, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.47117172969621823, | |
| "grad_norm": 9.129325866699219, | |
| "learning_rate": 1.52898326100434e-05, | |
| "loss": 0.8304, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.4727216367017979, | |
| "grad_norm": 5.487653732299805, | |
| "learning_rate": 1.52743335399876e-05, | |
| "loss": 0.661, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.47427154370737756, | |
| "grad_norm": 6.226546764373779, | |
| "learning_rate": 1.5258834469931806e-05, | |
| "loss": 0.8503, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.47582145071295723, | |
| "grad_norm": 9.435007095336914, | |
| "learning_rate": 1.5243335399876009e-05, | |
| "loss": 0.6498, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.4773713577185369, | |
| "grad_norm": 7.831610679626465, | |
| "learning_rate": 1.5227836329820212e-05, | |
| "loss": 0.6945, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.47892126472411656, | |
| "grad_norm": 7.432961940765381, | |
| "learning_rate": 1.5212337259764417e-05, | |
| "loss": 0.6282, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.4804711717296962, | |
| "grad_norm": 9.168987274169922, | |
| "learning_rate": 1.5196838189708618e-05, | |
| "loss": 0.707, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.4820210787352759, | |
| "grad_norm": 5.86536169052124, | |
| "learning_rate": 1.5181339119652823e-05, | |
| "loss": 0.898, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.48357098574085555, | |
| "grad_norm": 7.2908172607421875, | |
| "learning_rate": 1.5165840049597024e-05, | |
| "loss": 0.8231, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.4851208927464352, | |
| "grad_norm": 4.543672561645508, | |
| "learning_rate": 1.5150340979541229e-05, | |
| "loss": 0.7451, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.4866707997520149, | |
| "grad_norm": 3.355882167816162, | |
| "learning_rate": 1.5134841909485432e-05, | |
| "loss": 0.8017, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.48822070675759455, | |
| "grad_norm": 3.6283857822418213, | |
| "learning_rate": 1.5119342839429637e-05, | |
| "loss": 0.7989, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.4897706137631742, | |
| "grad_norm": 8.624027252197266, | |
| "learning_rate": 1.5103843769373838e-05, | |
| "loss": 0.8073, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.4913205207687539, | |
| "grad_norm": 5.2577595710754395, | |
| "learning_rate": 1.5088344699318043e-05, | |
| "loss": 0.66, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.49287042777433354, | |
| "grad_norm": 4.758912563323975, | |
| "learning_rate": 1.5072845629262244e-05, | |
| "loss": 0.6226, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.4944203347799132, | |
| "grad_norm": 5.687361240386963, | |
| "learning_rate": 1.5057346559206449e-05, | |
| "loss": 0.792, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.49597024178549287, | |
| "grad_norm": 5.457574844360352, | |
| "learning_rate": 1.5041847489150652e-05, | |
| "loss": 0.958, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.49752014879107254, | |
| "grad_norm": 4.327171802520752, | |
| "learning_rate": 1.5026348419094857e-05, | |
| "loss": 0.6538, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.4990700557966522, | |
| "grad_norm": 3.7483696937561035, | |
| "learning_rate": 1.5010849349039058e-05, | |
| "loss": 0.5517, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.5006199628022319, | |
| "grad_norm": 7.629466533660889, | |
| "learning_rate": 1.4995350278983263e-05, | |
| "loss": 0.8133, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5021698698078115, | |
| "grad_norm": 6.833787441253662, | |
| "learning_rate": 1.4979851208927464e-05, | |
| "loss": 0.5979, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5037197768133912, | |
| "grad_norm": 5.130917549133301, | |
| "learning_rate": 1.4964352138871669e-05, | |
| "loss": 0.6538, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5052696838189709, | |
| "grad_norm": 5.909976005554199, | |
| "learning_rate": 1.4948853068815872e-05, | |
| "loss": 0.7074, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.5068195908245505, | |
| "grad_norm": 7.032032489776611, | |
| "learning_rate": 1.4933353998760077e-05, | |
| "loss": 0.8226, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5083694978301302, | |
| "grad_norm": 5.627322196960449, | |
| "learning_rate": 1.4917854928704278e-05, | |
| "loss": 0.6371, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5099194048357099, | |
| "grad_norm": 8.351363182067871, | |
| "learning_rate": 1.4902355858648483e-05, | |
| "loss": 0.9501, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.5114693118412895, | |
| "grad_norm": 7.641820907592773, | |
| "learning_rate": 1.4886856788592684e-05, | |
| "loss": 0.846, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5130192188468692, | |
| "grad_norm": 5.8080010414123535, | |
| "learning_rate": 1.4871357718536889e-05, | |
| "loss": 0.6714, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.5145691258524488, | |
| "grad_norm": 3.774705648422241, | |
| "learning_rate": 1.4855858648481092e-05, | |
| "loss": 0.7114, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.5161190328580285, | |
| "grad_norm": 3.194676160812378, | |
| "learning_rate": 1.4840359578425297e-05, | |
| "loss": 0.6901, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5176689398636082, | |
| "grad_norm": 3.790651559829712, | |
| "learning_rate": 1.4824860508369498e-05, | |
| "loss": 0.6808, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.5192188468691878, | |
| "grad_norm": 5.215169429779053, | |
| "learning_rate": 1.4809361438313703e-05, | |
| "loss": 0.6263, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5207687538747675, | |
| "grad_norm": 6.934802055358887, | |
| "learning_rate": 1.4793862368257904e-05, | |
| "loss": 0.839, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5223186608803472, | |
| "grad_norm": 9.833372116088867, | |
| "learning_rate": 1.4778363298202109e-05, | |
| "loss": 0.8676, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.5238685678859268, | |
| "grad_norm": 8.891237258911133, | |
| "learning_rate": 1.4762864228146312e-05, | |
| "loss": 0.7184, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.5254184748915065, | |
| "grad_norm": 2.9913341999053955, | |
| "learning_rate": 1.4747365158090517e-05, | |
| "loss": 0.4414, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5269683818970862, | |
| "grad_norm": 11.538298606872559, | |
| "learning_rate": 1.4731866088034718e-05, | |
| "loss": 0.4681, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.5285182889026658, | |
| "grad_norm": 6.064094543457031, | |
| "learning_rate": 1.4716367017978923e-05, | |
| "loss": 0.9835, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.5300681959082455, | |
| "grad_norm": 4.357783317565918, | |
| "learning_rate": 1.4700867947923126e-05, | |
| "loss": 0.4394, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.5316181029138252, | |
| "grad_norm": 3.527278423309326, | |
| "learning_rate": 1.468536887786733e-05, | |
| "loss": 0.5394, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.5331680099194048, | |
| "grad_norm": 5.441196918487549, | |
| "learning_rate": 1.4669869807811532e-05, | |
| "loss": 0.5942, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5347179169249845, | |
| "grad_norm": 7.565889358520508, | |
| "learning_rate": 1.4654370737755737e-05, | |
| "loss": 0.6188, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5362678239305642, | |
| "grad_norm": 5.547229290008545, | |
| "learning_rate": 1.4638871667699938e-05, | |
| "loss": 0.7393, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5378177309361438, | |
| "grad_norm": 13.453187942504883, | |
| "learning_rate": 1.4623372597644143e-05, | |
| "loss": 0.8353, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.5393676379417235, | |
| "grad_norm": 9.306943893432617, | |
| "learning_rate": 1.4607873527588346e-05, | |
| "loss": 0.9389, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5409175449473032, | |
| "grad_norm": 5.817188739776611, | |
| "learning_rate": 1.459237445753255e-05, | |
| "loss": 0.7419, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.5424674519528828, | |
| "grad_norm": 4.312830448150635, | |
| "learning_rate": 1.4576875387476752e-05, | |
| "loss": 0.6188, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5440173589584625, | |
| "grad_norm": 8.189769744873047, | |
| "learning_rate": 1.4561376317420957e-05, | |
| "loss": 0.8339, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.5455672659640421, | |
| "grad_norm": 5.929519176483154, | |
| "learning_rate": 1.4545877247365159e-05, | |
| "loss": 0.7464, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.5471171729696218, | |
| "grad_norm": 6.465437412261963, | |
| "learning_rate": 1.4530378177309363e-05, | |
| "loss": 0.6996, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.5486670799752015, | |
| "grad_norm": 5.720170974731445, | |
| "learning_rate": 1.4514879107253566e-05, | |
| "loss": 0.9782, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5502169869807811, | |
| "grad_norm": 8.770698547363281, | |
| "learning_rate": 1.449938003719777e-05, | |
| "loss": 0.6206, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5517668939863608, | |
| "grad_norm": 5.114345073699951, | |
| "learning_rate": 1.4483880967141972e-05, | |
| "loss": 0.5781, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.5533168009919405, | |
| "grad_norm": 8.168386459350586, | |
| "learning_rate": 1.4468381897086177e-05, | |
| "loss": 0.8869, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.5548667079975201, | |
| "grad_norm": 7.974008560180664, | |
| "learning_rate": 1.4452882827030379e-05, | |
| "loss": 0.8623, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.5564166150030998, | |
| "grad_norm": 5.2523274421691895, | |
| "learning_rate": 1.4437383756974583e-05, | |
| "loss": 0.7054, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.5579665220086795, | |
| "grad_norm": 5.542336463928223, | |
| "learning_rate": 1.4421884686918786e-05, | |
| "loss": 0.6853, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5595164290142591, | |
| "grad_norm": 3.6195218563079834, | |
| "learning_rate": 1.440638561686299e-05, | |
| "loss": 0.6817, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.5610663360198388, | |
| "grad_norm": 4.10429573059082, | |
| "learning_rate": 1.4390886546807193e-05, | |
| "loss": 0.5063, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.5626162430254185, | |
| "grad_norm": 9.308690071105957, | |
| "learning_rate": 1.4375387476751397e-05, | |
| "loss": 0.745, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.5641661500309981, | |
| "grad_norm": 6.351543426513672, | |
| "learning_rate": 1.4359888406695599e-05, | |
| "loss": 0.6918, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.5657160570365778, | |
| "grad_norm": 7.3764495849609375, | |
| "learning_rate": 1.4344389336639803e-05, | |
| "loss": 0.5581, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.5672659640421575, | |
| "grad_norm": 6.400803565979004, | |
| "learning_rate": 1.4328890266584007e-05, | |
| "loss": 0.6851, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.5688158710477371, | |
| "grad_norm": 5.576094627380371, | |
| "learning_rate": 1.431339119652821e-05, | |
| "loss": 0.7973, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.5703657780533168, | |
| "grad_norm": 5.8568902015686035, | |
| "learning_rate": 1.4297892126472413e-05, | |
| "loss": 0.7913, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.5719156850588965, | |
| "grad_norm": 6.588540077209473, | |
| "learning_rate": 1.4282393056416617e-05, | |
| "loss": 0.6668, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.5734655920644761, | |
| "grad_norm": 4.362242221832275, | |
| "learning_rate": 1.4266893986360819e-05, | |
| "loss": 0.4781, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.5750154990700558, | |
| "grad_norm": 6.261458396911621, | |
| "learning_rate": 1.4251394916305024e-05, | |
| "loss": 0.6587, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.5765654060756354, | |
| "grad_norm": 4.799465179443359, | |
| "learning_rate": 1.4235895846249227e-05, | |
| "loss": 0.7028, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.5781153130812151, | |
| "grad_norm": 6.386651039123535, | |
| "learning_rate": 1.422039677619343e-05, | |
| "loss": 0.6336, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.5796652200867948, | |
| "grad_norm": 7.835630893707275, | |
| "learning_rate": 1.4204897706137633e-05, | |
| "loss": 0.6865, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.5812151270923744, | |
| "grad_norm": 3.549483060836792, | |
| "learning_rate": 1.4189398636081838e-05, | |
| "loss": 0.8948, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.5827650340979541, | |
| "grad_norm": 6.352856636047363, | |
| "learning_rate": 1.4173899566026039e-05, | |
| "loss": 0.6209, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.5843149411035338, | |
| "grad_norm": 2.8066344261169434, | |
| "learning_rate": 1.4158400495970244e-05, | |
| "loss": 0.6885, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.5858648481091134, | |
| "grad_norm": 4.101128578186035, | |
| "learning_rate": 1.4142901425914447e-05, | |
| "loss": 0.6328, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.5874147551146931, | |
| "grad_norm": 6.97265100479126, | |
| "learning_rate": 1.412740235585865e-05, | |
| "loss": 0.5702, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.5889646621202728, | |
| "grad_norm": 7.036039352416992, | |
| "learning_rate": 1.4111903285802853e-05, | |
| "loss": 0.8447, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.5905145691258524, | |
| "grad_norm": 7.859857082366943, | |
| "learning_rate": 1.4096404215747058e-05, | |
| "loss": 0.6214, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.5920644761314321, | |
| "grad_norm": 7.97307825088501, | |
| "learning_rate": 1.4080905145691259e-05, | |
| "loss": 0.6224, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.5936143831370118, | |
| "grad_norm": 7.060767650604248, | |
| "learning_rate": 1.4065406075635464e-05, | |
| "loss": 1.2209, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.5951642901425914, | |
| "grad_norm": 3.851486921310425, | |
| "learning_rate": 1.4049907005579667e-05, | |
| "loss": 0.6759, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.5967141971481711, | |
| "grad_norm": 4.023894309997559, | |
| "learning_rate": 1.403440793552387e-05, | |
| "loss": 0.8202, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.5982641041537508, | |
| "grad_norm": 6.258404731750488, | |
| "learning_rate": 1.4018908865468073e-05, | |
| "loss": 0.7032, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.5998140111593304, | |
| "grad_norm": 9.688002586364746, | |
| "learning_rate": 1.4003409795412278e-05, | |
| "loss": 0.6889, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.6013639181649101, | |
| "grad_norm": 4.189449787139893, | |
| "learning_rate": 1.3987910725356479e-05, | |
| "loss": 0.7047, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6029138251704897, | |
| "grad_norm": 2.062530040740967, | |
| "learning_rate": 1.3972411655300684e-05, | |
| "loss": 0.5757, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.6044637321760694, | |
| "grad_norm": 7.18925666809082, | |
| "learning_rate": 1.3956912585244887e-05, | |
| "loss": 0.7404, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6060136391816491, | |
| "grad_norm": 11.522746086120605, | |
| "learning_rate": 1.394141351518909e-05, | |
| "loss": 0.911, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6075635461872287, | |
| "grad_norm": 7.289572715759277, | |
| "learning_rate": 1.3925914445133293e-05, | |
| "loss": 0.7325, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.6091134531928084, | |
| "grad_norm": 5.386538505554199, | |
| "learning_rate": 1.3910415375077498e-05, | |
| "loss": 0.513, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6106633601983881, | |
| "grad_norm": 5.218569278717041, | |
| "learning_rate": 1.38949163050217e-05, | |
| "loss": 0.7397, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.6122132672039677, | |
| "grad_norm": 8.259590148925781, | |
| "learning_rate": 1.3879417234965904e-05, | |
| "loss": 0.6696, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6137631742095474, | |
| "grad_norm": 2.930074691772461, | |
| "learning_rate": 1.3863918164910107e-05, | |
| "loss": 0.5533, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6153130812151271, | |
| "grad_norm": 5.612064838409424, | |
| "learning_rate": 1.384841909485431e-05, | |
| "loss": 0.6844, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.6168629882207067, | |
| "grad_norm": 8.284967422485352, | |
| "learning_rate": 1.3832920024798513e-05, | |
| "loss": 0.713, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.6184128952262864, | |
| "grad_norm": 5.959351062774658, | |
| "learning_rate": 1.3817420954742718e-05, | |
| "loss": 0.785, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.6199628022318661, | |
| "grad_norm": 5.368082523345947, | |
| "learning_rate": 1.380192188468692e-05, | |
| "loss": 0.6414, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6215127092374457, | |
| "grad_norm": 6.045334815979004, | |
| "learning_rate": 1.3786422814631124e-05, | |
| "loss": 0.7842, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.6230626162430254, | |
| "grad_norm": 7.259751319885254, | |
| "learning_rate": 1.3770923744575327e-05, | |
| "loss": 0.691, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.6246125232486051, | |
| "grad_norm": 3.850944995880127, | |
| "learning_rate": 1.375542467451953e-05, | |
| "loss": 0.549, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.6261624302541847, | |
| "grad_norm": 2.9327070713043213, | |
| "learning_rate": 1.3739925604463733e-05, | |
| "loss": 0.5583, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.6277123372597644, | |
| "grad_norm": 3.157712697982788, | |
| "learning_rate": 1.3724426534407938e-05, | |
| "loss": 0.508, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.629262244265344, | |
| "grad_norm": 5.871835708618164, | |
| "learning_rate": 1.370892746435214e-05, | |
| "loss": 0.7169, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.6308121512709237, | |
| "grad_norm": 2.6925415992736816, | |
| "learning_rate": 1.3693428394296344e-05, | |
| "loss": 0.5738, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.6323620582765034, | |
| "grad_norm": 4.072033882141113, | |
| "learning_rate": 1.3677929324240547e-05, | |
| "loss": 0.7167, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.633911965282083, | |
| "grad_norm": 2.6646270751953125, | |
| "learning_rate": 1.366243025418475e-05, | |
| "loss": 0.5226, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.6354618722876627, | |
| "grad_norm": 1.837414264678955, | |
| "learning_rate": 1.3646931184128953e-05, | |
| "loss": 0.5607, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.6370117792932424, | |
| "grad_norm": 5.846659183502197, | |
| "learning_rate": 1.3631432114073158e-05, | |
| "loss": 0.5539, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.638561686298822, | |
| "grad_norm": 4.042922496795654, | |
| "learning_rate": 1.361593304401736e-05, | |
| "loss": 0.7586, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.6401115933044017, | |
| "grad_norm": 4.464644908905029, | |
| "learning_rate": 1.3600433973961564e-05, | |
| "loss": 0.7025, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.6416615003099814, | |
| "grad_norm": 5.151641845703125, | |
| "learning_rate": 1.3584934903905767e-05, | |
| "loss": 0.7848, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.643211407315561, | |
| "grad_norm": 8.169122695922852, | |
| "learning_rate": 1.356943583384997e-05, | |
| "loss": 0.7885, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6447613143211407, | |
| "grad_norm": 5.91544246673584, | |
| "learning_rate": 1.3553936763794173e-05, | |
| "loss": 0.6493, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.6463112213267204, | |
| "grad_norm": 3.980699062347412, | |
| "learning_rate": 1.3538437693738378e-05, | |
| "loss": 0.5557, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.6478611283323, | |
| "grad_norm": 4.185513496398926, | |
| "learning_rate": 1.352293862368258e-05, | |
| "loss": 0.724, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.6494110353378797, | |
| "grad_norm": 5.239704608917236, | |
| "learning_rate": 1.3507439553626784e-05, | |
| "loss": 0.6691, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.6509609423434594, | |
| "grad_norm": 6.143891334533691, | |
| "learning_rate": 1.3491940483570987e-05, | |
| "loss": 0.6889, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.652510849349039, | |
| "grad_norm": 3.6569466590881348, | |
| "learning_rate": 1.347644141351519e-05, | |
| "loss": 0.5583, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.6540607563546187, | |
| "grad_norm": 4.665682315826416, | |
| "learning_rate": 1.3460942343459394e-05, | |
| "loss": 0.7787, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.6556106633601984, | |
| "grad_norm": 7.364247798919678, | |
| "learning_rate": 1.3445443273403598e-05, | |
| "loss": 0.7035, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.657160570365778, | |
| "grad_norm": 11.255681991577148, | |
| "learning_rate": 1.34299442033478e-05, | |
| "loss": 0.711, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.6587104773713577, | |
| "grad_norm": 5.959405899047852, | |
| "learning_rate": 1.3414445133292004e-05, | |
| "loss": 0.6408, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.6602603843769373, | |
| "grad_norm": 7.016552925109863, | |
| "learning_rate": 1.3398946063236207e-05, | |
| "loss": 0.6437, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.661810291382517, | |
| "grad_norm": 8.809164047241211, | |
| "learning_rate": 1.338344699318041e-05, | |
| "loss": 0.8967, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.6633601983880967, | |
| "grad_norm": 6.494601249694824, | |
| "learning_rate": 1.3367947923124614e-05, | |
| "loss": 0.9146, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.6649101053936763, | |
| "grad_norm": 7.576080799102783, | |
| "learning_rate": 1.3352448853068818e-05, | |
| "loss": 0.6699, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.666460012399256, | |
| "grad_norm": 6.122068881988525, | |
| "learning_rate": 1.333694978301302e-05, | |
| "loss": 0.6826, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.6680099194048357, | |
| "grad_norm": 7.942433834075928, | |
| "learning_rate": 1.3321450712957225e-05, | |
| "loss": 0.8486, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.6695598264104153, | |
| "grad_norm": 6.053321838378906, | |
| "learning_rate": 1.3305951642901428e-05, | |
| "loss": 0.5775, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.671109733415995, | |
| "grad_norm": 9.208568572998047, | |
| "learning_rate": 1.329045257284563e-05, | |
| "loss": 0.8334, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.6726596404215747, | |
| "grad_norm": 6.217433929443359, | |
| "learning_rate": 1.3274953502789834e-05, | |
| "loss": 0.7451, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.6742095474271543, | |
| "grad_norm": 5.161596775054932, | |
| "learning_rate": 1.3259454432734038e-05, | |
| "loss": 0.7425, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.675759454432734, | |
| "grad_norm": 11.464788436889648, | |
| "learning_rate": 1.324395536267824e-05, | |
| "loss": 0.7977, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.6773093614383137, | |
| "grad_norm": 4.669081687927246, | |
| "learning_rate": 1.3228456292622445e-05, | |
| "loss": 0.5158, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.6788592684438933, | |
| "grad_norm": 6.2955756187438965, | |
| "learning_rate": 1.3212957222566648e-05, | |
| "loss": 0.7647, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.680409175449473, | |
| "grad_norm": 8.309978485107422, | |
| "learning_rate": 1.319745815251085e-05, | |
| "loss": 0.6655, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.6819590824550527, | |
| "grad_norm": 5.269478797912598, | |
| "learning_rate": 1.3181959082455054e-05, | |
| "loss": 0.7487, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.6835089894606323, | |
| "grad_norm": 6.342654705047607, | |
| "learning_rate": 1.3166460012399259e-05, | |
| "loss": 0.7688, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.685058896466212, | |
| "grad_norm": 3.4577929973602295, | |
| "learning_rate": 1.315096094234346e-05, | |
| "loss": 0.5437, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.6866088034717917, | |
| "grad_norm": 3.24524188041687, | |
| "learning_rate": 1.3135461872287665e-05, | |
| "loss": 0.7577, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.6881587104773713, | |
| "grad_norm": 6.469138145446777, | |
| "learning_rate": 1.3119962802231868e-05, | |
| "loss": 0.5899, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.689708617482951, | |
| "grad_norm": 2.1648213863372803, | |
| "learning_rate": 1.310446373217607e-05, | |
| "loss": 0.6514, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.6912585244885306, | |
| "grad_norm": 5.5625762939453125, | |
| "learning_rate": 1.3088964662120274e-05, | |
| "loss": 0.8065, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.6928084314941103, | |
| "grad_norm": 9.689412117004395, | |
| "learning_rate": 1.3073465592064479e-05, | |
| "loss": 0.6607, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.69435833849969, | |
| "grad_norm": 3.1396737098693848, | |
| "learning_rate": 1.305796652200868e-05, | |
| "loss": 0.6567, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.6959082455052696, | |
| "grad_norm": 4.68833589553833, | |
| "learning_rate": 1.3042467451952885e-05, | |
| "loss": 0.5374, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.6974581525108493, | |
| "grad_norm": 3.161306619644165, | |
| "learning_rate": 1.3026968381897088e-05, | |
| "loss": 0.6759, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.699008059516429, | |
| "grad_norm": 5.765093803405762, | |
| "learning_rate": 1.3011469311841291e-05, | |
| "loss": 0.6232, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7005579665220086, | |
| "grad_norm": 2.2126271724700928, | |
| "learning_rate": 1.2995970241785494e-05, | |
| "loss": 0.567, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.7021078735275883, | |
| "grad_norm": 6.046818256378174, | |
| "learning_rate": 1.2980471171729699e-05, | |
| "loss": 0.8203, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.703657780533168, | |
| "grad_norm": 9.789670944213867, | |
| "learning_rate": 1.29649721016739e-05, | |
| "loss": 0.7596, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7052076875387476, | |
| "grad_norm": 5.565737724304199, | |
| "learning_rate": 1.2949473031618105e-05, | |
| "loss": 0.7163, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7067575945443273, | |
| "grad_norm": 5.0460591316223145, | |
| "learning_rate": 1.2933973961562308e-05, | |
| "loss": 0.464, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.708307501549907, | |
| "grad_norm": 3.645411252975464, | |
| "learning_rate": 1.2918474891506511e-05, | |
| "loss": 0.9075, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.7098574085554866, | |
| "grad_norm": 12.882902145385742, | |
| "learning_rate": 1.2902975821450714e-05, | |
| "loss": 0.7601, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.7114073155610663, | |
| "grad_norm": 7.9908881187438965, | |
| "learning_rate": 1.2887476751394919e-05, | |
| "loss": 0.3679, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.712957222566646, | |
| "grad_norm": 5.731966972351074, | |
| "learning_rate": 1.287197768133912e-05, | |
| "loss": 0.5739, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7145071295722256, | |
| "grad_norm": 7.825245380401611, | |
| "learning_rate": 1.2856478611283325e-05, | |
| "loss": 0.8259, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.7160570365778053, | |
| "grad_norm": 4.170419216156006, | |
| "learning_rate": 1.2840979541227528e-05, | |
| "loss": 0.6523, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.717606943583385, | |
| "grad_norm": 6.501009464263916, | |
| "learning_rate": 1.2825480471171731e-05, | |
| "loss": 0.7955, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.7191568505889646, | |
| "grad_norm": 5.350160598754883, | |
| "learning_rate": 1.2809981401115934e-05, | |
| "loss": 0.506, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.7207067575945443, | |
| "grad_norm": 5.508749485015869, | |
| "learning_rate": 1.2794482331060139e-05, | |
| "loss": 0.6531, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7222566646001239, | |
| "grad_norm": 3.685305595397949, | |
| "learning_rate": 1.277898326100434e-05, | |
| "loss": 0.7643, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.7238065716057036, | |
| "grad_norm": 4.835994720458984, | |
| "learning_rate": 1.2763484190948545e-05, | |
| "loss": 0.7906, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.7253564786112833, | |
| "grad_norm": 4.646899700164795, | |
| "learning_rate": 1.2747985120892748e-05, | |
| "loss": 0.8304, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.7269063856168629, | |
| "grad_norm": 8.185405731201172, | |
| "learning_rate": 1.2732486050836951e-05, | |
| "loss": 0.8615, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.7284562926224426, | |
| "grad_norm": 4.894402980804443, | |
| "learning_rate": 1.2716986980781154e-05, | |
| "loss": 0.7577, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.7300061996280223, | |
| "grad_norm": 6.026183128356934, | |
| "learning_rate": 1.2701487910725359e-05, | |
| "loss": 0.6623, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.7315561066336019, | |
| "grad_norm": 8.507851600646973, | |
| "learning_rate": 1.268598884066956e-05, | |
| "loss": 0.6662, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.7331060136391816, | |
| "grad_norm": 6.706274032592773, | |
| "learning_rate": 1.2670489770613765e-05, | |
| "loss": 0.8706, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.7346559206447613, | |
| "grad_norm": 9.545132637023926, | |
| "learning_rate": 1.2654990700557968e-05, | |
| "loss": 0.4978, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.7362058276503409, | |
| "grad_norm": 8.161598205566406, | |
| "learning_rate": 1.2639491630502171e-05, | |
| "loss": 0.7005, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.7377557346559206, | |
| "grad_norm": 7.0445356369018555, | |
| "learning_rate": 1.2623992560446374e-05, | |
| "loss": 0.8165, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.7393056416615003, | |
| "grad_norm": 4.3169965744018555, | |
| "learning_rate": 1.2608493490390579e-05, | |
| "loss": 0.6663, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.7408555486670799, | |
| "grad_norm": 3.8627138137817383, | |
| "learning_rate": 1.259299442033478e-05, | |
| "loss": 0.7779, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.7424054556726596, | |
| "grad_norm": 7.226226806640625, | |
| "learning_rate": 1.2577495350278985e-05, | |
| "loss": 0.7438, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.7439553626782393, | |
| "grad_norm": 5.355091094970703, | |
| "learning_rate": 1.2561996280223188e-05, | |
| "loss": 0.4383, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.7455052696838189, | |
| "grad_norm": 9.102039337158203, | |
| "learning_rate": 1.2546497210167391e-05, | |
| "loss": 0.6852, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.7470551766893986, | |
| "grad_norm": 3.6771786212921143, | |
| "learning_rate": 1.2530998140111594e-05, | |
| "loss": 0.5601, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.7486050836949782, | |
| "grad_norm": 7.723818302154541, | |
| "learning_rate": 1.25154990700558e-05, | |
| "loss": 0.7773, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.750154990700558, | |
| "grad_norm": 11.004088401794434, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.7228, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.7517048977061377, | |
| "grad_norm": 6.969561576843262, | |
| "learning_rate": 1.2484500929944204e-05, | |
| "loss": 0.8263, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.7532548047117174, | |
| "grad_norm": 7.78557014465332, | |
| "learning_rate": 1.2469001859888408e-05, | |
| "loss": 0.5446, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.754804711717297, | |
| "grad_norm": 5.880771636962891, | |
| "learning_rate": 1.245350278983261e-05, | |
| "loss": 0.581, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.7563546187228767, | |
| "grad_norm": 3.1848342418670654, | |
| "learning_rate": 1.2438003719776815e-05, | |
| "loss": 0.7058, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.7579045257284563, | |
| "grad_norm": 3.6056909561157227, | |
| "learning_rate": 1.2422504649721016e-05, | |
| "loss": 0.6554, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.759454432734036, | |
| "grad_norm": 7.92929220199585, | |
| "learning_rate": 1.240700557966522e-05, | |
| "loss": 0.821, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.7610043397396157, | |
| "grad_norm": 9.471344947814941, | |
| "learning_rate": 1.2391506509609424e-05, | |
| "loss": 0.7723, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.7625542467451953, | |
| "grad_norm": 2.8073582649230957, | |
| "learning_rate": 1.2376007439553629e-05, | |
| "loss": 0.6256, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.764104153750775, | |
| "grad_norm": 4.1729416847229, | |
| "learning_rate": 1.236050836949783e-05, | |
| "loss": 0.6186, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.7656540607563547, | |
| "grad_norm": 9.117597579956055, | |
| "learning_rate": 1.2345009299442035e-05, | |
| "loss": 0.6793, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.7672039677619343, | |
| "grad_norm": 6.196779251098633, | |
| "learning_rate": 1.2329510229386236e-05, | |
| "loss": 0.6492, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.768753874767514, | |
| "grad_norm": 7.226120948791504, | |
| "learning_rate": 1.231401115933044e-05, | |
| "loss": 0.5859, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.7703037817730937, | |
| "grad_norm": 4.2936811447143555, | |
| "learning_rate": 1.2298512089274644e-05, | |
| "loss": 0.535, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.7718536887786733, | |
| "grad_norm": 8.011672973632812, | |
| "learning_rate": 1.2283013019218849e-05, | |
| "loss": 0.5563, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.773403595784253, | |
| "grad_norm": 6.809650421142578, | |
| "learning_rate": 1.226751394916305e-05, | |
| "loss": 0.8006, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.7749535027898327, | |
| "grad_norm": 3.524944305419922, | |
| "learning_rate": 1.2252014879107255e-05, | |
| "loss": 0.8, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.7765034097954123, | |
| "grad_norm": 6.521675109863281, | |
| "learning_rate": 1.2236515809051456e-05, | |
| "loss": 0.6234, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.778053316800992, | |
| "grad_norm": 4.332557201385498, | |
| "learning_rate": 1.2221016738995661e-05, | |
| "loss": 0.6875, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.7796032238065717, | |
| "grad_norm": 6.0071001052856445, | |
| "learning_rate": 1.2205517668939864e-05, | |
| "loss": 0.6597, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.7811531308121513, | |
| "grad_norm": 7.195069789886475, | |
| "learning_rate": 1.2190018598884069e-05, | |
| "loss": 0.58, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.782703037817731, | |
| "grad_norm": 5.996945858001709, | |
| "learning_rate": 1.217451952882827e-05, | |
| "loss": 0.6546, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.7842529448233106, | |
| "grad_norm": 5.592831134796143, | |
| "learning_rate": 1.2159020458772475e-05, | |
| "loss": 0.4924, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.7858028518288903, | |
| "grad_norm": 6.841360569000244, | |
| "learning_rate": 1.2143521388716676e-05, | |
| "loss": 0.4969, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.78735275883447, | |
| "grad_norm": 4.553008556365967, | |
| "learning_rate": 1.2128022318660881e-05, | |
| "loss": 0.6957, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.7889026658400496, | |
| "grad_norm": 6.283394813537598, | |
| "learning_rate": 1.2112523248605084e-05, | |
| "loss": 0.8347, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.7904525728456293, | |
| "grad_norm": 10.103006362915039, | |
| "learning_rate": 1.2097024178549289e-05, | |
| "loss": 0.7692, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.792002479851209, | |
| "grad_norm": 7.552977085113525, | |
| "learning_rate": 1.208152510849349e-05, | |
| "loss": 0.5701, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.7935523868567886, | |
| "grad_norm": 5.834062099456787, | |
| "learning_rate": 1.2066026038437695e-05, | |
| "loss": 0.5668, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.7951022938623683, | |
| "grad_norm": 13.34677791595459, | |
| "learning_rate": 1.2050526968381896e-05, | |
| "loss": 0.8795, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.796652200867948, | |
| "grad_norm": 3.1590523719787598, | |
| "learning_rate": 1.2035027898326101e-05, | |
| "loss": 0.681, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.7982021078735276, | |
| "grad_norm": 10.1262845993042, | |
| "learning_rate": 1.2019528828270304e-05, | |
| "loss": 0.6726, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.7997520148791073, | |
| "grad_norm": 6.833702564239502, | |
| "learning_rate": 1.2004029758214509e-05, | |
| "loss": 0.6204, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.801301921884687, | |
| "grad_norm": 3.4104177951812744, | |
| "learning_rate": 1.198853068815871e-05, | |
| "loss": 0.4738, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.8028518288902666, | |
| "grad_norm": 3.8799777030944824, | |
| "learning_rate": 1.1973031618102915e-05, | |
| "loss": 0.5993, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8044017358958463, | |
| "grad_norm": 3.472444772720337, | |
| "learning_rate": 1.1957532548047116e-05, | |
| "loss": 0.6978, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.805951642901426, | |
| "grad_norm": 9.026004791259766, | |
| "learning_rate": 1.1942033477991321e-05, | |
| "loss": 0.864, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8075015499070056, | |
| "grad_norm": 5.485747814178467, | |
| "learning_rate": 1.1926534407935524e-05, | |
| "loss": 0.6341, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8090514569125853, | |
| "grad_norm": 7.51614236831665, | |
| "learning_rate": 1.1911035337879729e-05, | |
| "loss": 0.8633, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.810601363918165, | |
| "grad_norm": 4.139168739318848, | |
| "learning_rate": 1.189553626782393e-05, | |
| "loss": 0.6028, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.8121512709237446, | |
| "grad_norm": 6.211816310882568, | |
| "learning_rate": 1.1880037197768135e-05, | |
| "loss": 0.6971, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.8137011779293243, | |
| "grad_norm": 5.013968467712402, | |
| "learning_rate": 1.1864538127712336e-05, | |
| "loss": 0.7027, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.815251084934904, | |
| "grad_norm": 5.569365978240967, | |
| "learning_rate": 1.1849039057656541e-05, | |
| "loss": 0.6832, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.8168009919404836, | |
| "grad_norm": 10.435909271240234, | |
| "learning_rate": 1.1833539987600744e-05, | |
| "loss": 0.6241, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.8183508989460633, | |
| "grad_norm": 5.697261333465576, | |
| "learning_rate": 1.1818040917544949e-05, | |
| "loss": 0.747, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.8199008059516429, | |
| "grad_norm": 1.861268401145935, | |
| "learning_rate": 1.180254184748915e-05, | |
| "loss": 0.4611, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.8214507129572226, | |
| "grad_norm": 4.731881141662598, | |
| "learning_rate": 1.1787042777433355e-05, | |
| "loss": 0.747, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.8230006199628023, | |
| "grad_norm": 3.6312947273254395, | |
| "learning_rate": 1.1771543707377557e-05, | |
| "loss": 0.7208, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.8245505269683819, | |
| "grad_norm": 4.972537517547607, | |
| "learning_rate": 1.1756044637321761e-05, | |
| "loss": 0.6674, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.8261004339739616, | |
| "grad_norm": 7.811915874481201, | |
| "learning_rate": 1.1740545567265964e-05, | |
| "loss": 0.8836, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.8276503409795413, | |
| "grad_norm": 6.216588497161865, | |
| "learning_rate": 1.172504649721017e-05, | |
| "loss": 0.5343, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.8292002479851209, | |
| "grad_norm": 4.911030292510986, | |
| "learning_rate": 1.170954742715437e-05, | |
| "loss": 0.8264, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.8307501549907006, | |
| "grad_norm": 1.2918230295181274, | |
| "learning_rate": 1.1694048357098575e-05, | |
| "loss": 0.5548, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.8323000619962803, | |
| "grad_norm": 6.884954929351807, | |
| "learning_rate": 1.1678549287042777e-05, | |
| "loss": 0.536, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.8338499690018599, | |
| "grad_norm": 9.084111213684082, | |
| "learning_rate": 1.1663050216986981e-05, | |
| "loss": 0.5337, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.8353998760074396, | |
| "grad_norm": 5.208802700042725, | |
| "learning_rate": 1.1647551146931185e-05, | |
| "loss": 0.5921, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.8369497830130193, | |
| "grad_norm": 9.151023864746094, | |
| "learning_rate": 1.163205207687539e-05, | |
| "loss": 0.708, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.8384996900185989, | |
| "grad_norm": 5.111368656158447, | |
| "learning_rate": 1.161655300681959e-05, | |
| "loss": 0.6403, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.8400495970241786, | |
| "grad_norm": 10.290874481201172, | |
| "learning_rate": 1.1601053936763795e-05, | |
| "loss": 0.8278, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.8415995040297582, | |
| "grad_norm": 7.0446577072143555, | |
| "learning_rate": 1.1585554866707997e-05, | |
| "loss": 0.6219, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.8431494110353379, | |
| "grad_norm": 3.362149238586426, | |
| "learning_rate": 1.1570055796652202e-05, | |
| "loss": 0.4145, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.8446993180409176, | |
| "grad_norm": 10.284631729125977, | |
| "learning_rate": 1.1554556726596405e-05, | |
| "loss": 0.7615, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.8462492250464972, | |
| "grad_norm": 8.443507194519043, | |
| "learning_rate": 1.153905765654061e-05, | |
| "loss": 0.5863, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.8477991320520769, | |
| "grad_norm": 10.936263084411621, | |
| "learning_rate": 1.152355858648481e-05, | |
| "loss": 0.5876, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.8493490390576566, | |
| "grad_norm": 3.9119393825531006, | |
| "learning_rate": 1.1508059516429015e-05, | |
| "loss": 0.5987, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.8508989460632362, | |
| "grad_norm": 12.641061782836914, | |
| "learning_rate": 1.1492560446373217e-05, | |
| "loss": 0.7522, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.8524488530688159, | |
| "grad_norm": 5.811850070953369, | |
| "learning_rate": 1.1477061376317422e-05, | |
| "loss": 0.5882, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8539987600743956, | |
| "grad_norm": 12.124834060668945, | |
| "learning_rate": 1.1461562306261625e-05, | |
| "loss": 0.7449, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.8555486670799752, | |
| "grad_norm": 5.387477874755859, | |
| "learning_rate": 1.144606323620583e-05, | |
| "loss": 0.6186, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.8570985740855549, | |
| "grad_norm": 7.7974677085876465, | |
| "learning_rate": 1.143056416615003e-05, | |
| "loss": 0.6465, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.8586484810911346, | |
| "grad_norm": 7.694963455200195, | |
| "learning_rate": 1.1415065096094236e-05, | |
| "loss": 0.6354, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.8601983880967142, | |
| "grad_norm": 2.7126150131225586, | |
| "learning_rate": 1.1399566026038437e-05, | |
| "loss": 0.648, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.8617482951022939, | |
| "grad_norm": 5.220517635345459, | |
| "learning_rate": 1.1384066955982642e-05, | |
| "loss": 0.7159, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.8632982021078736, | |
| "grad_norm": 3.2614328861236572, | |
| "learning_rate": 1.1368567885926845e-05, | |
| "loss": 0.6143, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.8648481091134532, | |
| "grad_norm": 7.20950174331665, | |
| "learning_rate": 1.135306881587105e-05, | |
| "loss": 0.6482, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.8663980161190329, | |
| "grad_norm": 5.191160678863525, | |
| "learning_rate": 1.1337569745815251e-05, | |
| "loss": 0.6001, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.8679479231246126, | |
| "grad_norm": 4.177618026733398, | |
| "learning_rate": 1.1322070675759456e-05, | |
| "loss": 0.5531, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.8694978301301922, | |
| "grad_norm": 9.273628234863281, | |
| "learning_rate": 1.1306571605703657e-05, | |
| "loss": 0.7717, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.8710477371357719, | |
| "grad_norm": 8.972816467285156, | |
| "learning_rate": 1.1291072535647862e-05, | |
| "loss": 0.6219, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.8725976441413515, | |
| "grad_norm": 9.480229377746582, | |
| "learning_rate": 1.1275573465592065e-05, | |
| "loss": 0.5561, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.8741475511469312, | |
| "grad_norm": 5.781515121459961, | |
| "learning_rate": 1.126007439553627e-05, | |
| "loss": 0.688, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.8756974581525109, | |
| "grad_norm": 1.9353567361831665, | |
| "learning_rate": 1.1244575325480471e-05, | |
| "loss": 0.8956, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.8772473651580905, | |
| "grad_norm": 4.833968639373779, | |
| "learning_rate": 1.1229076255424676e-05, | |
| "loss": 0.8678, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.8787972721636702, | |
| "grad_norm": 4.125862121582031, | |
| "learning_rate": 1.1213577185368877e-05, | |
| "loss": 0.6416, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.8803471791692499, | |
| "grad_norm": 5.829898834228516, | |
| "learning_rate": 1.1198078115313082e-05, | |
| "loss": 0.7404, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.8818970861748295, | |
| "grad_norm": 3.0239686965942383, | |
| "learning_rate": 1.1182579045257285e-05, | |
| "loss": 0.612, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.8834469931804092, | |
| "grad_norm": 6.91836404800415, | |
| "learning_rate": 1.116707997520149e-05, | |
| "loss": 0.7713, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.8849969001859889, | |
| "grad_norm": 3.822946548461914, | |
| "learning_rate": 1.1151580905145691e-05, | |
| "loss": 0.5584, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.8865468071915685, | |
| "grad_norm": 7.173694610595703, | |
| "learning_rate": 1.1136081835089896e-05, | |
| "loss": 0.6347, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.8880967141971482, | |
| "grad_norm": 5.30385160446167, | |
| "learning_rate": 1.1120582765034097e-05, | |
| "loss": 0.652, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.8896466212027279, | |
| "grad_norm": 5.35252571105957, | |
| "learning_rate": 1.1105083694978302e-05, | |
| "loss": 0.718, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.8911965282083075, | |
| "grad_norm": 5.5714850425720215, | |
| "learning_rate": 1.1089584624922505e-05, | |
| "loss": 0.7949, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.8927464352138872, | |
| "grad_norm": 1.9642515182495117, | |
| "learning_rate": 1.107408555486671e-05, | |
| "loss": 0.5296, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.8942963422194669, | |
| "grad_norm": 5.072615623474121, | |
| "learning_rate": 1.1058586484810911e-05, | |
| "loss": 0.5537, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.8958462492250465, | |
| "grad_norm": 3.3164658546447754, | |
| "learning_rate": 1.1043087414755116e-05, | |
| "loss": 0.6519, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.8973961562306262, | |
| "grad_norm": 7.427783489227295, | |
| "learning_rate": 1.1027588344699317e-05, | |
| "loss": 0.7408, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.8989460632362059, | |
| "grad_norm": 5.606306076049805, | |
| "learning_rate": 1.1012089274643522e-05, | |
| "loss": 0.7276, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9004959702417855, | |
| "grad_norm": 10.554597854614258, | |
| "learning_rate": 1.0996590204587725e-05, | |
| "loss": 0.8125, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9020458772473652, | |
| "grad_norm": 4.86166524887085, | |
| "learning_rate": 1.098109113453193e-05, | |
| "loss": 0.5918, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9035957842529448, | |
| "grad_norm": 7.691864013671875, | |
| "learning_rate": 1.0965592064476131e-05, | |
| "loss": 0.7419, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9051456912585245, | |
| "grad_norm": 7.993322849273682, | |
| "learning_rate": 1.0950092994420336e-05, | |
| "loss": 0.7957, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.9066955982641042, | |
| "grad_norm": 5.241565227508545, | |
| "learning_rate": 1.0934593924364537e-05, | |
| "loss": 0.603, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9082455052696838, | |
| "grad_norm": 3.7309389114379883, | |
| "learning_rate": 1.0919094854308742e-05, | |
| "loss": 0.5684, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.9097954122752635, | |
| "grad_norm": 6.568719387054443, | |
| "learning_rate": 1.0903595784252945e-05, | |
| "loss": 0.5626, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.9113453192808432, | |
| "grad_norm": 12.020549774169922, | |
| "learning_rate": 1.088809671419715e-05, | |
| "loss": 0.6653, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.9128952262864228, | |
| "grad_norm": 5.109013557434082, | |
| "learning_rate": 1.0872597644141351e-05, | |
| "loss": 0.6933, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.9144451332920025, | |
| "grad_norm": 5.643796920776367, | |
| "learning_rate": 1.0857098574085556e-05, | |
| "loss": 0.5707, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.9159950402975822, | |
| "grad_norm": 8.789315223693848, | |
| "learning_rate": 1.0841599504029758e-05, | |
| "loss": 0.5212, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.9175449473031618, | |
| "grad_norm": 7.760068893432617, | |
| "learning_rate": 1.0826100433973962e-05, | |
| "loss": 0.618, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.9190948543087415, | |
| "grad_norm": 6.435039520263672, | |
| "learning_rate": 1.0810601363918165e-05, | |
| "loss": 0.622, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.9206447613143212, | |
| "grad_norm": 8.1256685256958, | |
| "learning_rate": 1.079510229386237e-05, | |
| "loss": 0.7558, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.9221946683199008, | |
| "grad_norm": 6.529952049255371, | |
| "learning_rate": 1.0779603223806571e-05, | |
| "loss": 0.6672, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.9237445753254805, | |
| "grad_norm": 7.354854583740234, | |
| "learning_rate": 1.0764104153750776e-05, | |
| "loss": 0.7919, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.9252944823310602, | |
| "grad_norm": 4.898510456085205, | |
| "learning_rate": 1.0748605083694978e-05, | |
| "loss": 0.6045, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.9268443893366398, | |
| "grad_norm": 13.176275253295898, | |
| "learning_rate": 1.0733106013639182e-05, | |
| "loss": 0.6485, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.9283942963422195, | |
| "grad_norm": 8.560967445373535, | |
| "learning_rate": 1.0717606943583385e-05, | |
| "loss": 0.8658, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.9299442033477991, | |
| "grad_norm": 5.788320541381836, | |
| "learning_rate": 1.070210787352759e-05, | |
| "loss": 0.7069, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9314941103533788, | |
| "grad_norm": 9.707194328308105, | |
| "learning_rate": 1.0686608803471792e-05, | |
| "loss": 0.5805, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.9330440173589585, | |
| "grad_norm": 4.44306755065918, | |
| "learning_rate": 1.0671109733415996e-05, | |
| "loss": 0.6697, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.9345939243645381, | |
| "grad_norm": 10.93558120727539, | |
| "learning_rate": 1.0655610663360198e-05, | |
| "loss": 0.5436, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.9361438313701178, | |
| "grad_norm": 5.45318603515625, | |
| "learning_rate": 1.0640111593304402e-05, | |
| "loss": 0.4397, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.9376937383756975, | |
| "grad_norm": 7.893631458282471, | |
| "learning_rate": 1.0624612523248606e-05, | |
| "loss": 0.6742, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.9392436453812771, | |
| "grad_norm": 3.9682693481445312, | |
| "learning_rate": 1.060911345319281e-05, | |
| "loss": 0.75, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.9407935523868568, | |
| "grad_norm": 9.17393684387207, | |
| "learning_rate": 1.0593614383137012e-05, | |
| "loss": 0.6765, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.9423434593924365, | |
| "grad_norm": 6.118794918060303, | |
| "learning_rate": 1.0578115313081216e-05, | |
| "loss": 0.794, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.9438933663980161, | |
| "grad_norm": 5.624363422393799, | |
| "learning_rate": 1.056261624302542e-05, | |
| "loss": 0.7256, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.9454432734035958, | |
| "grad_norm": 4.654599666595459, | |
| "learning_rate": 1.0547117172969623e-05, | |
| "loss": 0.5472, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.9469931804091755, | |
| "grad_norm": 3.4627890586853027, | |
| "learning_rate": 1.0531618102913826e-05, | |
| "loss": 0.787, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.9485430874147551, | |
| "grad_norm": 4.292537212371826, | |
| "learning_rate": 1.051611903285803e-05, | |
| "loss": 0.719, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.9500929944203348, | |
| "grad_norm": 8.54881477355957, | |
| "learning_rate": 1.0500619962802232e-05, | |
| "loss": 0.5448, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.9516429014259145, | |
| "grad_norm": 6.626066207885742, | |
| "learning_rate": 1.0485120892746437e-05, | |
| "loss": 0.7148, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.9531928084314941, | |
| "grad_norm": 5.619740009307861, | |
| "learning_rate": 1.046962182269064e-05, | |
| "loss": 0.9223, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.9547427154370738, | |
| "grad_norm": 5.610943794250488, | |
| "learning_rate": 1.0454122752634843e-05, | |
| "loss": 0.7121, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.9562926224426535, | |
| "grad_norm": 4.307107925415039, | |
| "learning_rate": 1.0438623682579046e-05, | |
| "loss": 0.7286, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.9578425294482331, | |
| "grad_norm": 9.234688758850098, | |
| "learning_rate": 1.042312461252325e-05, | |
| "loss": 0.5873, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.9593924364538128, | |
| "grad_norm": 4.530324935913086, | |
| "learning_rate": 1.0407625542467452e-05, | |
| "loss": 0.941, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.9609423434593924, | |
| "grad_norm": 3.6115729808807373, | |
| "learning_rate": 1.0392126472411657e-05, | |
| "loss": 0.655, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.9624922504649721, | |
| "grad_norm": 1.2821598052978516, | |
| "learning_rate": 1.037662740235586e-05, | |
| "loss": 0.6786, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.9640421574705518, | |
| "grad_norm": 6.109910011291504, | |
| "learning_rate": 1.0361128332300063e-05, | |
| "loss": 0.7574, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.9655920644761314, | |
| "grad_norm": 3.653637409210205, | |
| "learning_rate": 1.0345629262244266e-05, | |
| "loss": 0.5425, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.9671419714817111, | |
| "grad_norm": 10.726699829101562, | |
| "learning_rate": 1.033013019218847e-05, | |
| "loss": 0.7053, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.9686918784872908, | |
| "grad_norm": 4.3392863273620605, | |
| "learning_rate": 1.0314631122132672e-05, | |
| "loss": 0.5929, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.9702417854928704, | |
| "grad_norm": 4.076502323150635, | |
| "learning_rate": 1.0299132052076877e-05, | |
| "loss": 0.6403, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.9717916924984501, | |
| "grad_norm": 2.983644485473633, | |
| "learning_rate": 1.028363298202108e-05, | |
| "loss": 0.4164, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.9733415995040298, | |
| "grad_norm": 3.6795578002929688, | |
| "learning_rate": 1.0268133911965283e-05, | |
| "loss": 0.7013, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.9748915065096094, | |
| "grad_norm": 5.60479736328125, | |
| "learning_rate": 1.0252634841909486e-05, | |
| "loss": 1.0202, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.9764414135151891, | |
| "grad_norm": 5.775755882263184, | |
| "learning_rate": 1.023713577185369e-05, | |
| "loss": 0.6361, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.9779913205207688, | |
| "grad_norm": 5.529523849487305, | |
| "learning_rate": 1.0221636701797892e-05, | |
| "loss": 0.7928, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.9795412275263484, | |
| "grad_norm": 6.792111396789551, | |
| "learning_rate": 1.0206137631742097e-05, | |
| "loss": 0.5417, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.9810911345319281, | |
| "grad_norm": 8.089174270629883, | |
| "learning_rate": 1.01906385616863e-05, | |
| "loss": 0.4928, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.9826410415375078, | |
| "grad_norm": 4.665356636047363, | |
| "learning_rate": 1.0175139491630503e-05, | |
| "loss": 0.6717, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.9841909485430874, | |
| "grad_norm": 6.834210395812988, | |
| "learning_rate": 1.0159640421574706e-05, | |
| "loss": 0.9058, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.9857408555486671, | |
| "grad_norm": 3.173269748687744, | |
| "learning_rate": 1.014414135151891e-05, | |
| "loss": 0.5223, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.9872907625542467, | |
| "grad_norm": 5.208651065826416, | |
| "learning_rate": 1.0128642281463112e-05, | |
| "loss": 0.6229, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.9888406695598264, | |
| "grad_norm": 4.102089881896973, | |
| "learning_rate": 1.0113143211407317e-05, | |
| "loss": 0.7312, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.9903905765654061, | |
| "grad_norm": 3.437283754348755, | |
| "learning_rate": 1.009764414135152e-05, | |
| "loss": 0.6693, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.9919404835709857, | |
| "grad_norm": 2.8786211013793945, | |
| "learning_rate": 1.0082145071295723e-05, | |
| "loss": 0.5546, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.9934903905765654, | |
| "grad_norm": 4.383023262023926, | |
| "learning_rate": 1.0066646001239926e-05, | |
| "loss": 0.6212, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.9950402975821451, | |
| "grad_norm": 9.466085433959961, | |
| "learning_rate": 1.0051146931184131e-05, | |
| "loss": 0.7268, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.9965902045877247, | |
| "grad_norm": 6.23153018951416, | |
| "learning_rate": 1.0035647861128332e-05, | |
| "loss": 0.6134, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.9981401115933044, | |
| "grad_norm": 5.540073394775391, | |
| "learning_rate": 1.0020148791072537e-05, | |
| "loss": 0.5933, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.9996900185988841, | |
| "grad_norm": 3.962766170501709, | |
| "learning_rate": 1.000464972101674e-05, | |
| "loss": 0.5341, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.3304628632938644, | |
| "eval_loss": 1.8274908065795898, | |
| "eval_runtime": 74.0511, | |
| "eval_samples_per_second": 25.091, | |
| "eval_steps_per_second": 3.146, | |
| "step": 6452 | |
| }, | |
| { | |
| "epoch": 1.0012399256044637, | |
| "grad_norm": 3.318366050720215, | |
| "learning_rate": 9.989150650960943e-06, | |
| "loss": 0.6346, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.0027898326100435, | |
| "grad_norm": 7.832874298095703, | |
| "learning_rate": 9.973651580905146e-06, | |
| "loss": 0.6954, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.004339739615623, | |
| "grad_norm": 1.6306357383728027, | |
| "learning_rate": 9.95815251084935e-06, | |
| "loss": 0.5244, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.0058896466212028, | |
| "grad_norm": 6.60695743560791, | |
| "learning_rate": 9.942653440793552e-06, | |
| "loss": 0.5221, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.0074395536267824, | |
| "grad_norm": 7.101430416107178, | |
| "learning_rate": 9.927154370737755e-06, | |
| "loss": 0.8363, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.0089894606323622, | |
| "grad_norm": 2.7308666706085205, | |
| "learning_rate": 9.91165530068196e-06, | |
| "loss": 0.497, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.0105393676379417, | |
| "grad_norm": 7.153020858764648, | |
| "learning_rate": 9.896156230626163e-06, | |
| "loss": 0.733, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.0120892746435215, | |
| "grad_norm": 8.50242805480957, | |
| "learning_rate": 9.880657160570366e-06, | |
| "loss": 0.6382, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.013639181649101, | |
| "grad_norm": 3.3164360523223877, | |
| "learning_rate": 9.86515809051457e-06, | |
| "loss": 0.4422, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.0151890886546808, | |
| "grad_norm": 9.223011016845703, | |
| "learning_rate": 9.849659020458772e-06, | |
| "loss": 0.7345, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.0167389956602604, | |
| "grad_norm": 2.469008207321167, | |
| "learning_rate": 9.834159950402975e-06, | |
| "loss": 0.5223, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.0182889026658402, | |
| "grad_norm": 8.593854904174805, | |
| "learning_rate": 9.81866088034718e-06, | |
| "loss": 0.6986, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.0198388096714197, | |
| "grad_norm": 2.7528738975524902, | |
| "learning_rate": 9.803161810291383e-06, | |
| "loss": 0.5359, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.0213887166769995, | |
| "grad_norm": 8.594300270080566, | |
| "learning_rate": 9.787662740235586e-06, | |
| "loss": 0.489, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.022938623682579, | |
| "grad_norm": 10.482670783996582, | |
| "learning_rate": 9.77216367017979e-06, | |
| "loss": 0.5833, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.0244885306881588, | |
| "grad_norm": 7.439445495605469, | |
| "learning_rate": 9.756664600123993e-06, | |
| "loss": 0.4675, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.0260384376937384, | |
| "grad_norm": 5.366421699523926, | |
| "learning_rate": 9.741165530068196e-06, | |
| "loss": 0.5947, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.0275883446993181, | |
| "grad_norm": 7.488732814788818, | |
| "learning_rate": 9.7256664600124e-06, | |
| "loss": 0.609, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.0291382517048977, | |
| "grad_norm": 6.010669231414795, | |
| "learning_rate": 9.710167389956603e-06, | |
| "loss": 0.7328, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.0306881587104775, | |
| "grad_norm": 5.864341735839844, | |
| "learning_rate": 9.694668319900806e-06, | |
| "loss": 0.4796, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.032238065716057, | |
| "grad_norm": 9.411959648132324, | |
| "learning_rate": 9.67916924984501e-06, | |
| "loss": 0.4709, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.0337879727216368, | |
| "grad_norm": 3.1275575160980225, | |
| "learning_rate": 9.663670179789213e-06, | |
| "loss": 0.4787, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.0353378797272164, | |
| "grad_norm": 6.15601110458374, | |
| "learning_rate": 9.648171109733416e-06, | |
| "loss": 0.5337, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.0368877867327961, | |
| "grad_norm": 5.146254062652588, | |
| "learning_rate": 9.63267203967762e-06, | |
| "loss": 0.5042, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.0384376937383757, | |
| "grad_norm": 7.776717662811279, | |
| "learning_rate": 9.617172969621824e-06, | |
| "loss": 0.8024, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.0399876007439555, | |
| "grad_norm": 7.797553539276123, | |
| "learning_rate": 9.601673899566027e-06, | |
| "loss": 0.5992, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.041537507749535, | |
| "grad_norm": 6.587271213531494, | |
| "learning_rate": 9.58617482951023e-06, | |
| "loss": 0.5811, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.0430874147551148, | |
| "grad_norm": 8.45751667022705, | |
| "learning_rate": 9.570675759454433e-06, | |
| "loss": 0.7937, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.0446373217606943, | |
| "grad_norm": 6.197624206542969, | |
| "learning_rate": 9.555176689398636e-06, | |
| "loss": 0.5891, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.0461872287662741, | |
| "grad_norm": 5.508509159088135, | |
| "learning_rate": 9.53967761934284e-06, | |
| "loss": 0.4753, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.0477371357718537, | |
| "grad_norm": 4.740791320800781, | |
| "learning_rate": 9.524178549287044e-06, | |
| "loss": 0.6337, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.0492870427774335, | |
| "grad_norm": 5.38060188293457, | |
| "learning_rate": 9.508679479231247e-06, | |
| "loss": 0.4552, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.050836949783013, | |
| "grad_norm": 11.015870094299316, | |
| "learning_rate": 9.49318040917545e-06, | |
| "loss": 0.6587, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.0523868567885928, | |
| "grad_norm": 6.224707126617432, | |
| "learning_rate": 9.477681339119653e-06, | |
| "loss": 0.5701, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.0539367637941723, | |
| "grad_norm": 5.798990249633789, | |
| "learning_rate": 9.462182269063856e-06, | |
| "loss": 0.6514, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.0554866707997521, | |
| "grad_norm": 4.468217372894287, | |
| "learning_rate": 9.44668319900806e-06, | |
| "loss": 0.6995, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.0570365778053317, | |
| "grad_norm": 9.877872467041016, | |
| "learning_rate": 9.431184128952264e-06, | |
| "loss": 0.4892, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.0585864848109114, | |
| "grad_norm": 1.780200481414795, | |
| "learning_rate": 9.415685058896467e-06, | |
| "loss": 0.498, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.060136391816491, | |
| "grad_norm": 6.5555195808410645, | |
| "learning_rate": 9.40018598884067e-06, | |
| "loss": 0.4536, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.0616862988220708, | |
| "grad_norm": 6.608431816101074, | |
| "learning_rate": 9.384686918784873e-06, | |
| "loss": 0.5125, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.0632362058276503, | |
| "grad_norm": 2.031834125518799, | |
| "learning_rate": 9.369187848729076e-06, | |
| "loss": 0.4829, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.06478611283323, | |
| "grad_norm": 2.5182507038116455, | |
| "learning_rate": 9.35368877867328e-06, | |
| "loss": 0.5326, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.0663360198388097, | |
| "grad_norm": 5.3375396728515625, | |
| "learning_rate": 9.338189708617484e-06, | |
| "loss": 0.6348, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.0678859268443894, | |
| "grad_norm": 3.0548906326293945, | |
| "learning_rate": 9.322690638561687e-06, | |
| "loss": 0.6643, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.069435833849969, | |
| "grad_norm": 7.555820465087891, | |
| "learning_rate": 9.30719156850589e-06, | |
| "loss": 0.7388, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.0709857408555488, | |
| "grad_norm": 6.28872537612915, | |
| "learning_rate": 9.291692498450093e-06, | |
| "loss": 0.5822, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.0725356478611283, | |
| "grad_norm": 7.888534069061279, | |
| "learning_rate": 9.276193428394296e-06, | |
| "loss": 0.7131, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.074085554866708, | |
| "grad_norm": 5.015232563018799, | |
| "learning_rate": 9.2606943583385e-06, | |
| "loss": 0.5975, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.0756354618722876, | |
| "grad_norm": 3.5587122440338135, | |
| "learning_rate": 9.245195288282704e-06, | |
| "loss": 0.4989, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.0771853688778674, | |
| "grad_norm": 6.01540994644165, | |
| "learning_rate": 9.229696218226907e-06, | |
| "loss": 0.7066, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.078735275883447, | |
| "grad_norm": 4.600519180297852, | |
| "learning_rate": 9.21419714817111e-06, | |
| "loss": 0.4749, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.0802851828890268, | |
| "grad_norm": 3.1370503902435303, | |
| "learning_rate": 9.198698078115313e-06, | |
| "loss": 0.4503, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.0818350898946063, | |
| "grad_norm": 3.2662289142608643, | |
| "learning_rate": 9.183199008059516e-06, | |
| "loss": 0.4964, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.083384996900186, | |
| "grad_norm": 5.026987075805664, | |
| "learning_rate": 9.167699938003721e-06, | |
| "loss": 0.7086, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.0849349039057656, | |
| "grad_norm": 4.068531036376953, | |
| "learning_rate": 9.152200867947924e-06, | |
| "loss": 0.5591, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.0864848109113454, | |
| "grad_norm": 6.460653781890869, | |
| "learning_rate": 9.136701797892127e-06, | |
| "loss": 0.5162, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.088034717916925, | |
| "grad_norm": 4.980759620666504, | |
| "learning_rate": 9.12120272783633e-06, | |
| "loss": 0.6463, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.0895846249225047, | |
| "grad_norm": 10.006918907165527, | |
| "learning_rate": 9.105703657780533e-06, | |
| "loss": 0.5328, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.0911345319280843, | |
| "grad_norm": 8.877737998962402, | |
| "learning_rate": 9.090204587724736e-06, | |
| "loss": 0.7657, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.092684438933664, | |
| "grad_norm": 5.694168567657471, | |
| "learning_rate": 9.074705517668941e-06, | |
| "loss": 0.562, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.0942343459392436, | |
| "grad_norm": 3.7370598316192627, | |
| "learning_rate": 9.059206447613144e-06, | |
| "loss": 0.5905, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.0957842529448234, | |
| "grad_norm": 4.786223411560059, | |
| "learning_rate": 9.043707377557347e-06, | |
| "loss": 0.5709, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.097334159950403, | |
| "grad_norm": 3.233438014984131, | |
| "learning_rate": 9.02820830750155e-06, | |
| "loss": 0.5833, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.0988840669559827, | |
| "grad_norm": 4.648927688598633, | |
| "learning_rate": 9.012709237445753e-06, | |
| "loss": 0.618, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.1004339739615623, | |
| "grad_norm": 4.235779762268066, | |
| "learning_rate": 8.997210167389956e-06, | |
| "loss": 0.5516, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.101983880967142, | |
| "grad_norm": 12.350091934204102, | |
| "learning_rate": 8.981711097334161e-06, | |
| "loss": 0.6225, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.1035337879727216, | |
| "grad_norm": 2.811981678009033, | |
| "learning_rate": 8.966212027278364e-06, | |
| "loss": 0.6682, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.1050836949783014, | |
| "grad_norm": 6.797405242919922, | |
| "learning_rate": 8.950712957222567e-06, | |
| "loss": 0.4458, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.106633601983881, | |
| "grad_norm": 5.518606662750244, | |
| "learning_rate": 8.93521388716677e-06, | |
| "loss": 0.7404, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.1081835089894607, | |
| "grad_norm": 5.039639472961426, | |
| "learning_rate": 8.919714817110973e-06, | |
| "loss": 0.5223, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.1097334159950403, | |
| "grad_norm": 5.415022373199463, | |
| "learning_rate": 8.904215747055176e-06, | |
| "loss": 0.4564, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.11128332300062, | |
| "grad_norm": 8.877534866333008, | |
| "learning_rate": 8.888716676999381e-06, | |
| "loss": 0.5022, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.1128332300061996, | |
| "grad_norm": 2.6606900691986084, | |
| "learning_rate": 8.873217606943584e-06, | |
| "loss": 0.4871, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.1143831370117794, | |
| "grad_norm": 2.137470006942749, | |
| "learning_rate": 8.857718536887787e-06, | |
| "loss": 0.433, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.115933044017359, | |
| "grad_norm": 4.714048385620117, | |
| "learning_rate": 8.84221946683199e-06, | |
| "loss": 0.6005, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.1174829510229387, | |
| "grad_norm": 0.6257525086402893, | |
| "learning_rate": 8.826720396776193e-06, | |
| "loss": 0.6731, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.1190328580285183, | |
| "grad_norm": 6.286426544189453, | |
| "learning_rate": 8.811221326720397e-06, | |
| "loss": 0.6277, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.120582765034098, | |
| "grad_norm": 2.719097137451172, | |
| "learning_rate": 8.795722256664601e-06, | |
| "loss": 0.4603, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.1221326720396776, | |
| "grad_norm": 6.69702672958374, | |
| "learning_rate": 8.780223186608804e-06, | |
| "loss": 0.4442, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.1236825790452574, | |
| "grad_norm": 8.40479850769043, | |
| "learning_rate": 8.764724116553007e-06, | |
| "loss": 0.5393, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.125232486050837, | |
| "grad_norm": 1.8190903663635254, | |
| "learning_rate": 8.74922504649721e-06, | |
| "loss": 0.5887, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.1267823930564167, | |
| "grad_norm": 6.128231048583984, | |
| "learning_rate": 8.733725976441414e-06, | |
| "loss": 0.815, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.1283323000619963, | |
| "grad_norm": 4.16558313369751, | |
| "learning_rate": 8.718226906385617e-06, | |
| "loss": 0.374, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.129882207067576, | |
| "grad_norm": 3.754733085632324, | |
| "learning_rate": 8.702727836329821e-06, | |
| "loss": 0.5951, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.1314321140731556, | |
| "grad_norm": 4.329035758972168, | |
| "learning_rate": 8.687228766274024e-06, | |
| "loss": 0.5914, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.1329820210787354, | |
| "grad_norm": 8.114266395568848, | |
| "learning_rate": 8.671729696218228e-06, | |
| "loss": 0.5051, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.134531928084315, | |
| "grad_norm": 4.3097310066223145, | |
| "learning_rate": 8.65623062616243e-06, | |
| "loss": 0.649, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.1360818350898947, | |
| "grad_norm": 8.02452278137207, | |
| "learning_rate": 8.640731556106634e-06, | |
| "loss": 0.583, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.1376317420954742, | |
| "grad_norm": 9.19294261932373, | |
| "learning_rate": 8.625232486050837e-06, | |
| "loss": 0.611, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.139181649101054, | |
| "grad_norm": 4.483541965484619, | |
| "learning_rate": 8.609733415995041e-06, | |
| "loss": 0.6056, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.1407315561066336, | |
| "grad_norm": 7.466578960418701, | |
| "learning_rate": 8.594234345939245e-06, | |
| "loss": 0.5618, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.1422814631122133, | |
| "grad_norm": 7.183987617492676, | |
| "learning_rate": 8.578735275883448e-06, | |
| "loss": 0.6829, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.143831370117793, | |
| "grad_norm": 10.142129898071289, | |
| "learning_rate": 8.56323620582765e-06, | |
| "loss": 0.5294, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.1453812771233727, | |
| "grad_norm": 6.052461624145508, | |
| "learning_rate": 8.547737135771854e-06, | |
| "loss": 0.6231, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.1469311841289522, | |
| "grad_norm": 6.29191255569458, | |
| "learning_rate": 8.532238065716057e-06, | |
| "loss": 0.7254, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.148481091134532, | |
| "grad_norm": 3.439829111099243, | |
| "learning_rate": 8.516738995660262e-06, | |
| "loss": 0.475, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.1500309981401116, | |
| "grad_norm": 6.537298679351807, | |
| "learning_rate": 8.501239925604465e-06, | |
| "loss": 0.5389, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.1515809051456913, | |
| "grad_norm": 6.487965106964111, | |
| "learning_rate": 8.485740855548668e-06, | |
| "loss": 0.7397, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.153130812151271, | |
| "grad_norm": 8.069173812866211, | |
| "learning_rate": 8.47024178549287e-06, | |
| "loss": 0.6333, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.1546807191568507, | |
| "grad_norm": 3.9428892135620117, | |
| "learning_rate": 8.454742715437074e-06, | |
| "loss": 0.5218, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.1562306261624302, | |
| "grad_norm": 4.833446502685547, | |
| "learning_rate": 8.439243645381277e-06, | |
| "loss": 0.6174, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.15778053316801, | |
| "grad_norm": 3.746244192123413, | |
| "learning_rate": 8.423744575325482e-06, | |
| "loss": 0.6375, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.1593304401735895, | |
| "grad_norm": 11.369379043579102, | |
| "learning_rate": 8.408245505269685e-06, | |
| "loss": 0.5214, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.1608803471791693, | |
| "grad_norm": 4.5933146476745605, | |
| "learning_rate": 8.392746435213888e-06, | |
| "loss": 0.5573, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.1624302541847489, | |
| "grad_norm": 11.557657241821289, | |
| "learning_rate": 8.377247365158091e-06, | |
| "loss": 0.5721, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.1639801611903287, | |
| "grad_norm": 8.945573806762695, | |
| "learning_rate": 8.361748295102294e-06, | |
| "loss": 0.6176, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.1655300681959082, | |
| "grad_norm": 3.769106864929199, | |
| "learning_rate": 8.346249225046497e-06, | |
| "loss": 0.4116, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.167079975201488, | |
| "grad_norm": 1.9121352434158325, | |
| "learning_rate": 8.330750154990702e-06, | |
| "loss": 0.4073, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.1686298822070675, | |
| "grad_norm": 5.85322380065918, | |
| "learning_rate": 8.315251084934905e-06, | |
| "loss": 0.6172, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.1701797892126473, | |
| "grad_norm": 5.601438045501709, | |
| "learning_rate": 8.299752014879108e-06, | |
| "loss": 0.6072, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.1717296962182269, | |
| "grad_norm": 10.24169921875, | |
| "learning_rate": 8.284252944823311e-06, | |
| "loss": 0.7041, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.1732796032238066, | |
| "grad_norm": 5.549180030822754, | |
| "learning_rate": 8.268753874767514e-06, | |
| "loss": 0.7634, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.1748295102293862, | |
| "grad_norm": 9.62266731262207, | |
| "learning_rate": 8.253254804711717e-06, | |
| "loss": 0.6324, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.176379417234966, | |
| "grad_norm": 10.882791519165039, | |
| "learning_rate": 8.237755734655922e-06, | |
| "loss": 0.5471, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.1779293242405455, | |
| "grad_norm": 4.671418190002441, | |
| "learning_rate": 8.222256664600125e-06, | |
| "loss": 0.5437, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.1794792312461253, | |
| "grad_norm": 4.53378963470459, | |
| "learning_rate": 8.206757594544328e-06, | |
| "loss": 0.4634, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.1810291382517049, | |
| "grad_norm": 7.487468719482422, | |
| "learning_rate": 8.191258524488531e-06, | |
| "loss": 0.5741, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.1825790452572846, | |
| "grad_norm": 2.953185796737671, | |
| "learning_rate": 8.175759454432734e-06, | |
| "loss": 0.4571, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.1841289522628642, | |
| "grad_norm": 4.020585536956787, | |
| "learning_rate": 8.160260384376937e-06, | |
| "loss": 0.5718, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.185678859268444, | |
| "grad_norm": 15.922945976257324, | |
| "learning_rate": 8.144761314321142e-06, | |
| "loss": 0.5363, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.1872287662740235, | |
| "grad_norm": 12.803021430969238, | |
| "learning_rate": 8.129262244265345e-06, | |
| "loss": 0.5454, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.1887786732796033, | |
| "grad_norm": 7.86555814743042, | |
| "learning_rate": 8.113763174209548e-06, | |
| "loss": 0.6119, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.1903285802851828, | |
| "grad_norm": 0.6643087267875671, | |
| "learning_rate": 8.098264104153751e-06, | |
| "loss": 0.5261, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.1918784872907626, | |
| "grad_norm": 6.745521545410156, | |
| "learning_rate": 8.082765034097954e-06, | |
| "loss": 0.55, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.1934283942963422, | |
| "grad_norm": 9.781978607177734, | |
| "learning_rate": 8.067265964042157e-06, | |
| "loss": 0.838, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.194978301301922, | |
| "grad_norm": 8.02038288116455, | |
| "learning_rate": 8.051766893986362e-06, | |
| "loss": 0.6207, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.1965282083075015, | |
| "grad_norm": 12.820682525634766, | |
| "learning_rate": 8.036267823930565e-06, | |
| "loss": 0.7424, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.1980781153130813, | |
| "grad_norm": 4.991171836853027, | |
| "learning_rate": 8.020768753874768e-06, | |
| "loss": 0.5592, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.1996280223186608, | |
| "grad_norm": 4.925070762634277, | |
| "learning_rate": 8.005269683818971e-06, | |
| "loss": 0.5062, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.2011779293242406, | |
| "grad_norm": 7.093862533569336, | |
| "learning_rate": 7.989770613763174e-06, | |
| "loss": 0.5812, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.2027278363298202, | |
| "grad_norm": 4.071722507476807, | |
| "learning_rate": 7.974271543707377e-06, | |
| "loss": 0.5521, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.2042777433354, | |
| "grad_norm": 3.5116024017333984, | |
| "learning_rate": 7.958772473651582e-06, | |
| "loss": 0.7707, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.2058276503409795, | |
| "grad_norm": 2.9257960319519043, | |
| "learning_rate": 7.943273403595785e-06, | |
| "loss": 0.5412, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.2073775573465593, | |
| "grad_norm": 10.328827857971191, | |
| "learning_rate": 7.927774333539988e-06, | |
| "loss": 0.5527, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.2089274643521388, | |
| "grad_norm": 5.835555076599121, | |
| "learning_rate": 7.912275263484191e-06, | |
| "loss": 0.5774, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.2104773713577186, | |
| "grad_norm": 16.687910079956055, | |
| "learning_rate": 7.896776193428394e-06, | |
| "loss": 0.7163, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.2120272783632982, | |
| "grad_norm": 4.149749755859375, | |
| "learning_rate": 7.881277123372597e-06, | |
| "loss": 0.4435, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.213577185368878, | |
| "grad_norm": 2.3621935844421387, | |
| "learning_rate": 7.865778053316802e-06, | |
| "loss": 0.4906, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.2151270923744575, | |
| "grad_norm": 3.1414833068847656, | |
| "learning_rate": 7.850278983261005e-06, | |
| "loss": 0.4805, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.2166769993800373, | |
| "grad_norm": 9.7128267288208, | |
| "learning_rate": 7.834779913205208e-06, | |
| "loss": 0.664, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.2182269063856168, | |
| "grad_norm": 3.530634641647339, | |
| "learning_rate": 7.819280843149411e-06, | |
| "loss": 0.7153, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.2197768133911966, | |
| "grad_norm": 7.077465534210205, | |
| "learning_rate": 7.803781773093614e-06, | |
| "loss": 0.5632, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.2213267203967761, | |
| "grad_norm": 3.9259166717529297, | |
| "learning_rate": 7.788282703037818e-06, | |
| "loss": 0.6019, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.222876627402356, | |
| "grad_norm": 8.732478141784668, | |
| "learning_rate": 7.772783632982022e-06, | |
| "loss": 0.6607, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.2244265344079355, | |
| "grad_norm": 9.4745512008667, | |
| "learning_rate": 7.757284562926225e-06, | |
| "loss": 0.5651, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.2259764414135152, | |
| "grad_norm": 3.427568197250366, | |
| "learning_rate": 7.741785492870428e-06, | |
| "loss": 0.5922, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.2275263484190948, | |
| "grad_norm": 6.197938919067383, | |
| "learning_rate": 7.726286422814632e-06, | |
| "loss": 0.5734, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.2290762554246746, | |
| "grad_norm": 5.879312038421631, | |
| "learning_rate": 7.710787352758835e-06, | |
| "loss": 0.7146, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.2306261624302541, | |
| "grad_norm": 4.559432506561279, | |
| "learning_rate": 7.695288282703038e-06, | |
| "loss": 0.4991, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.232176069435834, | |
| "grad_norm": 5.727523326873779, | |
| "learning_rate": 7.679789212647242e-06, | |
| "loss": 0.6537, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.2337259764414135, | |
| "grad_norm": 7.712017059326172, | |
| "learning_rate": 7.664290142591445e-06, | |
| "loss": 0.6054, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.2352758834469932, | |
| "grad_norm": 3.195868968963623, | |
| "learning_rate": 7.648791072535649e-06, | |
| "loss": 0.8091, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.2368257904525728, | |
| "grad_norm": 9.342826843261719, | |
| "learning_rate": 7.633292002479852e-06, | |
| "loss": 0.5558, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.2383756974581526, | |
| "grad_norm": 4.6453375816345215, | |
| "learning_rate": 7.6177929324240555e-06, | |
| "loss": 0.4514, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.2399256044637321, | |
| "grad_norm": 8.93087100982666, | |
| "learning_rate": 7.602293862368259e-06, | |
| "loss": 0.6491, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.241475511469312, | |
| "grad_norm": 3.648250102996826, | |
| "learning_rate": 7.586794792312462e-06, | |
| "loss": 0.6337, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.2430254184748915, | |
| "grad_norm": 8.233920097351074, | |
| "learning_rate": 7.5712957222566656e-06, | |
| "loss": 0.5982, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.2445753254804712, | |
| "grad_norm": 7.656149864196777, | |
| "learning_rate": 7.555796652200869e-06, | |
| "loss": 0.4163, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.2461252324860508, | |
| "grad_norm": 3.028169631958008, | |
| "learning_rate": 7.540297582145072e-06, | |
| "loss": 0.5173, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.2476751394916306, | |
| "grad_norm": 10.122218132019043, | |
| "learning_rate": 7.524798512089276e-06, | |
| "loss": 0.389, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.2492250464972101, | |
| "grad_norm": 5.4110236167907715, | |
| "learning_rate": 7.509299442033479e-06, | |
| "loss": 0.5307, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.2507749535027899, | |
| "grad_norm": 11.099316596984863, | |
| "learning_rate": 7.493800371977682e-06, | |
| "loss": 0.62, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.2523248605083694, | |
| "grad_norm": 5.948646545410156, | |
| "learning_rate": 7.478301301921886e-06, | |
| "loss": 0.676, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.2538747675139492, | |
| "grad_norm": 13.428414344787598, | |
| "learning_rate": 7.462802231866089e-06, | |
| "loss": 0.5017, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.2554246745195288, | |
| "grad_norm": 7.999103546142578, | |
| "learning_rate": 7.447303161810292e-06, | |
| "loss": 0.511, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.2569745815251085, | |
| "grad_norm": 3.18576717376709, | |
| "learning_rate": 7.431804091754496e-06, | |
| "loss": 0.4457, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.258524488530688, | |
| "grad_norm": 6.90123176574707, | |
| "learning_rate": 7.416305021698699e-06, | |
| "loss": 0.4758, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.2600743955362679, | |
| "grad_norm": 6.826712608337402, | |
| "learning_rate": 7.400805951642902e-06, | |
| "loss": 0.596, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.2616243025418474, | |
| "grad_norm": 1.5102648735046387, | |
| "learning_rate": 7.385306881587106e-06, | |
| "loss": 0.5432, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.2631742095474272, | |
| "grad_norm": 6.042043209075928, | |
| "learning_rate": 7.369807811531309e-06, | |
| "loss": 0.7783, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.2647241165530068, | |
| "grad_norm": 4.189500331878662, | |
| "learning_rate": 7.354308741475512e-06, | |
| "loss": 0.4225, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.2662740235585865, | |
| "grad_norm": 3.8514328002929688, | |
| "learning_rate": 7.338809671419716e-06, | |
| "loss": 0.7909, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.267823930564166, | |
| "grad_norm": 10.183280944824219, | |
| "learning_rate": 7.323310601363919e-06, | |
| "loss": 0.7079, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.2693738375697459, | |
| "grad_norm": 7.852726936340332, | |
| "learning_rate": 7.307811531308122e-06, | |
| "loss": 0.5453, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.2709237445753254, | |
| "grad_norm": 6.670209884643555, | |
| "learning_rate": 7.292312461252326e-06, | |
| "loss": 0.534, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.2724736515809052, | |
| "grad_norm": 14.171867370605469, | |
| "learning_rate": 7.276813391196529e-06, | |
| "loss": 0.6302, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.2740235585864848, | |
| "grad_norm": 3.3877339363098145, | |
| "learning_rate": 7.261314321140732e-06, | |
| "loss": 0.5812, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.2755734655920645, | |
| "grad_norm": 5.41657018661499, | |
| "learning_rate": 7.245815251084936e-06, | |
| "loss": 0.5913, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.277123372597644, | |
| "grad_norm": 7.353067874908447, | |
| "learning_rate": 7.230316181029139e-06, | |
| "loss": 0.5484, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.2786732796032239, | |
| "grad_norm": 4.998973369598389, | |
| "learning_rate": 7.214817110973342e-06, | |
| "loss": 0.6293, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.2802231866088034, | |
| "grad_norm": 7.433141708374023, | |
| "learning_rate": 7.199318040917546e-06, | |
| "loss": 0.5088, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.2817730936143832, | |
| "grad_norm": 10.44473934173584, | |
| "learning_rate": 7.183818970861749e-06, | |
| "loss": 0.7561, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.2833230006199627, | |
| "grad_norm": 11.532959938049316, | |
| "learning_rate": 7.168319900805952e-06, | |
| "loss": 0.5057, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.2848729076255425, | |
| "grad_norm": 0.80668044090271, | |
| "learning_rate": 7.152820830750156e-06, | |
| "loss": 0.5564, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.286422814631122, | |
| "grad_norm": 5.980687141418457, | |
| "learning_rate": 7.137321760694359e-06, | |
| "loss": 0.4906, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.2879727216367018, | |
| "grad_norm": 6.244668006896973, | |
| "learning_rate": 7.121822690638562e-06, | |
| "loss": 0.4905, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.2895226286422814, | |
| "grad_norm": 6.673967361450195, | |
| "learning_rate": 7.106323620582766e-06, | |
| "loss": 0.5872, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.2910725356478612, | |
| "grad_norm": 4.508692264556885, | |
| "learning_rate": 7.090824550526969e-06, | |
| "loss": 0.4922, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.2926224426534407, | |
| "grad_norm": 9.3928804397583, | |
| "learning_rate": 7.075325480471172e-06, | |
| "loss": 0.4992, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.2941723496590205, | |
| "grad_norm": 11.144902229309082, | |
| "learning_rate": 7.059826410415376e-06, | |
| "loss": 0.5935, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.2957222566646, | |
| "grad_norm": 7.52712345123291, | |
| "learning_rate": 7.044327340359579e-06, | |
| "loss": 0.4821, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.2972721636701798, | |
| "grad_norm": 5.781824588775635, | |
| "learning_rate": 7.028828270303782e-06, | |
| "loss": 0.4129, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.2988220706757594, | |
| "grad_norm": 5.450551509857178, | |
| "learning_rate": 7.013329200247986e-06, | |
| "loss": 0.3729, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.3003719776813392, | |
| "grad_norm": 8.195019721984863, | |
| "learning_rate": 6.997830130192189e-06, | |
| "loss": 0.5217, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.3019218846869187, | |
| "grad_norm": 3.340574264526367, | |
| "learning_rate": 6.982331060136392e-06, | |
| "loss": 0.4438, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.3034717916924985, | |
| "grad_norm": 8.651931762695312, | |
| "learning_rate": 6.966831990080596e-06, | |
| "loss": 0.4275, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.305021698698078, | |
| "grad_norm": 7.959225654602051, | |
| "learning_rate": 6.951332920024799e-06, | |
| "loss": 0.6702, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.3065716057036578, | |
| "grad_norm": 2.9623653888702393, | |
| "learning_rate": 6.935833849969002e-06, | |
| "loss": 0.596, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.3081215127092374, | |
| "grad_norm": 4.792098522186279, | |
| "learning_rate": 6.920334779913206e-06, | |
| "loss": 0.5806, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.3096714197148172, | |
| "grad_norm": 6.721735000610352, | |
| "learning_rate": 6.904835709857409e-06, | |
| "loss": 0.452, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.3112213267203967, | |
| "grad_norm": 4.370045185089111, | |
| "learning_rate": 6.889336639801612e-06, | |
| "loss": 0.5525, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.3127712337259765, | |
| "grad_norm": 8.37392520904541, | |
| "learning_rate": 6.873837569745816e-06, | |
| "loss": 0.3949, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.314321140731556, | |
| "grad_norm": 3.662019968032837, | |
| "learning_rate": 6.858338499690019e-06, | |
| "loss": 0.6239, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.3158710477371358, | |
| "grad_norm": 8.094034194946289, | |
| "learning_rate": 6.842839429634222e-06, | |
| "loss": 0.6033, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.3174209547427154, | |
| "grad_norm": 4.170316696166992, | |
| "learning_rate": 6.827340359578426e-06, | |
| "loss": 0.7513, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.3189708617482951, | |
| "grad_norm": 6.951697826385498, | |
| "learning_rate": 6.811841289522629e-06, | |
| "loss": 0.6167, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.3205207687538747, | |
| "grad_norm": 8.492193222045898, | |
| "learning_rate": 6.7963422194668325e-06, | |
| "loss": 0.4893, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.3220706757594545, | |
| "grad_norm": 1.626686692237854, | |
| "learning_rate": 6.780843149411036e-06, | |
| "loss": 0.5387, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.323620582765034, | |
| "grad_norm": 4.959685802459717, | |
| "learning_rate": 6.7653440793552394e-06, | |
| "loss": 0.6753, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.3251704897706138, | |
| "grad_norm": 5.371678352355957, | |
| "learning_rate": 6.7498450092994425e-06, | |
| "loss": 0.5722, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.3267203967761934, | |
| "grad_norm": 5.171933650970459, | |
| "learning_rate": 6.734345939243646e-06, | |
| "loss": 0.4267, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.3282703037817731, | |
| "grad_norm": 5.8038201332092285, | |
| "learning_rate": 6.7188468691878495e-06, | |
| "loss": 0.4323, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.3298202107873527, | |
| "grad_norm": 7.018991470336914, | |
| "learning_rate": 6.7033477991320526e-06, | |
| "loss": 0.5106, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.3313701177929325, | |
| "grad_norm": 6.064144611358643, | |
| "learning_rate": 6.6878487290762565e-06, | |
| "loss": 0.4794, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.332920024798512, | |
| "grad_norm": 12.810579299926758, | |
| "learning_rate": 6.6723496590204595e-06, | |
| "loss": 0.4787, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.3344699318040918, | |
| "grad_norm": 7.543432235717773, | |
| "learning_rate": 6.656850588964663e-06, | |
| "loss": 0.6821, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.3360198388096713, | |
| "grad_norm": 6.412388801574707, | |
| "learning_rate": 6.6413515189088665e-06, | |
| "loss": 0.6768, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.3375697458152511, | |
| "grad_norm": 7.301428318023682, | |
| "learning_rate": 6.62585244885307e-06, | |
| "loss": 0.3714, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.3391196528208307, | |
| "grad_norm": 5.536515712738037, | |
| "learning_rate": 6.610353378797273e-06, | |
| "loss": 0.6862, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.3406695598264105, | |
| "grad_norm": 7.190949440002441, | |
| "learning_rate": 6.5948543087414766e-06, | |
| "loss": 0.6415, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.34221946683199, | |
| "grad_norm": 4.371210098266602, | |
| "learning_rate": 6.57935523868568e-06, | |
| "loss": 0.4792, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.3437693738375698, | |
| "grad_norm": 3.757610559463501, | |
| "learning_rate": 6.563856168629883e-06, | |
| "loss": 0.6153, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.3453192808431493, | |
| "grad_norm": 7.589756488800049, | |
| "learning_rate": 6.548357098574087e-06, | |
| "loss": 0.5786, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.346869187848729, | |
| "grad_norm": 3.139828681945801, | |
| "learning_rate": 6.53285802851829e-06, | |
| "loss": 0.8185, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.3484190948543087, | |
| "grad_norm": 10.385107040405273, | |
| "learning_rate": 6.517358958462493e-06, | |
| "loss": 0.6463, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.3499690018598884, | |
| "grad_norm": 9.19307804107666, | |
| "learning_rate": 6.501859888406697e-06, | |
| "loss": 0.5454, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.351518908865468, | |
| "grad_norm": 7.461082935333252, | |
| "learning_rate": 6.4863608183509e-06, | |
| "loss": 0.5529, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.3530688158710478, | |
| "grad_norm": 8.469099998474121, | |
| "learning_rate": 6.470861748295103e-06, | |
| "loss": 0.521, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.3546187228766273, | |
| "grad_norm": 5.376317501068115, | |
| "learning_rate": 6.455362678239307e-06, | |
| "loss": 0.4741, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.356168629882207, | |
| "grad_norm": 7.034505367279053, | |
| "learning_rate": 6.43986360818351e-06, | |
| "loss": 0.5807, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.3577185368877867, | |
| "grad_norm": 6.779096603393555, | |
| "learning_rate": 6.424364538127713e-06, | |
| "loss": 0.6814, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.3592684438933664, | |
| "grad_norm": 7.2761077880859375, | |
| "learning_rate": 6.408865468071917e-06, | |
| "loss": 0.6118, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.360818350898946, | |
| "grad_norm": 5.111270427703857, | |
| "learning_rate": 6.39336639801612e-06, | |
| "loss": 0.4242, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.3623682579045258, | |
| "grad_norm": 7.900925159454346, | |
| "learning_rate": 6.377867327960323e-06, | |
| "loss": 0.6002, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.3639181649101053, | |
| "grad_norm": 4.93954610824585, | |
| "learning_rate": 6.362368257904527e-06, | |
| "loss": 0.6001, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.365468071915685, | |
| "grad_norm": 4.872836589813232, | |
| "learning_rate": 6.34686918784873e-06, | |
| "loss": 0.6633, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.3670179789212646, | |
| "grad_norm": 6.094171047210693, | |
| "learning_rate": 6.331370117792933e-06, | |
| "loss": 0.485, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.3685678859268444, | |
| "grad_norm": 6.762317180633545, | |
| "learning_rate": 6.315871047737137e-06, | |
| "loss": 0.5592, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.370117792932424, | |
| "grad_norm": 6.48795747756958, | |
| "learning_rate": 6.30037197768134e-06, | |
| "loss": 0.5515, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.3716676999380037, | |
| "grad_norm": 6.643194675445557, | |
| "learning_rate": 6.284872907625543e-06, | |
| "loss": 0.6364, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.3732176069435833, | |
| "grad_norm": 10.059488296508789, | |
| "learning_rate": 6.269373837569747e-06, | |
| "loss": 0.6145, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.374767513949163, | |
| "grad_norm": 6.285107612609863, | |
| "learning_rate": 6.25387476751395e-06, | |
| "loss": 0.571, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.3763174209547429, | |
| "grad_norm": 9.381699562072754, | |
| "learning_rate": 6.238375697458153e-06, | |
| "loss": 0.5779, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.3778673279603224, | |
| "grad_norm": 6.602116107940674, | |
| "learning_rate": 6.222876627402357e-06, | |
| "loss": 0.5559, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.379417234965902, | |
| "grad_norm": 5.42208194732666, | |
| "learning_rate": 6.20737755734656e-06, | |
| "loss": 0.5229, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.3809671419714817, | |
| "grad_norm": 5.746293067932129, | |
| "learning_rate": 6.191878487290763e-06, | |
| "loss": 0.4386, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.3825170489770615, | |
| "grad_norm": 8.15822982788086, | |
| "learning_rate": 6.176379417234967e-06, | |
| "loss": 0.6447, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.384066955982641, | |
| "grad_norm": 7.664370536804199, | |
| "learning_rate": 6.16088034717917e-06, | |
| "loss": 0.5102, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.3856168629882206, | |
| "grad_norm": 8.099589347839355, | |
| "learning_rate": 6.145381277123373e-06, | |
| "loss": 0.4791, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.3871667699938004, | |
| "grad_norm": 9.306650161743164, | |
| "learning_rate": 6.129882207067577e-06, | |
| "loss": 0.5595, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.3887166769993802, | |
| "grad_norm": 8.718387603759766, | |
| "learning_rate": 6.11438313701178e-06, | |
| "loss": 0.3803, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.3902665840049597, | |
| "grad_norm": 3.83396315574646, | |
| "learning_rate": 6.098884066955983e-06, | |
| "loss": 0.3988, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.3918164910105393, | |
| "grad_norm": 8.484441757202148, | |
| "learning_rate": 6.083384996900187e-06, | |
| "loss": 0.5799, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.393366398016119, | |
| "grad_norm": 5.386404037475586, | |
| "learning_rate": 6.06788592684439e-06, | |
| "loss": 0.3893, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.3949163050216988, | |
| "grad_norm": 4.610734939575195, | |
| "learning_rate": 6.052386856788593e-06, | |
| "loss": 0.5469, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.3964662120272784, | |
| "grad_norm": 11.3955717086792, | |
| "learning_rate": 6.036887786732797e-06, | |
| "loss": 0.4739, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.398016119032858, | |
| "grad_norm": 9.982507705688477, | |
| "learning_rate": 6.021388716677e-06, | |
| "loss": 0.5295, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.3995660260384377, | |
| "grad_norm": 6.788435459136963, | |
| "learning_rate": 6.005889646621203e-06, | |
| "loss": 0.5864, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.4011159330440175, | |
| "grad_norm": 8.976727485656738, | |
| "learning_rate": 5.990390576565407e-06, | |
| "loss": 0.6665, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.402665840049597, | |
| "grad_norm": 4.205535888671875, | |
| "learning_rate": 5.97489150650961e-06, | |
| "loss": 0.3614, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.4042157470551766, | |
| "grad_norm": 6.974489212036133, | |
| "learning_rate": 5.959392436453813e-06, | |
| "loss": 0.5822, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.4057656540607564, | |
| "grad_norm": 3.7475290298461914, | |
| "learning_rate": 5.943893366398017e-06, | |
| "loss": 0.2944, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.4073155610663362, | |
| "grad_norm": 6.615023612976074, | |
| "learning_rate": 5.92839429634222e-06, | |
| "loss": 0.6612, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.4088654680719157, | |
| "grad_norm": 3.359515905380249, | |
| "learning_rate": 5.912895226286423e-06, | |
| "loss": 0.6356, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.4104153750774953, | |
| "grad_norm": 3.1997714042663574, | |
| "learning_rate": 5.897396156230627e-06, | |
| "loss": 0.6412, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.411965282083075, | |
| "grad_norm": 8.04691219329834, | |
| "learning_rate": 5.88189708617483e-06, | |
| "loss": 0.4778, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.4135151890886548, | |
| "grad_norm": 5.734005451202393, | |
| "learning_rate": 5.866398016119033e-06, | |
| "loss": 0.5637, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.4150650960942344, | |
| "grad_norm": 8.435890197753906, | |
| "learning_rate": 5.850898946063237e-06, | |
| "loss": 0.6423, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.416615003099814, | |
| "grad_norm": 9.176706314086914, | |
| "learning_rate": 5.83539987600744e-06, | |
| "loss": 0.4785, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.4181649101053937, | |
| "grad_norm": 5.92742395401001, | |
| "learning_rate": 5.8199008059516434e-06, | |
| "loss": 0.5509, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.4197148171109735, | |
| "grad_norm": 5.5970869064331055, | |
| "learning_rate": 5.804401735895847e-06, | |
| "loss": 0.7684, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.421264724116553, | |
| "grad_norm": 7.216648578643799, | |
| "learning_rate": 5.7889026658400504e-06, | |
| "loss": 0.4177, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.4228146311221326, | |
| "grad_norm": 4.122847557067871, | |
| "learning_rate": 5.7734035957842535e-06, | |
| "loss": 0.519, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.4243645381277124, | |
| "grad_norm": 6.326296329498291, | |
| "learning_rate": 5.757904525728457e-06, | |
| "loss": 0.5741, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.4259144451332921, | |
| "grad_norm": 3.2332475185394287, | |
| "learning_rate": 5.7424054556726605e-06, | |
| "loss": 0.5284, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.4274643521388717, | |
| "grad_norm": 3.224771499633789, | |
| "learning_rate": 5.7269063856168635e-06, | |
| "loss": 0.5077, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.4290142591444512, | |
| "grad_norm": 3.3739428520202637, | |
| "learning_rate": 5.7114073155610675e-06, | |
| "loss": 0.4111, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.430564166150031, | |
| "grad_norm": 5.780101776123047, | |
| "learning_rate": 5.6959082455052705e-06, | |
| "loss": 0.4889, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.4321140731556108, | |
| "grad_norm": 10.54281234741211, | |
| "learning_rate": 5.680409175449474e-06, | |
| "loss": 0.5395, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.4336639801611903, | |
| "grad_norm": 4.650584697723389, | |
| "learning_rate": 5.6649101053936775e-06, | |
| "loss": 0.5086, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.43521388716677, | |
| "grad_norm": 3.0759034156799316, | |
| "learning_rate": 5.6494110353378806e-06, | |
| "loss": 0.7147, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.4367637941723497, | |
| "grad_norm": 5.136111259460449, | |
| "learning_rate": 5.633911965282084e-06, | |
| "loss": 0.6591, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.4383137011779294, | |
| "grad_norm": 9.995561599731445, | |
| "learning_rate": 5.6184128952262875e-06, | |
| "loss": 0.5049, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.439863608183509, | |
| "grad_norm": 5.644338130950928, | |
| "learning_rate": 5.602913825170491e-06, | |
| "loss": 0.4928, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.4414135151890886, | |
| "grad_norm": 13.791778564453125, | |
| "learning_rate": 5.587414755114694e-06, | |
| "loss": 0.6969, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.4429634221946683, | |
| "grad_norm": 3.6634018421173096, | |
| "learning_rate": 5.571915685058898e-06, | |
| "loss": 0.5763, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.444513329200248, | |
| "grad_norm": 11.883929252624512, | |
| "learning_rate": 5.556416615003101e-06, | |
| "loss": 0.6579, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.4460632362058277, | |
| "grad_norm": 10.741708755493164, | |
| "learning_rate": 5.540917544947304e-06, | |
| "loss": 0.6272, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.4476131432114072, | |
| "grad_norm": 4.298150539398193, | |
| "learning_rate": 5.525418474891508e-06, | |
| "loss": 0.5472, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.449163050216987, | |
| "grad_norm": 2.4050536155700684, | |
| "learning_rate": 5.509919404835711e-06, | |
| "loss": 0.6278, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.4507129572225668, | |
| "grad_norm": 4.59657096862793, | |
| "learning_rate": 5.494420334779914e-06, | |
| "loss": 0.5389, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.4522628642281463, | |
| "grad_norm": 2.8281540870666504, | |
| "learning_rate": 5.478921264724118e-06, | |
| "loss": 0.4964, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.4538127712337259, | |
| "grad_norm": 8.060725212097168, | |
| "learning_rate": 5.463422194668321e-06, | |
| "loss": 0.5654, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.4553626782393057, | |
| "grad_norm": 6.732635021209717, | |
| "learning_rate": 5.447923124612524e-06, | |
| "loss": 0.6084, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.4569125852448854, | |
| "grad_norm": 17.33676528930664, | |
| "learning_rate": 5.432424054556728e-06, | |
| "loss": 0.4096, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.458462492250465, | |
| "grad_norm": 8.642203330993652, | |
| "learning_rate": 5.416924984500931e-06, | |
| "loss": 0.6075, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.4600123992560445, | |
| "grad_norm": 2.123789072036743, | |
| "learning_rate": 5.401425914445134e-06, | |
| "loss": 0.3668, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.4615623062616243, | |
| "grad_norm": 7.111140251159668, | |
| "learning_rate": 5.385926844389338e-06, | |
| "loss": 0.5203, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.463112213267204, | |
| "grad_norm": 8.846921920776367, | |
| "learning_rate": 5.370427774333541e-06, | |
| "loss": 0.428, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.4646621202727836, | |
| "grad_norm": 4.978935718536377, | |
| "learning_rate": 5.354928704277744e-06, | |
| "loss": 0.5781, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.4662120272783632, | |
| "grad_norm": 14.11387825012207, | |
| "learning_rate": 5.339429634221948e-06, | |
| "loss": 0.6948, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.467761934283943, | |
| "grad_norm": 3.0608415603637695, | |
| "learning_rate": 5.323930564166151e-06, | |
| "loss": 0.535, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.4693118412895227, | |
| "grad_norm": 7.584042072296143, | |
| "learning_rate": 5.308431494110354e-06, | |
| "loss": 0.4748, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.4708617482951023, | |
| "grad_norm": 6.784172534942627, | |
| "learning_rate": 5.292932424054558e-06, | |
| "loss": 0.7326, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.4724116553006819, | |
| "grad_norm": 9.559980392456055, | |
| "learning_rate": 5.277433353998761e-06, | |
| "loss": 0.4411, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.4739615623062616, | |
| "grad_norm": 6.2408127784729, | |
| "learning_rate": 5.261934283942964e-06, | |
| "loss": 0.5753, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.4755114693118414, | |
| "grad_norm": 4.772825241088867, | |
| "learning_rate": 5.246435213887168e-06, | |
| "loss": 0.5637, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.477061376317421, | |
| "grad_norm": 4.578171730041504, | |
| "learning_rate": 5.230936143831371e-06, | |
| "loss": 0.5267, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.4786112833230005, | |
| "grad_norm": 3.323122024536133, | |
| "learning_rate": 5.215437073775574e-06, | |
| "loss": 0.6315, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.4801611903285803, | |
| "grad_norm": 4.294838905334473, | |
| "learning_rate": 5.199938003719778e-06, | |
| "loss": 0.6561, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.48171109733416, | |
| "grad_norm": 5.863995552062988, | |
| "learning_rate": 5.184438933663981e-06, | |
| "loss": 0.437, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.4832610043397396, | |
| "grad_norm": 8.996912002563477, | |
| "learning_rate": 5.168939863608184e-06, | |
| "loss": 0.5727, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.4848109113453192, | |
| "grad_norm": 6.797782897949219, | |
| "learning_rate": 5.153440793552388e-06, | |
| "loss": 0.616, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.486360818350899, | |
| "grad_norm": 3.5306284427642822, | |
| "learning_rate": 5.137941723496591e-06, | |
| "loss": 0.5054, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.4879107253564787, | |
| "grad_norm": 0.761473536491394, | |
| "learning_rate": 5.122442653440794e-06, | |
| "loss": 0.5933, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.4894606323620583, | |
| "grad_norm": 2.775047779083252, | |
| "learning_rate": 5.106943583384998e-06, | |
| "loss": 0.5478, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.4910105393676378, | |
| "grad_norm": 5.2198028564453125, | |
| "learning_rate": 5.091444513329201e-06, | |
| "loss": 0.6019, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.4925604463732176, | |
| "grad_norm": 5.083917140960693, | |
| "learning_rate": 5.075945443273404e-06, | |
| "loss": 0.3794, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.4941103533787974, | |
| "grad_norm": 8.67032527923584, | |
| "learning_rate": 5.060446373217608e-06, | |
| "loss": 0.6841, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.495660260384377, | |
| "grad_norm": 4.733889102935791, | |
| "learning_rate": 5.044947303161811e-06, | |
| "loss": 0.6018, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.4972101673899565, | |
| "grad_norm": 9.206461906433105, | |
| "learning_rate": 5.029448233106014e-06, | |
| "loss": 0.5624, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.4987600743955363, | |
| "grad_norm": 4.749617099761963, | |
| "learning_rate": 5.013949163050218e-06, | |
| "loss": 0.5591, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.500309981401116, | |
| "grad_norm": 7.325601100921631, | |
| "learning_rate": 4.99845009299442e-06, | |
| "loss": 0.5467, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.5018598884066956, | |
| "grad_norm": 7.182321071624756, | |
| "learning_rate": 4.982951022938624e-06, | |
| "loss": 0.769, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.5034097954122752, | |
| "grad_norm": 7.016432285308838, | |
| "learning_rate": 4.967451952882827e-06, | |
| "loss": 0.7083, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.504959702417855, | |
| "grad_norm": 1.7481181621551514, | |
| "learning_rate": 4.9519528828270304e-06, | |
| "loss": 0.4016, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.5065096094234347, | |
| "grad_norm": 6.876219272613525, | |
| "learning_rate": 4.936453812771234e-06, | |
| "loss": 0.3005, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.5080595164290143, | |
| "grad_norm": 17.568605422973633, | |
| "learning_rate": 4.920954742715437e-06, | |
| "loss": 0.483, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.5096094234345938, | |
| "grad_norm": 7.156910419464111, | |
| "learning_rate": 4.9054556726596405e-06, | |
| "loss": 0.5408, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.5111593304401736, | |
| "grad_norm": 2.5366811752319336, | |
| "learning_rate": 4.889956602603844e-06, | |
| "loss": 0.7988, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.5127092374457534, | |
| "grad_norm": 10.356922149658203, | |
| "learning_rate": 4.8744575325480475e-06, | |
| "loss": 0.6228, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.514259144451333, | |
| "grad_norm": 11.088748931884766, | |
| "learning_rate": 4.8589584624922505e-06, | |
| "loss": 0.639, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.5158090514569125, | |
| "grad_norm": 10.523143768310547, | |
| "learning_rate": 4.8434593924364544e-06, | |
| "loss": 0.8511, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.5173589584624922, | |
| "grad_norm": 6.977451324462891, | |
| "learning_rate": 4.8279603223806575e-06, | |
| "loss": 0.5901, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.518908865468072, | |
| "grad_norm": 2.387295961380005, | |
| "learning_rate": 4.8124612523248606e-06, | |
| "loss": 0.5298, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.5204587724736516, | |
| "grad_norm": 11.128771781921387, | |
| "learning_rate": 4.7969621822690645e-06, | |
| "loss": 0.4541, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.5220086794792311, | |
| "grad_norm": 11.29007625579834, | |
| "learning_rate": 4.7814631122132675e-06, | |
| "loss": 0.564, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.523558586484811, | |
| "grad_norm": 3.435899019241333, | |
| "learning_rate": 4.765964042157471e-06, | |
| "loss": 0.5353, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.5251084934903907, | |
| "grad_norm": 6.636521816253662, | |
| "learning_rate": 4.7504649721016745e-06, | |
| "loss": 0.6093, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.5266584004959702, | |
| "grad_norm": 7.338555335998535, | |
| "learning_rate": 4.734965902045878e-06, | |
| "loss": 0.4354, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.5282083075015498, | |
| "grad_norm": 12.102242469787598, | |
| "learning_rate": 4.719466831990081e-06, | |
| "loss": 0.6617, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.5297582145071296, | |
| "grad_norm": 6.004841327667236, | |
| "learning_rate": 4.7039677619342846e-06, | |
| "loss": 0.6266, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.5313081215127093, | |
| "grad_norm": 9.303327560424805, | |
| "learning_rate": 4.688468691878488e-06, | |
| "loss": 0.5878, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.532858028518289, | |
| "grad_norm": 5.710174083709717, | |
| "learning_rate": 4.672969621822691e-06, | |
| "loss": 0.4985, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.5344079355238684, | |
| "grad_norm": 6.517455101013184, | |
| "learning_rate": 4.657470551766895e-06, | |
| "loss": 0.5533, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.5359578425294482, | |
| "grad_norm": 8.90954875946045, | |
| "learning_rate": 4.641971481711098e-06, | |
| "loss": 0.7229, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.537507749535028, | |
| "grad_norm": 5.908111572265625, | |
| "learning_rate": 4.626472411655301e-06, | |
| "loss": 0.5185, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.5390576565406076, | |
| "grad_norm": 10.088567733764648, | |
| "learning_rate": 4.610973341599505e-06, | |
| "loss": 0.5258, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.540607563546187, | |
| "grad_norm": 6.859228610992432, | |
| "learning_rate": 4.595474271543708e-06, | |
| "loss": 0.4885, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.5421574705517669, | |
| "grad_norm": 7.489620208740234, | |
| "learning_rate": 4.579975201487911e-06, | |
| "loss": 0.453, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.5437073775573467, | |
| "grad_norm": 8.25145435333252, | |
| "learning_rate": 4.564476131432115e-06, | |
| "loss": 0.5938, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.5452572845629262, | |
| "grad_norm": 5.98444938659668, | |
| "learning_rate": 4.548977061376318e-06, | |
| "loss": 0.5637, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.5468071915685058, | |
| "grad_norm": 4.948822498321533, | |
| "learning_rate": 4.533477991320521e-06, | |
| "loss": 0.4356, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.5483570985740855, | |
| "grad_norm": 4.464200973510742, | |
| "learning_rate": 4.517978921264725e-06, | |
| "loss": 0.5302, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.5499070055796653, | |
| "grad_norm": 9.048888206481934, | |
| "learning_rate": 4.502479851208928e-06, | |
| "loss": 0.5684, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.5514569125852449, | |
| "grad_norm": 4.196654319763184, | |
| "learning_rate": 4.486980781153131e-06, | |
| "loss": 0.5561, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.5530068195908244, | |
| "grad_norm": 5.6792144775390625, | |
| "learning_rate": 4.471481711097335e-06, | |
| "loss": 0.6369, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.5545567265964042, | |
| "grad_norm": 4.345642566680908, | |
| "learning_rate": 4.455982641041538e-06, | |
| "loss": 0.4938, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.556106633601984, | |
| "grad_norm": 5.05332612991333, | |
| "learning_rate": 4.440483570985741e-06, | |
| "loss": 0.4505, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.5576565406075635, | |
| "grad_norm": 7.0360870361328125, | |
| "learning_rate": 4.424984500929945e-06, | |
| "loss": 0.4599, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.559206447613143, | |
| "grad_norm": 11.911025047302246, | |
| "learning_rate": 4.409485430874148e-06, | |
| "loss": 0.5653, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.5607563546187229, | |
| "grad_norm": 9.599674224853516, | |
| "learning_rate": 4.393986360818351e-06, | |
| "loss": 0.5005, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.5623062616243026, | |
| "grad_norm": 6.970069885253906, | |
| "learning_rate": 4.378487290762555e-06, | |
| "loss": 0.7005, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.5638561686298822, | |
| "grad_norm": 5.077493667602539, | |
| "learning_rate": 4.362988220706758e-06, | |
| "loss": 0.5908, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.5654060756354617, | |
| "grad_norm": 10.840348243713379, | |
| "learning_rate": 4.347489150650961e-06, | |
| "loss": 0.394, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.5669559826410415, | |
| "grad_norm": 4.2803120613098145, | |
| "learning_rate": 4.331990080595165e-06, | |
| "loss": 0.7187, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.5685058896466213, | |
| "grad_norm": 5.350848197937012, | |
| "learning_rate": 4.316491010539368e-06, | |
| "loss": 0.622, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.5700557966522009, | |
| "grad_norm": 4.250338554382324, | |
| "learning_rate": 4.300991940483571e-06, | |
| "loss": 0.4338, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.5716057036577804, | |
| "grad_norm": 10.858535766601562, | |
| "learning_rate": 4.285492870427775e-06, | |
| "loss": 0.7283, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.5731556106633602, | |
| "grad_norm": 6.90775728225708, | |
| "learning_rate": 4.269993800371978e-06, | |
| "loss": 0.4537, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.57470551766894, | |
| "grad_norm": 3.502438545227051, | |
| "learning_rate": 4.254494730316181e-06, | |
| "loss": 0.4618, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.5762554246745195, | |
| "grad_norm": 7.183608531951904, | |
| "learning_rate": 4.238995660260385e-06, | |
| "loss": 0.5586, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.577805331680099, | |
| "grad_norm": 3.889638662338257, | |
| "learning_rate": 4.223496590204588e-06, | |
| "loss": 0.4065, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.5793552386856788, | |
| "grad_norm": 6.343893527984619, | |
| "learning_rate": 4.207997520148791e-06, | |
| "loss": 0.4437, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.5809051456912586, | |
| "grad_norm": 18.638702392578125, | |
| "learning_rate": 4.192498450092995e-06, | |
| "loss": 0.6633, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.5824550526968382, | |
| "grad_norm": 11.82913875579834, | |
| "learning_rate": 4.176999380037198e-06, | |
| "loss": 0.8167, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.5840049597024177, | |
| "grad_norm": 4.050469875335693, | |
| "learning_rate": 4.161500309981401e-06, | |
| "loss": 0.6837, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.5855548667079975, | |
| "grad_norm": 13.781719207763672, | |
| "learning_rate": 4.146001239925605e-06, | |
| "loss": 0.6684, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.5871047737135773, | |
| "grad_norm": 3.5966341495513916, | |
| "learning_rate": 4.130502169869808e-06, | |
| "loss": 0.5273, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 1.5886546807191568, | |
| "grad_norm": 6.0468926429748535, | |
| "learning_rate": 4.115003099814012e-06, | |
| "loss": 0.4655, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.5902045877247364, | |
| "grad_norm": 9.262929916381836, | |
| "learning_rate": 4.099504029758215e-06, | |
| "loss": 0.5839, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.5917544947303162, | |
| "grad_norm": 8.276415824890137, | |
| "learning_rate": 4.084004959702418e-06, | |
| "loss": 0.5158, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 1.593304401735896, | |
| "grad_norm": 7.415068626403809, | |
| "learning_rate": 4.068505889646622e-06, | |
| "loss": 0.4451, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 1.5948543087414755, | |
| "grad_norm": 6.222273826599121, | |
| "learning_rate": 4.053006819590825e-06, | |
| "loss": 0.5222, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.596404215747055, | |
| "grad_norm": 7.769200801849365, | |
| "learning_rate": 4.037507749535028e-06, | |
| "loss": 0.585, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.5979541227526348, | |
| "grad_norm": 8.023396492004395, | |
| "learning_rate": 4.022008679479232e-06, | |
| "loss": 0.4002, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 1.5995040297582146, | |
| "grad_norm": 10.689138412475586, | |
| "learning_rate": 4.006509609423435e-06, | |
| "loss": 0.7155, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.6010539367637941, | |
| "grad_norm": 7.788012504577637, | |
| "learning_rate": 3.991010539367638e-06, | |
| "loss": 0.7031, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 1.6026038437693737, | |
| "grad_norm": 6.007388114929199, | |
| "learning_rate": 3.975511469311842e-06, | |
| "loss": 0.4373, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 1.6041537507749535, | |
| "grad_norm": 10.727594375610352, | |
| "learning_rate": 3.960012399256045e-06, | |
| "loss": 0.6134, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.6057036577805333, | |
| "grad_norm": 4.113864898681641, | |
| "learning_rate": 3.944513329200248e-06, | |
| "loss": 0.5774, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 1.6072535647861128, | |
| "grad_norm": 6.425714015960693, | |
| "learning_rate": 3.929014259144452e-06, | |
| "loss": 0.6391, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 1.6088034717916924, | |
| "grad_norm": 4.605624675750732, | |
| "learning_rate": 3.913515189088655e-06, | |
| "loss": 0.5495, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.6103533787972721, | |
| "grad_norm": 3.2867257595062256, | |
| "learning_rate": 3.8980161190328584e-06, | |
| "loss": 0.5751, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 1.611903285802852, | |
| "grad_norm": 5.345250129699707, | |
| "learning_rate": 3.882517048977062e-06, | |
| "loss": 0.5068, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.6134531928084315, | |
| "grad_norm": 3.655217409133911, | |
| "learning_rate": 3.867017978921265e-06, | |
| "loss": 0.679, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.615003099814011, | |
| "grad_norm": 8.845053672790527, | |
| "learning_rate": 3.8515189088654685e-06, | |
| "loss": 0.5818, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 1.6165530068195908, | |
| "grad_norm": 10.273194313049316, | |
| "learning_rate": 3.836019838809672e-06, | |
| "loss": 0.6227, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 1.6181029138251706, | |
| "grad_norm": 2.203364849090576, | |
| "learning_rate": 3.8205207687538755e-06, | |
| "loss": 0.4314, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.6196528208307501, | |
| "grad_norm": 5.975243091583252, | |
| "learning_rate": 3.8050216986980785e-06, | |
| "loss": 0.3087, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.6212027278363297, | |
| "grad_norm": 4.337436676025391, | |
| "learning_rate": 3.789522628642282e-06, | |
| "loss": 0.6271, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 1.6227526348419095, | |
| "grad_norm": 6.358315467834473, | |
| "learning_rate": 3.7740235585864855e-06, | |
| "loss": 0.6318, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.6243025418474892, | |
| "grad_norm": 12.044097900390625, | |
| "learning_rate": 3.7585244885306886e-06, | |
| "loss": 0.3999, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 1.6258524488530688, | |
| "grad_norm": 8.828185081481934, | |
| "learning_rate": 3.7430254184748916e-06, | |
| "loss": 0.5629, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 1.6274023558586483, | |
| "grad_norm": 6.134864330291748, | |
| "learning_rate": 3.7275263484190947e-06, | |
| "loss": 0.5611, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.6289522628642281, | |
| "grad_norm": 11.847331047058105, | |
| "learning_rate": 3.712027278363298e-06, | |
| "loss": 0.7173, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 1.630502169869808, | |
| "grad_norm": 4.5125226974487305, | |
| "learning_rate": 3.6965282083075017e-06, | |
| "loss": 0.3994, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 1.6320520768753874, | |
| "grad_norm": 12.239794731140137, | |
| "learning_rate": 3.6810291382517048e-06, | |
| "loss": 0.5533, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.633601983880967, | |
| "grad_norm": 3.501194953918457, | |
| "learning_rate": 3.6655300681959082e-06, | |
| "loss": 0.6103, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 1.6351518908865468, | |
| "grad_norm": 9.775247573852539, | |
| "learning_rate": 3.6500309981401117e-06, | |
| "loss": 0.8142, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.6367017978921266, | |
| "grad_norm": 6.553702354431152, | |
| "learning_rate": 3.634531928084315e-06, | |
| "loss": 0.5786, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.638251704897706, | |
| "grad_norm": 3.682405471801758, | |
| "learning_rate": 3.6190328580285183e-06, | |
| "loss": 0.4615, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 1.6398016119032857, | |
| "grad_norm": 3.184781789779663, | |
| "learning_rate": 3.6035337879727218e-06, | |
| "loss": 0.4539, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 1.6413515189088654, | |
| "grad_norm": 3.740588426589966, | |
| "learning_rate": 3.588034717916925e-06, | |
| "loss": 0.493, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.6429014259144452, | |
| "grad_norm": 8.489069938659668, | |
| "learning_rate": 3.5725356478611283e-06, | |
| "loss": 0.4983, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.6444513329200248, | |
| "grad_norm": 6.311090469360352, | |
| "learning_rate": 3.557036577805332e-06, | |
| "loss": 0.4644, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 1.6460012399256043, | |
| "grad_norm": 14.392030715942383, | |
| "learning_rate": 3.541537507749535e-06, | |
| "loss": 0.503, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.647551146931184, | |
| "grad_norm": 5.263167858123779, | |
| "learning_rate": 3.5260384376937384e-06, | |
| "loss": 0.4371, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 1.6491010539367639, | |
| "grad_norm": 5.845542907714844, | |
| "learning_rate": 3.510539367637942e-06, | |
| "loss": 0.571, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 1.6506509609423434, | |
| "grad_norm": 4.867492198944092, | |
| "learning_rate": 3.495040297582145e-06, | |
| "loss": 0.5561, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.652200867947923, | |
| "grad_norm": 4.042713165283203, | |
| "learning_rate": 3.4795412275263484e-06, | |
| "loss": 0.3367, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 1.6537507749535028, | |
| "grad_norm": 7.001228332519531, | |
| "learning_rate": 3.464042157470552e-06, | |
| "loss": 0.4961, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 1.6553006819590825, | |
| "grad_norm": 5.529093265533447, | |
| "learning_rate": 3.448543087414755e-06, | |
| "loss": 0.536, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.656850588964662, | |
| "grad_norm": 1.5025368928909302, | |
| "learning_rate": 3.4330440173589585e-06, | |
| "loss": 0.537, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 1.6584004959702416, | |
| "grad_norm": 10.480669975280762, | |
| "learning_rate": 3.417544947303162e-06, | |
| "loss": 0.6538, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.6599504029758214, | |
| "grad_norm": 6.886585712432861, | |
| "learning_rate": 3.402045877247365e-06, | |
| "loss": 0.6508, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 1.6615003099814012, | |
| "grad_norm": 7.004822254180908, | |
| "learning_rate": 3.3865468071915685e-06, | |
| "loss": 0.5135, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 1.6630502169869807, | |
| "grad_norm": 6.180261135101318, | |
| "learning_rate": 3.371047737135772e-06, | |
| "loss": 0.4208, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 1.6646001239925603, | |
| "grad_norm": 12.792762756347656, | |
| "learning_rate": 3.355548667079975e-06, | |
| "loss": 0.5253, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 1.66615003099814, | |
| "grad_norm": 8.852483749389648, | |
| "learning_rate": 3.3400495970241786e-06, | |
| "loss": 0.5145, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.6676999380037199, | |
| "grad_norm": 7.686612129211426, | |
| "learning_rate": 3.324550526968382e-06, | |
| "loss": 0.4965, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 1.6692498450092994, | |
| "grad_norm": 5.723041534423828, | |
| "learning_rate": 3.309051456912585e-06, | |
| "loss": 0.5359, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 1.670799752014879, | |
| "grad_norm": 11.530805587768555, | |
| "learning_rate": 3.2935523868567886e-06, | |
| "loss": 0.5656, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 1.6723496590204587, | |
| "grad_norm": 8.491327285766602, | |
| "learning_rate": 3.278053316800992e-06, | |
| "loss": 0.5958, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 1.6738995660260385, | |
| "grad_norm": 4.736944198608398, | |
| "learning_rate": 3.262554246745195e-06, | |
| "loss": 0.5544, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.675449473031618, | |
| "grad_norm": 11.811408042907715, | |
| "learning_rate": 3.2470551766893987e-06, | |
| "loss": 0.6796, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 1.6769993800371976, | |
| "grad_norm": 6.621671199798584, | |
| "learning_rate": 3.231556106633602e-06, | |
| "loss": 0.664, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 1.6785492870427774, | |
| "grad_norm": 5.999893665313721, | |
| "learning_rate": 3.2160570365778052e-06, | |
| "loss": 0.5491, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 1.6800991940483572, | |
| "grad_norm": 7.547499179840088, | |
| "learning_rate": 3.2005579665220087e-06, | |
| "loss": 0.6801, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 1.6816491010539367, | |
| "grad_norm": 3.695556402206421, | |
| "learning_rate": 3.185058896466212e-06, | |
| "loss": 0.6289, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 1.6831990080595163, | |
| "grad_norm": 5.461249351501465, | |
| "learning_rate": 3.1695598264104153e-06, | |
| "loss": 0.5556, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 1.684748915065096, | |
| "grad_norm": 1.9557623863220215, | |
| "learning_rate": 3.1540607563546188e-06, | |
| "loss": 0.3568, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 1.6862988220706758, | |
| "grad_norm": 10.149141311645508, | |
| "learning_rate": 3.1385616862988223e-06, | |
| "loss": 0.6667, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 1.6878487290762554, | |
| "grad_norm": 4.037074565887451, | |
| "learning_rate": 3.1230626162430257e-06, | |
| "loss": 0.4963, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 1.689398636081835, | |
| "grad_norm": 3.6165964603424072, | |
| "learning_rate": 3.107563546187229e-06, | |
| "loss": 0.2717, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.6909485430874147, | |
| "grad_norm": 10.908464431762695, | |
| "learning_rate": 3.0920644761314323e-06, | |
| "loss": 0.5929, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 1.6924984500929945, | |
| "grad_norm": 7.784704208374023, | |
| "learning_rate": 3.076565406075636e-06, | |
| "loss": 0.4904, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 1.694048357098574, | |
| "grad_norm": 3.434318780899048, | |
| "learning_rate": 3.061066336019839e-06, | |
| "loss": 0.4701, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 1.6955982641041536, | |
| "grad_norm": 6.939949989318848, | |
| "learning_rate": 3.0455672659640423e-06, | |
| "loss": 0.5454, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 1.6971481711097334, | |
| "grad_norm": 6.944652080535889, | |
| "learning_rate": 3.030068195908246e-06, | |
| "loss": 0.4567, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.6986980781153131, | |
| "grad_norm": 6.0426788330078125, | |
| "learning_rate": 3.014569125852449e-06, | |
| "loss": 0.6604, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 1.7002479851208927, | |
| "grad_norm": 3.7648766040802, | |
| "learning_rate": 2.9990700557966524e-06, | |
| "loss": 0.5468, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 1.7017978921264723, | |
| "grad_norm": 5.345854759216309, | |
| "learning_rate": 2.983570985740856e-06, | |
| "loss": 0.6152, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 1.703347799132052, | |
| "grad_norm": 5.971490383148193, | |
| "learning_rate": 2.968071915685059e-06, | |
| "loss": 0.7541, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 1.7048977061376318, | |
| "grad_norm": 8.548897743225098, | |
| "learning_rate": 2.9525728456292624e-06, | |
| "loss": 0.5634, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.7064476131432114, | |
| "grad_norm": 1.5280627012252808, | |
| "learning_rate": 2.937073775573466e-06, | |
| "loss": 0.4575, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 1.707997520148791, | |
| "grad_norm": 10.257830619812012, | |
| "learning_rate": 2.921574705517669e-06, | |
| "loss": 0.444, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 1.7095474271543707, | |
| "grad_norm": 3.970982313156128, | |
| "learning_rate": 2.9060756354618725e-06, | |
| "loss": 0.4747, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 1.7110973341599505, | |
| "grad_norm": 4.753783702850342, | |
| "learning_rate": 2.890576565406076e-06, | |
| "loss": 0.6571, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 1.71264724116553, | |
| "grad_norm": 8.187065124511719, | |
| "learning_rate": 2.875077495350279e-06, | |
| "loss": 0.5535, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 1.7141971481711096, | |
| "grad_norm": 3.5233545303344727, | |
| "learning_rate": 2.8595784252944825e-06, | |
| "loss": 0.366, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 1.7157470551766894, | |
| "grad_norm": 13.607765197753906, | |
| "learning_rate": 2.844079355238686e-06, | |
| "loss": 0.4742, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 1.7172969621822691, | |
| "grad_norm": 14.473544120788574, | |
| "learning_rate": 2.828580285182889e-06, | |
| "loss": 0.3733, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 1.7188468691878487, | |
| "grad_norm": 3.89443302154541, | |
| "learning_rate": 2.8130812151270926e-06, | |
| "loss": 0.7239, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 1.7203967761934282, | |
| "grad_norm": 13.638595581054688, | |
| "learning_rate": 2.797582145071296e-06, | |
| "loss": 0.6501, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.721946683199008, | |
| "grad_norm": 2.860119581222534, | |
| "learning_rate": 2.782083075015499e-06, | |
| "loss": 0.5003, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 1.7234965902045878, | |
| "grad_norm": 5.019274711608887, | |
| "learning_rate": 2.7665840049597026e-06, | |
| "loss": 0.4175, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 1.7250464972101673, | |
| "grad_norm": 10.342511177062988, | |
| "learning_rate": 2.751084934903906e-06, | |
| "loss": 0.6349, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 1.726596404215747, | |
| "grad_norm": 11.173345565795898, | |
| "learning_rate": 2.735585864848109e-06, | |
| "loss": 0.6483, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 1.7281463112213267, | |
| "grad_norm": 8.214896202087402, | |
| "learning_rate": 2.7200867947923127e-06, | |
| "loss": 0.5219, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 1.7296962182269064, | |
| "grad_norm": 5.524158477783203, | |
| "learning_rate": 2.704587724736516e-06, | |
| "loss": 0.5078, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 1.731246125232486, | |
| "grad_norm": 2.100583791732788, | |
| "learning_rate": 2.6890886546807192e-06, | |
| "loss": 0.7127, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 1.7327960322380656, | |
| "grad_norm": 7.552937030792236, | |
| "learning_rate": 2.6735895846249227e-06, | |
| "loss": 0.3605, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 1.7343459392436453, | |
| "grad_norm": 8.49806022644043, | |
| "learning_rate": 2.6580905145691262e-06, | |
| "loss": 0.4601, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 1.735895846249225, | |
| "grad_norm": 7.082111835479736, | |
| "learning_rate": 2.6425914445133293e-06, | |
| "loss": 0.7208, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.7374457532548047, | |
| "grad_norm": 3.600315809249878, | |
| "learning_rate": 2.6270923744575328e-06, | |
| "loss": 0.4493, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 1.7389956602603842, | |
| "grad_norm": 1.5405011177062988, | |
| "learning_rate": 2.6115933044017363e-06, | |
| "loss": 0.4534, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 1.740545567265964, | |
| "grad_norm": 9.279645919799805, | |
| "learning_rate": 2.5960942343459393e-06, | |
| "loss": 0.7194, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 1.7420954742715438, | |
| "grad_norm": 5.783839702606201, | |
| "learning_rate": 2.580595164290143e-06, | |
| "loss": 0.5796, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 1.7436453812771233, | |
| "grad_norm": 9.030817985534668, | |
| "learning_rate": 2.5650960942343463e-06, | |
| "loss": 0.5363, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.7451952882827029, | |
| "grad_norm": 5.028315544128418, | |
| "learning_rate": 2.5495970241785494e-06, | |
| "loss": 0.4579, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 1.7467451952882826, | |
| "grad_norm": 3.2063095569610596, | |
| "learning_rate": 2.534097954122753e-06, | |
| "loss": 0.3426, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 1.7482951022938624, | |
| "grad_norm": 8.42487621307373, | |
| "learning_rate": 2.5185988840669564e-06, | |
| "loss": 0.4996, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 1.749845009299442, | |
| "grad_norm": 5.428744792938232, | |
| "learning_rate": 2.5030998140111594e-06, | |
| "loss": 0.5806, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 1.7513949163050218, | |
| "grad_norm": 14.892745971679688, | |
| "learning_rate": 2.487600743955363e-06, | |
| "loss": 0.5464, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.7529448233106013, | |
| "grad_norm": 6.309720039367676, | |
| "learning_rate": 2.4721016738995664e-06, | |
| "loss": 0.5558, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 1.754494730316181, | |
| "grad_norm": 4.976221084594727, | |
| "learning_rate": 2.4566026038437695e-06, | |
| "loss": 0.4486, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 1.7560446373217609, | |
| "grad_norm": 5.923755645751953, | |
| "learning_rate": 2.441103533787973e-06, | |
| "loss": 0.4702, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 1.7575945443273404, | |
| "grad_norm": 9.575067520141602, | |
| "learning_rate": 2.4256044637321764e-06, | |
| "loss": 0.5632, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 1.75914445133292, | |
| "grad_norm": 12.65174388885498, | |
| "learning_rate": 2.4101053936763795e-06, | |
| "loss": 0.4884, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 1.7606943583384997, | |
| "grad_norm": 10.048745155334473, | |
| "learning_rate": 2.394606323620583e-06, | |
| "loss": 0.4622, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 1.7622442653440795, | |
| "grad_norm": 10.388143539428711, | |
| "learning_rate": 2.3791072535647865e-06, | |
| "loss": 0.6734, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 1.763794172349659, | |
| "grad_norm": 7.223123073577881, | |
| "learning_rate": 2.3636081835089896e-06, | |
| "loss": 0.5858, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 1.7653440793552386, | |
| "grad_norm": 14.674286842346191, | |
| "learning_rate": 2.348109113453193e-06, | |
| "loss": 0.589, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 1.7668939863608184, | |
| "grad_norm": 10.186848640441895, | |
| "learning_rate": 2.3326100433973965e-06, | |
| "loss": 0.5389, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.7684438933663982, | |
| "grad_norm": 2.5273337364196777, | |
| "learning_rate": 2.3171109733415996e-06, | |
| "loss": 0.4664, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 1.7699938003719777, | |
| "grad_norm": 7.200957775115967, | |
| "learning_rate": 2.301611903285803e-06, | |
| "loss": 0.496, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 1.7715437073775573, | |
| "grad_norm": 5.758810997009277, | |
| "learning_rate": 2.2861128332300066e-06, | |
| "loss": 0.5685, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 1.773093614383137, | |
| "grad_norm": 4.028495788574219, | |
| "learning_rate": 2.2706137631742097e-06, | |
| "loss": 0.5332, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 1.7746435213887168, | |
| "grad_norm": 6.298703193664551, | |
| "learning_rate": 2.255114693118413e-06, | |
| "loss": 0.5661, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 1.7761934283942964, | |
| "grad_norm": 6.164602756500244, | |
| "learning_rate": 2.2396156230626166e-06, | |
| "loss": 0.519, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 1.777743335399876, | |
| "grad_norm": 5.306787490844727, | |
| "learning_rate": 2.2241165530068197e-06, | |
| "loss": 0.6083, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 1.7792932424054557, | |
| "grad_norm": 9.495424270629883, | |
| "learning_rate": 2.208617482951023e-06, | |
| "loss": 0.4819, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 1.7808431494110355, | |
| "grad_norm": 2.389009475708008, | |
| "learning_rate": 2.1931184128952267e-06, | |
| "loss": 0.387, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 1.782393056416615, | |
| "grad_norm": 8.894001007080078, | |
| "learning_rate": 2.1776193428394298e-06, | |
| "loss": 0.4082, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.7839429634221946, | |
| "grad_norm": 8.785544395446777, | |
| "learning_rate": 2.1621202727836332e-06, | |
| "loss": 0.3576, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 1.7854928704277744, | |
| "grad_norm": 12.12935733795166, | |
| "learning_rate": 2.1466212027278367e-06, | |
| "loss": 0.5719, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 1.7870427774333542, | |
| "grad_norm": 7.645413398742676, | |
| "learning_rate": 2.13112213267204e-06, | |
| "loss": 0.9, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 1.7885926844389337, | |
| "grad_norm": 6.584036827087402, | |
| "learning_rate": 2.1156230626162433e-06, | |
| "loss": 0.4937, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 1.7901425914445133, | |
| "grad_norm": 9.352211952209473, | |
| "learning_rate": 2.1001239925604468e-06, | |
| "loss": 0.5416, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.791692498450093, | |
| "grad_norm": 3.5377469062805176, | |
| "learning_rate": 2.08462492250465e-06, | |
| "loss": 0.4514, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 1.7932424054556728, | |
| "grad_norm": 8.766286849975586, | |
| "learning_rate": 2.0691258524488533e-06, | |
| "loss": 0.4314, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 1.7947923124612524, | |
| "grad_norm": 9.854792594909668, | |
| "learning_rate": 2.053626782393057e-06, | |
| "loss": 0.6012, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 1.796342219466832, | |
| "grad_norm": 8.957939147949219, | |
| "learning_rate": 2.03812771233726e-06, | |
| "loss": 0.5714, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 1.7978921264724117, | |
| "grad_norm": 5.054236888885498, | |
| "learning_rate": 2.0226286422814634e-06, | |
| "loss": 0.6261, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.7994420334779915, | |
| "grad_norm": 6.757805824279785, | |
| "learning_rate": 2.007129572225667e-06, | |
| "loss": 0.5811, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 1.800991940483571, | |
| "grad_norm": 4.554663181304932, | |
| "learning_rate": 1.99163050216987e-06, | |
| "loss": 0.5902, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 1.8025418474891506, | |
| "grad_norm": 5.606589317321777, | |
| "learning_rate": 1.9761314321140734e-06, | |
| "loss": 0.4664, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 1.8040917544947304, | |
| "grad_norm": 9.500225067138672, | |
| "learning_rate": 1.960632362058277e-06, | |
| "loss": 0.6353, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 1.8056416615003101, | |
| "grad_norm": 9.842873573303223, | |
| "learning_rate": 1.94513329200248e-06, | |
| "loss": 0.4525, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 1.8071915685058897, | |
| "grad_norm": 6.289135456085205, | |
| "learning_rate": 1.9296342219466835e-06, | |
| "loss": 0.486, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 1.8087414755114692, | |
| "grad_norm": 5.714487075805664, | |
| "learning_rate": 1.914135151890887e-06, | |
| "loss": 0.4664, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 1.810291382517049, | |
| "grad_norm": 6.768747806549072, | |
| "learning_rate": 1.8986360818350902e-06, | |
| "loss": 0.6371, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 1.8118412895226288, | |
| "grad_norm": 6.78218412399292, | |
| "learning_rate": 1.8831370117792935e-06, | |
| "loss": 0.473, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 1.8133911965282083, | |
| "grad_norm": 5.904227256774902, | |
| "learning_rate": 1.8676379417234966e-06, | |
| "loss": 0.5261, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.814941103533788, | |
| "grad_norm": 10.271391868591309, | |
| "learning_rate": 1.8521388716676999e-06, | |
| "loss": 0.4882, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 1.8164910105393677, | |
| "grad_norm": 3.0156373977661133, | |
| "learning_rate": 1.8366398016119034e-06, | |
| "loss": 0.5023, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 1.8180409175449475, | |
| "grad_norm": 1.9927630424499512, | |
| "learning_rate": 1.8211407315561066e-06, | |
| "loss": 0.3889, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 1.819590824550527, | |
| "grad_norm": 7.668587684631348, | |
| "learning_rate": 1.80564166150031e-06, | |
| "loss": 0.6783, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 1.8211407315561066, | |
| "grad_norm": 6.309301853179932, | |
| "learning_rate": 1.7901425914445134e-06, | |
| "loss": 0.6409, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 1.8226906385616863, | |
| "grad_norm": 10.230514526367188, | |
| "learning_rate": 1.7746435213887167e-06, | |
| "loss": 0.621, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 1.8242405455672661, | |
| "grad_norm": 4.287505149841309, | |
| "learning_rate": 1.75914445133292e-06, | |
| "loss": 0.5267, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 1.8257904525728457, | |
| "grad_norm": 10.235265731811523, | |
| "learning_rate": 1.7436453812771235e-06, | |
| "loss": 0.5312, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 1.8273403595784252, | |
| "grad_norm": 4.249007701873779, | |
| "learning_rate": 1.7281463112213267e-06, | |
| "loss": 0.5245, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 1.828890266584005, | |
| "grad_norm": 8.38654613494873, | |
| "learning_rate": 1.71264724116553e-06, | |
| "loss": 0.4768, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.8304401735895848, | |
| "grad_norm": 6.889254093170166, | |
| "learning_rate": 1.6971481711097335e-06, | |
| "loss": 0.5, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 1.8319900805951643, | |
| "grad_norm": 10.057848930358887, | |
| "learning_rate": 1.6816491010539368e-06, | |
| "loss": 0.5757, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 1.8335399876007439, | |
| "grad_norm": 10.05346965789795, | |
| "learning_rate": 1.66615003099814e-06, | |
| "loss": 0.6162, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 1.8350898946063237, | |
| "grad_norm": 11.503827095031738, | |
| "learning_rate": 1.6506509609423435e-06, | |
| "loss": 0.505, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 1.8366398016119034, | |
| "grad_norm": 7.171069145202637, | |
| "learning_rate": 1.6351518908865468e-06, | |
| "loss": 0.6695, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 1.838189708617483, | |
| "grad_norm": 4.7285075187683105, | |
| "learning_rate": 1.61965282083075e-06, | |
| "loss": 0.5785, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 1.8397396156230625, | |
| "grad_norm": 4.173981666564941, | |
| "learning_rate": 1.6041537507749536e-06, | |
| "loss": 0.4007, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 1.8412895226286423, | |
| "grad_norm": 7.87106466293335, | |
| "learning_rate": 1.5886546807191569e-06, | |
| "loss": 0.5988, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 1.842839429634222, | |
| "grad_norm": 5.423832416534424, | |
| "learning_rate": 1.5731556106633604e-06, | |
| "loss": 0.4983, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 1.8443893366398016, | |
| "grad_norm": 12.526191711425781, | |
| "learning_rate": 1.5576565406075636e-06, | |
| "loss": 0.4422, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 1.8459392436453812, | |
| "grad_norm": 0.24000059068202972, | |
| "learning_rate": 1.542157470551767e-06, | |
| "loss": 0.4408, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 1.847489150650961, | |
| "grad_norm": 5.664487838745117, | |
| "learning_rate": 1.5266584004959704e-06, | |
| "loss": 0.3943, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 1.8490390576565408, | |
| "grad_norm": 4.590766906738281, | |
| "learning_rate": 1.5111593304401737e-06, | |
| "loss": 0.4653, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 1.8505889646621203, | |
| "grad_norm": 8.723493576049805, | |
| "learning_rate": 1.495660260384377e-06, | |
| "loss": 0.5275, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 1.8521388716676999, | |
| "grad_norm": 3.8995718955993652, | |
| "learning_rate": 1.4801611903285805e-06, | |
| "loss": 0.4465, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 1.8536887786732796, | |
| "grad_norm": 9.986695289611816, | |
| "learning_rate": 1.4646621202727837e-06, | |
| "loss": 0.3274, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 1.8552386856788594, | |
| "grad_norm": 9.503478050231934, | |
| "learning_rate": 1.449163050216987e-06, | |
| "loss": 0.6006, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 1.856788592684439, | |
| "grad_norm": 11.161650657653809, | |
| "learning_rate": 1.4336639801611905e-06, | |
| "loss": 0.6187, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 1.8583384996900185, | |
| "grad_norm": 4.863800048828125, | |
| "learning_rate": 1.4181649101053938e-06, | |
| "loss": 0.4654, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 1.8598884066955983, | |
| "grad_norm": 11.651344299316406, | |
| "learning_rate": 1.402665840049597e-06, | |
| "loss": 0.7257, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.861438313701178, | |
| "grad_norm": 6.425150394439697, | |
| "learning_rate": 1.3871667699938005e-06, | |
| "loss": 0.5763, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 1.8629882207067576, | |
| "grad_norm": 4.594536781311035, | |
| "learning_rate": 1.3716676999380038e-06, | |
| "loss": 0.5372, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 1.8645381277123372, | |
| "grad_norm": 5.005278587341309, | |
| "learning_rate": 1.356168629882207e-06, | |
| "loss": 0.4594, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 1.866088034717917, | |
| "grad_norm": 6.88616943359375, | |
| "learning_rate": 1.3406695598264106e-06, | |
| "loss": 0.4703, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 1.8676379417234967, | |
| "grad_norm": 4.090301513671875, | |
| "learning_rate": 1.3251704897706139e-06, | |
| "loss": 0.4165, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 1.8691878487290763, | |
| "grad_norm": 3.528439521789551, | |
| "learning_rate": 1.3096714197148172e-06, | |
| "loss": 0.7157, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 1.8707377557346558, | |
| "grad_norm": 5.5028533935546875, | |
| "learning_rate": 1.2941723496590206e-06, | |
| "loss": 0.5242, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 1.8722876627402356, | |
| "grad_norm": 7.8318657875061035, | |
| "learning_rate": 1.278673279603224e-06, | |
| "loss": 0.648, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 1.8738375697458154, | |
| "grad_norm": 6.823987007141113, | |
| "learning_rate": 1.2631742095474272e-06, | |
| "loss": 0.7727, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 1.875387476751395, | |
| "grad_norm": 7.5707526206970215, | |
| "learning_rate": 1.2476751394916307e-06, | |
| "loss": 0.552, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 1.8769373837569745, | |
| "grad_norm": 5.46683931350708, | |
| "learning_rate": 1.232176069435834e-06, | |
| "loss": 0.3831, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 1.8784872907625543, | |
| "grad_norm": 9.674421310424805, | |
| "learning_rate": 1.2166769993800372e-06, | |
| "loss": 0.6312, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 1.880037197768134, | |
| "grad_norm": 4.260156154632568, | |
| "learning_rate": 1.2011779293242407e-06, | |
| "loss": 0.5856, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 1.8815871047737136, | |
| "grad_norm": 3.7952609062194824, | |
| "learning_rate": 1.185678859268444e-06, | |
| "loss": 0.4176, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 1.8831370117792932, | |
| "grad_norm": 10.493324279785156, | |
| "learning_rate": 1.1701797892126473e-06, | |
| "loss": 0.578, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 1.884686918784873, | |
| "grad_norm": 1.78457772731781, | |
| "learning_rate": 1.1546807191568508e-06, | |
| "loss": 0.4001, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 1.8862368257904527, | |
| "grad_norm": 12.227059364318848, | |
| "learning_rate": 1.139181649101054e-06, | |
| "loss": 0.5989, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 1.8877867327960323, | |
| "grad_norm": 7.4753241539001465, | |
| "learning_rate": 1.1236825790452573e-06, | |
| "loss": 0.3872, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 1.8893366398016118, | |
| "grad_norm": 8.94823169708252, | |
| "learning_rate": 1.1081835089894608e-06, | |
| "loss": 0.3952, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 1.8908865468071916, | |
| "grad_norm": 9.308780670166016, | |
| "learning_rate": 1.0926844389336641e-06, | |
| "loss": 0.6887, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.8924364538127714, | |
| "grad_norm": 4.786229610443115, | |
| "learning_rate": 1.0771853688778674e-06, | |
| "loss": 0.4441, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 1.893986360818351, | |
| "grad_norm": 5.059142112731934, | |
| "learning_rate": 1.0616862988220709e-06, | |
| "loss": 0.6729, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 1.8955362678239305, | |
| "grad_norm": 2.991612195968628, | |
| "learning_rate": 1.0461872287662742e-06, | |
| "loss": 0.4542, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 1.8970861748295103, | |
| "grad_norm": 2.8348984718322754, | |
| "learning_rate": 1.0306881587104774e-06, | |
| "loss": 0.3195, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 1.89863608183509, | |
| "grad_norm": 10.625432014465332, | |
| "learning_rate": 1.015189088654681e-06, | |
| "loss": 0.5155, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 1.9001859888406696, | |
| "grad_norm": 4.587387561798096, | |
| "learning_rate": 9.996900185988842e-07, | |
| "loss": 0.6146, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 1.9017358958462491, | |
| "grad_norm": 5.292594909667969, | |
| "learning_rate": 9.841909485430875e-07, | |
| "loss": 0.4085, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 1.903285802851829, | |
| "grad_norm": 2.4424188137054443, | |
| "learning_rate": 9.68691878487291e-07, | |
| "loss": 0.403, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 1.9048357098574087, | |
| "grad_norm": 4.393378734588623, | |
| "learning_rate": 9.531928084314942e-07, | |
| "loss": 0.4688, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 1.9063856168629882, | |
| "grad_norm": 8.843658447265625, | |
| "learning_rate": 9.376937383756976e-07, | |
| "loss": 0.5858, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 1.9079355238685678, | |
| "grad_norm": 4.157933235168457, | |
| "learning_rate": 9.221946683199008e-07, | |
| "loss": 0.4303, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 1.9094854308741476, | |
| "grad_norm": 4.35939884185791, | |
| "learning_rate": 9.066955982641042e-07, | |
| "loss": 0.3844, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 1.9110353378797273, | |
| "grad_norm": 3.711383581161499, | |
| "learning_rate": 8.911965282083075e-07, | |
| "loss": 0.4983, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 1.912585244885307, | |
| "grad_norm": 10.645997047424316, | |
| "learning_rate": 8.756974581525109e-07, | |
| "loss": 0.5123, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 1.9141351518908865, | |
| "grad_norm": 11.995467185974121, | |
| "learning_rate": 8.601983880967142e-07, | |
| "loss": 0.5307, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 1.9156850588964662, | |
| "grad_norm": 10.4423828125, | |
| "learning_rate": 8.446993180409175e-07, | |
| "loss": 0.5145, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 1.917234965902046, | |
| "grad_norm": 2.7148125171661377, | |
| "learning_rate": 8.292002479851209e-07, | |
| "loss": 0.6484, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 1.9187848729076256, | |
| "grad_norm": 3.2615230083465576, | |
| "learning_rate": 8.137011779293243e-07, | |
| "loss": 0.5534, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 1.9203347799132051, | |
| "grad_norm": 5.097367286682129, | |
| "learning_rate": 7.982021078735276e-07, | |
| "loss": 0.475, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 1.921884686918785, | |
| "grad_norm": 4.083049774169922, | |
| "learning_rate": 7.827030378177309e-07, | |
| "loss": 0.4121, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.9234345939243647, | |
| "grad_norm": 7.365451335906982, | |
| "learning_rate": 7.672039677619343e-07, | |
| "loss": 0.518, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 1.9249845009299442, | |
| "grad_norm": 5.423826217651367, | |
| "learning_rate": 7.517048977061377e-07, | |
| "loss": 0.5673, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 1.9265344079355238, | |
| "grad_norm": 1.642021656036377, | |
| "learning_rate": 7.36205827650341e-07, | |
| "loss": 0.5057, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 1.9280843149411035, | |
| "grad_norm": 8.40982437133789, | |
| "learning_rate": 7.207067575945444e-07, | |
| "loss": 0.3904, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 1.9296342219466833, | |
| "grad_norm": 5.199375152587891, | |
| "learning_rate": 7.052076875387478e-07, | |
| "loss": 0.5082, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 1.9311841289522629, | |
| "grad_norm": 2.0132346153259277, | |
| "learning_rate": 6.89708617482951e-07, | |
| "loss": 0.3823, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 1.9327340359578424, | |
| "grad_norm": 7.0812201499938965, | |
| "learning_rate": 6.742095474271544e-07, | |
| "loss": 0.5928, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 1.9342839429634222, | |
| "grad_norm": 8.322930335998535, | |
| "learning_rate": 6.587104773713578e-07, | |
| "loss": 0.6861, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 1.935833849969002, | |
| "grad_norm": 3.1925439834594727, | |
| "learning_rate": 6.432114073155611e-07, | |
| "loss": 0.485, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 1.9373837569745815, | |
| "grad_norm": 8.817286491394043, | |
| "learning_rate": 6.277123372597645e-07, | |
| "loss": 0.5448, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.938933663980161, | |
| "grad_norm": 5.955672740936279, | |
| "learning_rate": 6.122132672039679e-07, | |
| "loss": 0.5664, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 1.9404835709857409, | |
| "grad_norm": 4.034615516662598, | |
| "learning_rate": 5.967141971481711e-07, | |
| "loss": 0.5256, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 1.9420334779913206, | |
| "grad_norm": 5.02003288269043, | |
| "learning_rate": 5.812151270923745e-07, | |
| "loss": 0.5397, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 1.9435833849969002, | |
| "grad_norm": 4.505518913269043, | |
| "learning_rate": 5.657160570365779e-07, | |
| "loss": 0.5262, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 1.9451332920024798, | |
| "grad_norm": 8.628135681152344, | |
| "learning_rate": 5.502169869807812e-07, | |
| "loss": 0.5993, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 1.9466831990080595, | |
| "grad_norm": 4.474428653717041, | |
| "learning_rate": 5.347179169249846e-07, | |
| "loss": 0.6789, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 1.9482331060136393, | |
| "grad_norm": 4.82116174697876, | |
| "learning_rate": 5.19218846869188e-07, | |
| "loss": 0.4887, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 1.9497830130192189, | |
| "grad_norm": 8.986303329467773, | |
| "learning_rate": 5.037197768133912e-07, | |
| "loss": 0.8879, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 1.9513329200247984, | |
| "grad_norm": 4.798040866851807, | |
| "learning_rate": 4.882207067575946e-07, | |
| "loss": 0.4352, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 1.9528828270303782, | |
| "grad_norm": 3.8391406536102295, | |
| "learning_rate": 4.7272163670179794e-07, | |
| "loss": 0.5088, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.954432734035958, | |
| "grad_norm": 0.2043791562318802, | |
| "learning_rate": 4.572225666460012e-07, | |
| "loss": 0.5993, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 1.9559826410415375, | |
| "grad_norm": 6.672250747680664, | |
| "learning_rate": 4.417234965902046e-07, | |
| "loss": 0.6768, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 1.957532548047117, | |
| "grad_norm": 12.16546630859375, | |
| "learning_rate": 4.2622442653440794e-07, | |
| "loss": 0.6934, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 1.9590824550526968, | |
| "grad_norm": 10.988899230957031, | |
| "learning_rate": 4.1072535647861127e-07, | |
| "loss": 0.4547, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 1.9606323620582766, | |
| "grad_norm": 10.418371200561523, | |
| "learning_rate": 3.9522628642281465e-07, | |
| "loss": 0.4585, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 1.9621822690638562, | |
| "grad_norm": 9.15796184539795, | |
| "learning_rate": 3.79727216367018e-07, | |
| "loss": 0.473, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 1.9637321760694357, | |
| "grad_norm": 5.8964762687683105, | |
| "learning_rate": 3.6422814631122137e-07, | |
| "loss": 0.4932, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 1.9652820830750155, | |
| "grad_norm": 6.89042329788208, | |
| "learning_rate": 3.487290762554247e-07, | |
| "loss": 0.5725, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 1.9668319900805953, | |
| "grad_norm": 5.677920818328857, | |
| "learning_rate": 3.3323000619962803e-07, | |
| "loss": 0.4091, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 1.9683818970861748, | |
| "grad_norm": 7.661967754364014, | |
| "learning_rate": 3.177309361438314e-07, | |
| "loss": 0.4824, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 1.9699318040917544, | |
| "grad_norm": 4.617781162261963, | |
| "learning_rate": 3.0223186608803475e-07, | |
| "loss": 0.588, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 1.9714817110973342, | |
| "grad_norm": 4.782810688018799, | |
| "learning_rate": 2.867327960322381e-07, | |
| "loss": 0.5242, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 1.973031618102914, | |
| "grad_norm": 8.13632583618164, | |
| "learning_rate": 2.7123372597644146e-07, | |
| "loss": 0.4487, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 1.9745815251084935, | |
| "grad_norm": 3.317437171936035, | |
| "learning_rate": 2.557346559206448e-07, | |
| "loss": 0.4886, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 1.976131432114073, | |
| "grad_norm": 12.30063533782959, | |
| "learning_rate": 2.402355858648481e-07, | |
| "loss": 0.6661, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 1.9776813391196528, | |
| "grad_norm": 5.912576675415039, | |
| "learning_rate": 2.2473651580905146e-07, | |
| "loss": 0.407, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 1.9792312461252326, | |
| "grad_norm": 5.114182472229004, | |
| "learning_rate": 2.0923744575325482e-07, | |
| "loss": 0.494, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 1.9807811531308122, | |
| "grad_norm": 11.338470458984375, | |
| "learning_rate": 1.9373837569745817e-07, | |
| "loss": 0.5046, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 1.9823310601363917, | |
| "grad_norm": 2.7713358402252197, | |
| "learning_rate": 1.782393056416615e-07, | |
| "loss": 0.555, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 1.9838809671419715, | |
| "grad_norm": 4.620921611785889, | |
| "learning_rate": 1.6274023558586486e-07, | |
| "loss": 0.4252, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.9854308741475513, | |
| "grad_norm": 4.4270806312561035, | |
| "learning_rate": 1.4724116553006822e-07, | |
| "loss": 0.4965, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 1.9869807811531308, | |
| "grad_norm": 9.595443725585938, | |
| "learning_rate": 1.3174209547427155e-07, | |
| "loss": 0.6023, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 1.9885306881587104, | |
| "grad_norm": 6.762941837310791, | |
| "learning_rate": 1.162430254184749e-07, | |
| "loss": 0.5376, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 1.9900805951642901, | |
| "grad_norm": 7.066582679748535, | |
| "learning_rate": 1.0074395536267824e-07, | |
| "loss": 0.4774, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 1.99163050216987, | |
| "grad_norm": 5.059195041656494, | |
| "learning_rate": 8.52448853068816e-08, | |
| "loss": 0.639, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 1.9931804091754495, | |
| "grad_norm": 9.503751754760742, | |
| "learning_rate": 6.974581525108494e-08, | |
| "loss": 0.5106, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 1.994730316181029, | |
| "grad_norm": 7.847515106201172, | |
| "learning_rate": 5.424674519528828e-08, | |
| "loss": 0.4999, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 1.9962802231866088, | |
| "grad_norm": 4.666672706604004, | |
| "learning_rate": 3.874767513949163e-08, | |
| "loss": 0.4127, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 1.9978301301921886, | |
| "grad_norm": 0.40573740005493164, | |
| "learning_rate": 2.324860508369498e-08, | |
| "loss": 0.5357, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 1.9993800371977681, | |
| "grad_norm": 15.386001586914062, | |
| "learning_rate": 7.749535027898328e-09, | |
| "loss": 0.4906, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.3503767491926803, | |
| "eval_loss": 1.858621597290039, | |
| "eval_runtime": 26.7923, | |
| "eval_samples_per_second": 69.348, | |
| "eval_steps_per_second": 8.697, | |
| "step": 12904 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 12904, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.999081618146877e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |