| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996222709073053, | |
| "eval_steps": 500, | |
| "global_step": 3308, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0030218327415577548, | |
| "grad_norm": 166.1145477294922, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 13.8334, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0060436654831155096, | |
| "grad_norm": 81.33020782470703, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 13.1022, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.009065498224673264, | |
| "grad_norm": 51.09122085571289, | |
| "learning_rate": 3e-06, | |
| "loss": 12.0899, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012087330966231019, | |
| "grad_norm": 49.01457214355469, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 10.2942, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.015109163707788774, | |
| "grad_norm": 46.00205612182617, | |
| "learning_rate": 5e-06, | |
| "loss": 9.538, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01813099644934653, | |
| "grad_norm": 41.61635971069336, | |
| "learning_rate": 6e-06, | |
| "loss": 8.3357, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.021152829190904283, | |
| "grad_norm": 31.584325790405273, | |
| "learning_rate": 7e-06, | |
| "loss": 6.9115, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.024174661932462038, | |
| "grad_norm": 51.664695739746094, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 6.7596, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.027196494674019793, | |
| "grad_norm": 31.014076232910156, | |
| "learning_rate": 9e-06, | |
| "loss": 7.0298, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.030218327415577548, | |
| "grad_norm": 37.792179107666016, | |
| "learning_rate": 1e-05, | |
| "loss": 6.7021, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0332401601571353, | |
| "grad_norm": 37.68498611450195, | |
| "learning_rate": 9.999760394462267e-06, | |
| "loss": 7.3545, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03626199289869306, | |
| "grad_norm": 38.4805793762207, | |
| "learning_rate": 9.999041600813393e-06, | |
| "loss": 7.0073, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03928382564025081, | |
| "grad_norm": 32.300174713134766, | |
| "learning_rate": 9.997843687944153e-06, | |
| "loss": 6.2416, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04230565838180857, | |
| "grad_norm": 29.263317108154297, | |
| "learning_rate": 9.996166770665168e-06, | |
| "loss": 5.5583, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04532749112336632, | |
| "grad_norm": 33.3656005859375, | |
| "learning_rate": 9.994011009695908e-06, | |
| "loss": 5.6737, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.048349323864924076, | |
| "grad_norm": 32.699825286865234, | |
| "learning_rate": 9.991376611649278e-06, | |
| "loss": 6.0879, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05137115660648183, | |
| "grad_norm": 27.45968246459961, | |
| "learning_rate": 9.988263829011821e-06, | |
| "loss": 5.4056, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.054392989348039586, | |
| "grad_norm": 25.30878448486328, | |
| "learning_rate": 9.984672960119523e-06, | |
| "loss": 5.3618, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.05741482208959734, | |
| "grad_norm": 40.055721282958984, | |
| "learning_rate": 9.980604349129212e-06, | |
| "loss": 5.7602, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.060436654831155096, | |
| "grad_norm": 26.245195388793945, | |
| "learning_rate": 9.976058385985575e-06, | |
| "loss": 5.186, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06345848757271286, | |
| "grad_norm": 34.81965637207031, | |
| "learning_rate": 9.971035506383791e-06, | |
| "loss": 5.5341, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.0664803203142706, | |
| "grad_norm": 29.513893127441406, | |
| "learning_rate": 9.96553619172777e-06, | |
| "loss": 5.0542, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06950215305582837, | |
| "grad_norm": 32.30284118652344, | |
| "learning_rate": 9.959560969084004e-06, | |
| "loss": 5.3365, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07252398579738611, | |
| "grad_norm": 27.652576446533203, | |
| "learning_rate": 9.953110411131073e-06, | |
| "loss": 4.7513, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07554581853894388, | |
| "grad_norm": 28.387413024902344, | |
| "learning_rate": 9.946185136104736e-06, | |
| "loss": 5.4127, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07856765128050162, | |
| "grad_norm": 29.694316864013672, | |
| "learning_rate": 9.938785807738692e-06, | |
| "loss": 4.813, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.08158948402205939, | |
| "grad_norm": 31.964120864868164, | |
| "learning_rate": 9.930913135200964e-06, | |
| "loss": 5.4212, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08461131676361713, | |
| "grad_norm": 23.594715118408203, | |
| "learning_rate": 9.922567873025924e-06, | |
| "loss": 5.2445, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.0876331495051749, | |
| "grad_norm": 23.0896053314209, | |
| "learning_rate": 9.913750821041988e-06, | |
| "loss": 4.5194, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.09065498224673264, | |
| "grad_norm": 25.44329833984375, | |
| "learning_rate": 9.904462824294945e-06, | |
| "loss": 4.6093, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0936768149882904, | |
| "grad_norm": 27.408288955688477, | |
| "learning_rate": 9.894704772966978e-06, | |
| "loss": 4.512, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09669864772984815, | |
| "grad_norm": 24.26542091369629, | |
| "learning_rate": 9.884477602291343e-06, | |
| "loss": 4.5071, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09972048047140591, | |
| "grad_norm": 35.8819694519043, | |
| "learning_rate": 9.873782292462727e-06, | |
| "loss": 4.3557, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.10274231321296366, | |
| "grad_norm": 29.487594604492188, | |
| "learning_rate": 9.862619868543323e-06, | |
| "loss": 8.2236, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.10576414595452142, | |
| "grad_norm": 38.13749694824219, | |
| "learning_rate": 9.850991400364557e-06, | |
| "loss": 5.1538, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10878597869607917, | |
| "grad_norm": 25.492799758911133, | |
| "learning_rate": 9.838898002424586e-06, | |
| "loss": 6.0666, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.11180781143763693, | |
| "grad_norm": 31.119089126586914, | |
| "learning_rate": 9.826340833781448e-06, | |
| "loss": 5.8633, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.11482964417919468, | |
| "grad_norm": 21.065149307250977, | |
| "learning_rate": 9.813321097942005e-06, | |
| "loss": 5.1017, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11785147692075244, | |
| "grad_norm": 29.40814971923828, | |
| "learning_rate": 9.79984004274658e-06, | |
| "loss": 4.9132, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.12087330966231019, | |
| "grad_norm": 22.45477294921875, | |
| "learning_rate": 9.785898960249365e-06, | |
| "loss": 4.2496, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12389514240386795, | |
| "grad_norm": 19.05487060546875, | |
| "learning_rate": 9.771499186594586e-06, | |
| "loss": 5.0767, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.12691697514542571, | |
| "grad_norm": 31.310686111450195, | |
| "learning_rate": 9.756642101888449e-06, | |
| "loss": 5.192, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12993880788698345, | |
| "grad_norm": 25.689640045166016, | |
| "learning_rate": 9.74132913006686e-06, | |
| "loss": 3.445, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1329606406285412, | |
| "grad_norm": 21.052574157714844, | |
| "learning_rate": 9.725561738758956e-06, | |
| "loss": 3.3354, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.13598247337009897, | |
| "grad_norm": 24.987884521484375, | |
| "learning_rate": 9.709341439146452e-06, | |
| "loss": 5.0777, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.13900430611165673, | |
| "grad_norm": 26.842397689819336, | |
| "learning_rate": 9.692669785818787e-06, | |
| "loss": 6.4292, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.14202613885321447, | |
| "grad_norm": 35.66836166381836, | |
| "learning_rate": 9.675548376624149e-06, | |
| "loss": 5.7348, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14504797159477223, | |
| "grad_norm": 29.318471908569336, | |
| "learning_rate": 9.657978852516318e-06, | |
| "loss": 5.6924, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.14806980433633, | |
| "grad_norm": 23.544092178344727, | |
| "learning_rate": 9.639962897397405e-06, | |
| "loss": 4.183, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.15109163707788775, | |
| "grad_norm": 22.90180206298828, | |
| "learning_rate": 9.621502237956452e-06, | |
| "loss": 5.085, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1541134698194455, | |
| "grad_norm": 23.748275756835938, | |
| "learning_rate": 9.602598643503957e-06, | |
| "loss": 3.2694, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.15713530256100325, | |
| "grad_norm": 29.096708297729492, | |
| "learning_rate": 9.583253925802283e-06, | |
| "loss": 4.2373, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.160157135302561, | |
| "grad_norm": 24.87314796447754, | |
| "learning_rate": 9.563469938892023e-06, | |
| "loss": 4.8482, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.16317896804411877, | |
| "grad_norm": 24.310091018676758, | |
| "learning_rate": 9.543248578914309e-06, | |
| "loss": 3.2299, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1662008007856765, | |
| "grad_norm": 24.80878257751465, | |
| "learning_rate": 9.522591783929069e-06, | |
| "loss": 4.8424, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.16922263352723427, | |
| "grad_norm": 22.142215728759766, | |
| "learning_rate": 9.501501533729297e-06, | |
| "loss": 4.1786, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.17224446626879203, | |
| "grad_norm": 33.77587890625, | |
| "learning_rate": 9.479979849651287e-06, | |
| "loss": 5.7505, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1752662990103498, | |
| "grad_norm": 25.414831161499023, | |
| "learning_rate": 9.45802879438091e-06, | |
| "loss": 6.3392, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.17828813175190752, | |
| "grad_norm": 25.716073989868164, | |
| "learning_rate": 9.43565047175593e-06, | |
| "loss": 4.1603, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.1813099644934653, | |
| "grad_norm": 25.389522552490234, | |
| "learning_rate": 9.412847026564359e-06, | |
| "loss": 3.9676, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.18433179723502305, | |
| "grad_norm": 22.911640167236328, | |
| "learning_rate": 9.389620644338893e-06, | |
| "loss": 4.1508, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.1873536299765808, | |
| "grad_norm": 36.27210998535156, | |
| "learning_rate": 9.365973551147453e-06, | |
| "loss": 4.691, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.19037546271813854, | |
| "grad_norm": 23.555246353149414, | |
| "learning_rate": 9.341908013379832e-06, | |
| "loss": 4.7148, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1933972954596963, | |
| "grad_norm": 25.42097282409668, | |
| "learning_rate": 9.317426337530477e-06, | |
| "loss": 4.0105, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.19641912820125407, | |
| "grad_norm": 24.92901611328125, | |
| "learning_rate": 9.292530869977432e-06, | |
| "loss": 5.5589, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.19944096094281183, | |
| "grad_norm": 26.411352157592773, | |
| "learning_rate": 9.26722399675745e-06, | |
| "loss": 3.1881, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.20246279368436956, | |
| "grad_norm": 22.39121437072754, | |
| "learning_rate": 9.24150814333732e-06, | |
| "loss": 3.9177, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.20548462642592732, | |
| "grad_norm": 21.436046600341797, | |
| "learning_rate": 9.215385774381395e-06, | |
| "loss": 6.2124, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.2085064591674851, | |
| "grad_norm": 42.19996643066406, | |
| "learning_rate": 9.188859393515382e-06, | |
| "loss": 4.863, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.21152829190904285, | |
| "grad_norm": 24.43948745727539, | |
| "learning_rate": 9.16193154308638e-06, | |
| "loss": 6.0562, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.21455012465060058, | |
| "grad_norm": 36.5896110534668, | |
| "learning_rate": 9.13460480391922e-06, | |
| "loss": 6.1878, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.21757195739215834, | |
| "grad_norm": 39.19657897949219, | |
| "learning_rate": 9.106881795069116e-06, | |
| "loss": 6.4964, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2205937901337161, | |
| "grad_norm": 19.438859939575195, | |
| "learning_rate": 9.078765173570649e-06, | |
| "loss": 3.1914, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22361562287527387, | |
| "grad_norm": 26.316898345947266, | |
| "learning_rate": 9.0502576341831e-06, | |
| "loss": 4.0543, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2266374556168316, | |
| "grad_norm": 21.5406436920166, | |
| "learning_rate": 9.02136190913219e-06, | |
| "loss": 5.4649, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.22965928835838936, | |
| "grad_norm": 38.014617919921875, | |
| "learning_rate": 8.99208076784822e-06, | |
| "loss": 4.6499, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.23268112109994712, | |
| "grad_norm": 16.046876907348633, | |
| "learning_rate": 8.962417016700624e-06, | |
| "loss": 3.0368, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2357029538415049, | |
| "grad_norm": 25.170169830322266, | |
| "learning_rate": 8.932373498729026e-06, | |
| "loss": 4.6374, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.23872478658306262, | |
| "grad_norm": 28.294591903686523, | |
| "learning_rate": 8.901953093370734e-06, | |
| "loss": 4.0344, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.24174661932462038, | |
| "grad_norm": 25.618423461914062, | |
| "learning_rate": 8.871158716184784e-06, | |
| "loss": 3.9153, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24476845206617814, | |
| "grad_norm": 33.044132232666016, | |
| "learning_rate": 8.839993318572497e-06, | |
| "loss": 4.852, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2477902848077359, | |
| "grad_norm": 19.522127151489258, | |
| "learning_rate": 8.808459887494617e-06, | |
| "loss": 3.0679, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.25081211754929367, | |
| "grad_norm": 17.915157318115234, | |
| "learning_rate": 8.77656144518502e-06, | |
| "loss": 3.832, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.25383395029085143, | |
| "grad_norm": 18.468053817749023, | |
| "learning_rate": 8.744301048861083e-06, | |
| "loss": 2.9134, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.25685578303240914, | |
| "grad_norm": 25.19109535217285, | |
| "learning_rate": 8.711681790430646e-06, | |
| "loss": 2.9987, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2598776157739669, | |
| "grad_norm": 27.227184295654297, | |
| "learning_rate": 8.678706796195694e-06, | |
| "loss": 4.7592, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.26289944851552466, | |
| "grad_norm": 28.04375457763672, | |
| "learning_rate": 8.645379226552712e-06, | |
| "loss": 3.7402, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2659212812570824, | |
| "grad_norm": 21.457616806030273, | |
| "learning_rate": 8.611702275689805e-06, | |
| "loss": 4.6756, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2689431139986402, | |
| "grad_norm": 35.01508331298828, | |
| "learning_rate": 8.577679171280538e-06, | |
| "loss": 4.5315, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.27196494674019794, | |
| "grad_norm": 20.160045623779297, | |
| "learning_rate": 8.543313174174601e-06, | |
| "loss": 5.2698, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2749867794817557, | |
| "grad_norm": 22.52850341796875, | |
| "learning_rate": 8.508607578085281e-06, | |
| "loss": 3.849, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.27800861222331347, | |
| "grad_norm": 21.895462036132812, | |
| "learning_rate": 8.473565709273786e-06, | |
| "loss": 3.8616, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2810304449648712, | |
| "grad_norm": 16.077316284179688, | |
| "learning_rate": 8.438190926230439e-06, | |
| "loss": 3.8386, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.28405227770642894, | |
| "grad_norm": 33.1984977722168, | |
| "learning_rate": 8.40248661935281e-06, | |
| "loss": 4.3994, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.2870741104479867, | |
| "grad_norm": 27.1571102142334, | |
| "learning_rate": 8.366456210620756e-06, | |
| "loss": 3.1029, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.29009594318954446, | |
| "grad_norm": 31.706750869750977, | |
| "learning_rate": 8.330103153268464e-06, | |
| "loss": 3.7567, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2931177759311022, | |
| "grad_norm": 24.30504608154297, | |
| "learning_rate": 8.29343093145347e-06, | |
| "loss": 3.6988, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.29613960867266, | |
| "grad_norm": 24.231523513793945, | |
| "learning_rate": 8.25644305992275e-06, | |
| "loss": 3.6097, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.29916144141421774, | |
| "grad_norm": 19.621383666992188, | |
| "learning_rate": 8.21914308367584e-06, | |
| "loss": 4.5566, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3021832741557755, | |
| "grad_norm": 21.627859115600586, | |
| "learning_rate": 8.181534577625088e-06, | |
| "loss": 3.7714, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3052051068973332, | |
| "grad_norm": 14.206421852111816, | |
| "learning_rate": 8.143621146253022e-06, | |
| "loss": 4.6373, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.308226939638891, | |
| "grad_norm": 27.084983825683594, | |
| "learning_rate": 8.105406423266884e-06, | |
| "loss": 4.6538, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.31124877238044873, | |
| "grad_norm": 20.950910568237305, | |
| "learning_rate": 8.066894071250374e-06, | |
| "loss": 4.4614, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3142706051220065, | |
| "grad_norm": 20.357742309570312, | |
| "learning_rate": 8.02808778131262e-06, | |
| "loss": 3.7694, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.31729243786356426, | |
| "grad_norm": 18.685476303100586, | |
| "learning_rate": 7.988991272734407e-06, | |
| "loss": 4.4575, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.320314270605122, | |
| "grad_norm": 24.249338150024414, | |
| "learning_rate": 7.94960829261172e-06, | |
| "loss": 4.4394, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.3233361033466798, | |
| "grad_norm": 22.846027374267578, | |
| "learning_rate": 7.909942615496613e-06, | |
| "loss": 4.7241, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.32635793608823754, | |
| "grad_norm": 30.40308952331543, | |
| "learning_rate": 7.869998043035442e-06, | |
| "loss": 5.3999, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.32937976882979525, | |
| "grad_norm": 17.647789001464844, | |
| "learning_rate": 7.829778403604512e-06, | |
| "loss": 5.0469, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.332401601571353, | |
| "grad_norm": 33.98617935180664, | |
| "learning_rate": 7.789287551943158e-06, | |
| "loss": 6.0896, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3354234343129108, | |
| "grad_norm": 21.646024703979492, | |
| "learning_rate": 7.748529368784293e-06, | |
| "loss": 4.5196, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.33844526705446853, | |
| "grad_norm": 18.94881820678711, | |
| "learning_rate": 7.707507760482473e-06, | |
| "loss": 6.1607, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3414670997960263, | |
| "grad_norm": 18.058412551879883, | |
| "learning_rate": 7.666226658639507e-06, | |
| "loss": 3.7909, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.34448893253758406, | |
| "grad_norm": 22.541349411010742, | |
| "learning_rate": 7.624690019727636e-06, | |
| "loss": 3.638, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.3475107652791418, | |
| "grad_norm": 23.882991790771484, | |
| "learning_rate": 7.58290182471034e-06, | |
| "loss": 4.53, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3505325980206996, | |
| "grad_norm": 19.6879940032959, | |
| "learning_rate": 7.5408660786607976e-06, | |
| "loss": 3.6987, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.3535544307622573, | |
| "grad_norm": 20.6401309967041, | |
| "learning_rate": 7.498586810378019e-06, | |
| "loss": 2.9513, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.35657626350381505, | |
| "grad_norm": 22.658132553100586, | |
| "learning_rate": 7.456068072000731e-06, | |
| "loss": 2.8103, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3595980962453728, | |
| "grad_norm": 23.935726165771484, | |
| "learning_rate": 7.4133139386190026e-06, | |
| "loss": 4.5498, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3626199289869306, | |
| "grad_norm": 18.697385787963867, | |
| "learning_rate": 7.3703285078836796e-06, | |
| "loss": 5.2042, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.36564176172848833, | |
| "grad_norm": 17.5216064453125, | |
| "learning_rate": 7.3271158996136625e-06, | |
| "loss": 3.7229, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3686635944700461, | |
| "grad_norm": 18.313034057617188, | |
| "learning_rate": 7.283680255401049e-06, | |
| "loss": 4.403, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.37168542721160386, | |
| "grad_norm": 19.784748077392578, | |
| "learning_rate": 7.240025738214193e-06, | |
| "loss": 6.1978, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3747072599531616, | |
| "grad_norm": 33.28024673461914, | |
| "learning_rate": 7.196156531998718e-06, | |
| "loss": 4.4892, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3777290926947193, | |
| "grad_norm": 20.449913024902344, | |
| "learning_rate": 7.152076841276527e-06, | |
| "loss": 3.6566, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3807509254362771, | |
| "grad_norm": 19.441957473754883, | |
| "learning_rate": 7.1077908907428154e-06, | |
| "loss": 3.7812, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.38377275817783485, | |
| "grad_norm": 32.515724182128906, | |
| "learning_rate": 7.063302924861182e-06, | |
| "loss": 3.8969, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3867945909193926, | |
| "grad_norm": 22.129140853881836, | |
| "learning_rate": 7.018617207456821e-06, | |
| "loss": 3.5997, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3898164236609504, | |
| "grad_norm": 19.576011657714844, | |
| "learning_rate": 6.973738021307872e-06, | |
| "loss": 3.6646, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.39283825640250813, | |
| "grad_norm": 17.848796844482422, | |
| "learning_rate": 6.9286696677349455e-06, | |
| "loss": 5.9623, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3958600891440659, | |
| "grad_norm": 15.815289497375488, | |
| "learning_rate": 6.883416466188881e-06, | |
| "loss": 3.6821, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.39888192188562366, | |
| "grad_norm": 17.62392807006836, | |
| "learning_rate": 6.837982753836755e-06, | |
| "loss": 2.8778, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.40190375462718136, | |
| "grad_norm": 34.39213180541992, | |
| "learning_rate": 6.7923728851461955e-06, | |
| "loss": 6.0046, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4049255873687391, | |
| "grad_norm": 22.834793090820312, | |
| "learning_rate": 6.74659123146805e-06, | |
| "loss": 3.6498, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.4079474201102969, | |
| "grad_norm": 18.146869659423828, | |
| "learning_rate": 6.70064218061742e-06, | |
| "loss": 2.8181, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.41096925285185465, | |
| "grad_norm": 18.262357711791992, | |
| "learning_rate": 6.654530136453119e-06, | |
| "loss": 4.3635, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.4139910855934124, | |
| "grad_norm": 18.1636905670166, | |
| "learning_rate": 6.608259518455599e-06, | |
| "loss": 5.2127, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.4170129183349702, | |
| "grad_norm": 17.246234893798828, | |
| "learning_rate": 6.5618347613033875e-06, | |
| "loss": 5.1173, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.42003475107652793, | |
| "grad_norm": 19.54306983947754, | |
| "learning_rate": 6.5152603144480406e-06, | |
| "loss": 5.9817, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.4230565838180857, | |
| "grad_norm": 31.445457458496094, | |
| "learning_rate": 6.468540641687716e-06, | |
| "loss": 4.5568, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4260784165596434, | |
| "grad_norm": 19.258493423461914, | |
| "learning_rate": 6.421680220739337e-06, | |
| "loss": 3.9311, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.42910024930120116, | |
| "grad_norm": 33.21185302734375, | |
| "learning_rate": 6.374683542809447e-06, | |
| "loss": 7.8417, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.4321220820427589, | |
| "grad_norm": 19.956239700317383, | |
| "learning_rate": 6.327555112163761e-06, | |
| "loss": 4.3582, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.4351439147843167, | |
| "grad_norm": 19.256486892700195, | |
| "learning_rate": 6.280299445695469e-06, | |
| "loss": 5.2, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.43816574752587445, | |
| "grad_norm": 20.045286178588867, | |
| "learning_rate": 6.232921072492319e-06, | |
| "loss": 4.3409, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4411875802674322, | |
| "grad_norm": 24.16641616821289, | |
| "learning_rate": 6.185424533402543e-06, | |
| "loss": 4.3162, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.44420941300899, | |
| "grad_norm": 23.316164016723633, | |
| "learning_rate": 6.13781438059966e-06, | |
| "loss": 3.5112, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.44723124575054773, | |
| "grad_norm": 34.204627990722656, | |
| "learning_rate": 6.090095177146178e-06, | |
| "loss": 5.1696, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.45025307849210544, | |
| "grad_norm": 17.53434181213379, | |
| "learning_rate": 6.042271496556255e-06, | |
| "loss": 2.7874, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4532749112336632, | |
| "grad_norm": 21.362934112548828, | |
| "learning_rate": 5.994347922357372e-06, | |
| "loss": 3.8133, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.45629674397522096, | |
| "grad_norm": 19.935638427734375, | |
| "learning_rate": 5.946329047651037e-06, | |
| "loss": 3.592, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.4593185767167787, | |
| "grad_norm": 17.95412826538086, | |
| "learning_rate": 5.8982194746725686e-06, | |
| "loss": 2.7345, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4623404094583365, | |
| "grad_norm": 24.026193618774414, | |
| "learning_rate": 5.850023814350007e-06, | |
| "loss": 4.2519, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.46536224219989425, | |
| "grad_norm": 12.00658893585205, | |
| "learning_rate": 5.801746685862197e-06, | |
| "loss": 6.0717, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.468384074941452, | |
| "grad_norm": 14.519695281982422, | |
| "learning_rate": 5.753392716196069e-06, | |
| "loss": 2.8474, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4714059076830098, | |
| "grad_norm": 15.277630805969238, | |
| "learning_rate": 5.704966539703185e-06, | |
| "loss": 3.6301, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4744277404245675, | |
| "grad_norm": 17.934938430786133, | |
| "learning_rate": 5.656472797655571e-06, | |
| "loss": 4.4189, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.47744957316612524, | |
| "grad_norm": 17.185529708862305, | |
| "learning_rate": 5.60791613780088e-06, | |
| "loss": 2.7758, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.480471405907683, | |
| "grad_norm": 25.111557006835938, | |
| "learning_rate": 5.5593012139169525e-06, | |
| "loss": 4.296, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.48349323864924076, | |
| "grad_norm": 23.77570343017578, | |
| "learning_rate": 5.510632685365777e-06, | |
| "loss": 4.4462, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4865150713907985, | |
| "grad_norm": 17.37128448486328, | |
| "learning_rate": 5.461915216646938e-06, | |
| "loss": 2.7426, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4895369041323563, | |
| "grad_norm": 23.484580993652344, | |
| "learning_rate": 5.41315347695055e-06, | |
| "loss": 4.2378, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.49255873687391405, | |
| "grad_norm": 23.495826721191406, | |
| "learning_rate": 5.364352139709758e-06, | |
| "loss": 4.8879, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.4955805696154718, | |
| "grad_norm": 16.23356819152832, | |
| "learning_rate": 5.315515882152822e-06, | |
| "loss": 3.5359, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.4986024023570295, | |
| "grad_norm": 16.77799415588379, | |
| "learning_rate": 5.266649384854842e-06, | |
| "loss": 4.2516, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5016242350985873, | |
| "grad_norm": 21.264799118041992, | |
| "learning_rate": 5.217757331289165e-06, | |
| "loss": 3.6844, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5046460678401451, | |
| "grad_norm": 18.198184967041016, | |
| "learning_rate": 5.168844407378506e-06, | |
| "loss": 4.8485, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5076679005817029, | |
| "grad_norm": 13.497072219848633, | |
| "learning_rate": 5.119915301045836e-06, | |
| "loss": 2.8835, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5106897333232605, | |
| "grad_norm": 24.342716217041016, | |
| "learning_rate": 5.070974701765089e-06, | |
| "loss": 5.1527, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5137115660648183, | |
| "grad_norm": 25.917234420776367, | |
| "learning_rate": 5.022027300111712e-06, | |
| "loss": 4.3981, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.516733398806376, | |
| "grad_norm": 15.280237197875977, | |
| "learning_rate": 4.973077787313099e-06, | |
| "loss": 4.4554, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5197552315479338, | |
| "grad_norm": 17.290264129638672, | |
| "learning_rate": 4.924130854798983e-06, | |
| "loss": 5.1108, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.5227770642894916, | |
| "grad_norm": 15.63051700592041, | |
| "learning_rate": 4.875191193751803e-06, | |
| "loss": 2.8006, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.5257988970310493, | |
| "grad_norm": 15.663633346557617, | |
| "learning_rate": 4.826263494657077e-06, | |
| "loss": 3.4979, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5288207297726071, | |
| "grad_norm": 35.42136001586914, | |
| "learning_rate": 4.777352446853863e-06, | |
| "loss": 4.9996, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5318425625141648, | |
| "grad_norm": 23.063594818115234, | |
| "learning_rate": 4.72846273808533e-06, | |
| "loss": 3.509, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5348643952557226, | |
| "grad_norm": 21.706233978271484, | |
| "learning_rate": 4.679599054049458e-06, | |
| "loss": 3.3899, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5378862279972804, | |
| "grad_norm": 20.82579231262207, | |
| "learning_rate": 4.630766077949965e-06, | |
| "loss": 5.9861, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5409080607388381, | |
| "grad_norm": 32.06898880004883, | |
| "learning_rate": 4.5819684900474484e-06, | |
| "loss": 4.3172, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5439298934803959, | |
| "grad_norm": 16.330984115600586, | |
| "learning_rate": 4.5332109672108245e-06, | |
| "loss": 4.4365, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5469517262219536, | |
| "grad_norm": 17.189834594726562, | |
| "learning_rate": 4.484498182469085e-06, | |
| "loss": 3.6319, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5499735589635114, | |
| "grad_norm": 19.211336135864258, | |
| "learning_rate": 4.435834804563422e-06, | |
| "loss": 5.8999, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5529953917050692, | |
| "grad_norm": 26.310638427734375, | |
| "learning_rate": 4.387225497499767e-06, | |
| "loss": 3.5792, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5560172244466269, | |
| "grad_norm": 20.680715560913086, | |
| "learning_rate": 4.3386749201017856e-06, | |
| "loss": 3.4555, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5590390571881846, | |
| "grad_norm": 15.533769607543945, | |
| "learning_rate": 4.290187725564356e-06, | |
| "loss": 6.0278, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5620608899297423, | |
| "grad_norm": 13.684257507324219, | |
| "learning_rate": 4.2417685610076135e-06, | |
| "loss": 3.4758, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5650827226713001, | |
| "grad_norm": 15.711587905883789, | |
| "learning_rate": 4.193422067031535e-06, | |
| "loss": 4.3166, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5681045554128579, | |
| "grad_norm": 18.764991760253906, | |
| "learning_rate": 4.145152877271196e-06, | |
| "loss": 4.1625, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5711263881544156, | |
| "grad_norm": 19.19873809814453, | |
| "learning_rate": 4.096965617952667e-06, | |
| "loss": 4.4233, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5741482208959734, | |
| "grad_norm": 20.817365646362305, | |
| "learning_rate": 4.048864907449619e-06, | |
| "loss": 3.5268, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5771700536375312, | |
| "grad_norm": 18.440645217895508, | |
| "learning_rate": 4.000855355840695e-06, | |
| "loss": 3.5747, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5801918863790889, | |
| "grad_norm": 15.997143745422363, | |
| "learning_rate": 3.952941564467665e-06, | |
| "loss": 4.2257, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5832137191206467, | |
| "grad_norm": 20.629562377929688, | |
| "learning_rate": 3.905128125494427e-06, | |
| "loss": 4.3136, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5862355518622044, | |
| "grad_norm": 33.730995178222656, | |
| "learning_rate": 3.8574196214668876e-06, | |
| "loss": 4.509, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5892573846037622, | |
| "grad_norm": 30.045576095581055, | |
| "learning_rate": 3.8098206248737486e-06, | |
| "loss": 5.139, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.59227921734532, | |
| "grad_norm": 23.693470001220703, | |
| "learning_rate": 3.7623356977082794e-06, | |
| "loss": 2.5913, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5953010500868777, | |
| "grad_norm": 18.655092239379883, | |
| "learning_rate": 3.714969391031084e-06, | |
| "loss": 4.3328, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5983228828284355, | |
| "grad_norm": 15.45345687866211, | |
| "learning_rate": 3.6677262445339136e-06, | |
| "loss": 3.5691, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6013447155699932, | |
| "grad_norm": 21.302995681762695, | |
| "learning_rate": 3.6206107861045803e-06, | |
| "loss": 2.5934, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.604366548311551, | |
| "grad_norm": 13.75935173034668, | |
| "learning_rate": 3.5736275313929826e-06, | |
| "loss": 4.3405, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6073883810531087, | |
| "grad_norm": 17.593429565429688, | |
| "learning_rate": 3.5267809833783213e-06, | |
| "loss": 4.8443, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6104102137946664, | |
| "grad_norm": 23.467853546142578, | |
| "learning_rate": 3.4800756319375326e-06, | |
| "loss": 3.4879, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.6134320465362242, | |
| "grad_norm": 25.12725830078125, | |
| "learning_rate": 3.433515953414953e-06, | |
| "loss": 2.7966, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.616453879277782, | |
| "grad_norm": 33.0245475769043, | |
| "learning_rate": 3.387106410193308e-06, | |
| "loss": 5.8078, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6194757120193397, | |
| "grad_norm": 18.8001651763916, | |
| "learning_rate": 3.3408514502660195e-06, | |
| "loss": 5.2049, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6224975447608975, | |
| "grad_norm": 16.787553787231445, | |
| "learning_rate": 3.2947555068109057e-06, | |
| "loss": 3.3988, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.6255193775024552, | |
| "grad_norm": 21.532262802124023, | |
| "learning_rate": 3.248822997765295e-06, | |
| "loss": 2.815, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.628541210244013, | |
| "grad_norm": 24.630603790283203, | |
| "learning_rate": 3.203058325402599e-06, | |
| "loss": 4.3332, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.6315630429855708, | |
| "grad_norm": 16.667922973632812, | |
| "learning_rate": 3.1574658759103904e-06, | |
| "loss": 4.3038, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6345848757271285, | |
| "grad_norm": 20.671772003173828, | |
| "learning_rate": 3.1120500189700204e-06, | |
| "loss": 3.4132, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6376067084686863, | |
| "grad_norm": 21.932987213134766, | |
| "learning_rate": 3.066815107337815e-06, | |
| "loss": 4.1988, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.640628541210244, | |
| "grad_norm": 17.348411560058594, | |
| "learning_rate": 3.0217654764279114e-06, | |
| "loss": 3.5937, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6436503739518018, | |
| "grad_norm": 25.625871658325195, | |
| "learning_rate": 2.9769054438967192e-06, | |
| "loss": 5.9817, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6466722066933596, | |
| "grad_norm": 31.0660457611084, | |
| "learning_rate": 2.9322393092291256e-06, | |
| "loss": 5.6772, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6496940394349173, | |
| "grad_norm": 20.511960983276367, | |
| "learning_rate": 2.887771353326422e-06, | |
| "loss": 4.2915, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6527158721764751, | |
| "grad_norm": 17.798234939575195, | |
| "learning_rate": 2.8435058380959957e-06, | |
| "loss": 2.642, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6557377049180327, | |
| "grad_norm": 18.133886337280273, | |
| "learning_rate": 2.7994470060428835e-06, | |
| "loss": 4.1208, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6587595376595905, | |
| "grad_norm": 18.74016571044922, | |
| "learning_rate": 2.7555990798631436e-06, | |
| "loss": 4.8817, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6617813704011483, | |
| "grad_norm": 15.885804176330566, | |
| "learning_rate": 2.711966262039145e-06, | |
| "loss": 3.3242, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.664803203142706, | |
| "grad_norm": 24.100414276123047, | |
| "learning_rate": 2.668552734436802e-06, | |
| "loss": 4.3377, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6678250358842638, | |
| "grad_norm": 17.113306045532227, | |
| "learning_rate": 2.6253626579047653e-06, | |
| "loss": 5.7855, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6708468686258215, | |
| "grad_norm": 33.268699645996094, | |
| "learning_rate": 2.582400171875638e-06, | |
| "loss": 3.4326, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6738687013673793, | |
| "grad_norm": 29.673768997192383, | |
| "learning_rate": 2.5396693939692474e-06, | |
| "loss": 4.8596, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6768905341089371, | |
| "grad_norm": 14.550185203552246, | |
| "learning_rate": 2.4971744195979985e-06, | |
| "loss": 5.1031, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6799123668504948, | |
| "grad_norm": 32.16508102416992, | |
| "learning_rate": 2.4549193215743706e-06, | |
| "loss": 5.833, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6829341995920526, | |
| "grad_norm": 18.873088836669922, | |
| "learning_rate": 2.4129081497205536e-06, | |
| "loss": 3.3544, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6859560323336104, | |
| "grad_norm": 31.875137329101562, | |
| "learning_rate": 2.3711449304803174e-06, | |
| "loss": 4.0864, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6889778650751681, | |
| "grad_norm": 27.996572494506836, | |
| "learning_rate": 2.329633666533103e-06, | |
| "loss": 4.0582, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6919996978167259, | |
| "grad_norm": 19.299062728881836, | |
| "learning_rate": 2.288378336410398e-06, | |
| "loss": 4.2188, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6950215305582836, | |
| "grad_norm": 21.146148681640625, | |
| "learning_rate": 2.2473828941144277e-06, | |
| "loss": 4.8756, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6980433632998414, | |
| "grad_norm": 28.3226261138916, | |
| "learning_rate": 2.20665126873919e-06, | |
| "loss": 3.3593, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7010651960413992, | |
| "grad_norm": 16.02470588684082, | |
| "learning_rate": 2.1661873640938818e-06, | |
| "loss": 4.1255, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.7040870287829568, | |
| "grad_norm": 21.263837814331055, | |
| "learning_rate": 2.1259950583287633e-06, | |
| "loss": 4.145, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7071088615245146, | |
| "grad_norm": 22.879661560058594, | |
| "learning_rate": 2.086078203563439e-06, | |
| "loss": 4.7453, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.7101306942660723, | |
| "grad_norm": 15.726652145385742, | |
| "learning_rate": 2.0464406255176967e-06, | |
| "loss": 4.019, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7131525270076301, | |
| "grad_norm": 30.606904983520508, | |
| "learning_rate": 2.0070861231448142e-06, | |
| "loss": 4.9014, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.7161743597491879, | |
| "grad_norm": 17.185054779052734, | |
| "learning_rate": 1.968018468267472e-06, | |
| "loss": 4.1918, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.7191961924907456, | |
| "grad_norm": 15.510167121887207, | |
| "learning_rate": 1.929241405216254e-06, | |
| "loss": 4.0934, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.7222180252323034, | |
| "grad_norm": 20.12055206298828, | |
| "learning_rate": 1.8907586504707776e-06, | |
| "loss": 4.701, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.7252398579738611, | |
| "grad_norm": 19.135282516479492, | |
| "learning_rate": 1.8525738923035002e-06, | |
| "loss": 2.5439, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7282616907154189, | |
| "grad_norm": 19.167003631591797, | |
| "learning_rate": 1.8146907904262268e-06, | |
| "loss": 4.2791, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.7312835234569767, | |
| "grad_norm": 24.79986572265625, | |
| "learning_rate": 1.7771129756393545e-06, | |
| "loss": 3.4256, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.7343053561985344, | |
| "grad_norm": 20.59393310546875, | |
| "learning_rate": 1.7398440494838947e-06, | |
| "loss": 3.5206, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7373271889400922, | |
| "grad_norm": 25.903627395629883, | |
| "learning_rate": 1.7028875838962822e-06, | |
| "loss": 4.1281, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.74034902168165, | |
| "grad_norm": 35.45489501953125, | |
| "learning_rate": 1.6662471208660392e-06, | |
| "loss": 4.0468, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7433708544232077, | |
| "grad_norm": 20.3117618560791, | |
| "learning_rate": 1.6299261720963095e-06, | |
| "loss": 4.1749, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7463926871647655, | |
| "grad_norm": 15.878867149353027, | |
| "learning_rate": 1.5939282186672705e-06, | |
| "loss": 4.8916, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7494145199063232, | |
| "grad_norm": 19.15277099609375, | |
| "learning_rate": 1.5582567107025237e-06, | |
| "loss": 4.8288, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7524363526478809, | |
| "grad_norm": 29.44374656677246, | |
| "learning_rate": 1.5229150670384057e-06, | |
| "loss": 3.3806, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7554581853894387, | |
| "grad_norm": 23.206140518188477, | |
| "learning_rate": 1.4879066748963295e-06, | |
| "loss": 2.5563, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7584800181309964, | |
| "grad_norm": 27.133193969726562, | |
| "learning_rate": 1.4532348895581466e-06, | |
| "loss": 3.4434, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7615018508725542, | |
| "grad_norm": 29.599319458007812, | |
| "learning_rate": 1.4189030340445648e-06, | |
| "loss": 6.7087, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7645236836141119, | |
| "grad_norm": 17.123348236083984, | |
| "learning_rate": 1.3849143987966646e-06, | |
| "loss": 4.9595, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7675455163556697, | |
| "grad_norm": 16.49233627319336, | |
| "learning_rate": 1.3512722413605356e-06, | |
| "loss": 4.0857, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7705673490972275, | |
| "grad_norm": 16.6666316986084, | |
| "learning_rate": 1.3179797860750654e-06, | |
| "loss": 4.8943, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7735891818387852, | |
| "grad_norm": 19.440494537353516, | |
| "learning_rate": 1.2850402237629184e-06, | |
| "loss": 4.1448, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.776611014580343, | |
| "grad_norm": 14.674943923950195, | |
| "learning_rate": 1.2524567114247083e-06, | |
| "loss": 3.3491, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7796328473219007, | |
| "grad_norm": 16.349637985229492, | |
| "learning_rate": 1.2202323719364324e-06, | |
| "loss": 3.2897, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7826546800634585, | |
| "grad_norm": 19.67890739440918, | |
| "learning_rate": 1.1883702937501708e-06, | |
| "loss": 4.0901, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7856765128050163, | |
| "grad_norm": 21.339618682861328, | |
| "learning_rate": 1.1568735305980694e-06, | |
| "loss": 4.1003, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.788698345546574, | |
| "grad_norm": 21.269119262695312, | |
| "learning_rate": 1.1257451011996807e-06, | |
| "loss": 3.4165, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7917201782881318, | |
| "grad_norm": 33.041419982910156, | |
| "learning_rate": 1.0949879889726295e-06, | |
| "loss": 3.4622, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7947420110296896, | |
| "grad_norm": 28.960115432739258, | |
| "learning_rate": 1.0646051417466801e-06, | |
| "loss": 3.4136, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7977638437712473, | |
| "grad_norm": 24.76239013671875, | |
| "learning_rate": 1.0345994714812135e-06, | |
| "loss": 4.1335, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.800785676512805, | |
| "grad_norm": 15.773963928222656, | |
| "learning_rate": 1.0049738539861332e-06, | |
| "loss": 3.2818, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8038075092543627, | |
| "grad_norm": 21.248395919799805, | |
| "learning_rate": 9.757311286462428e-07, | |
| "loss": 4.1348, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8068293419959205, | |
| "grad_norm": 23.75290298461914, | |
| "learning_rate": 9.468740981491143e-07, | |
| "loss": 4.1947, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8098511747374783, | |
| "grad_norm": 16.7280330657959, | |
| "learning_rate": 9.1840552821647e-07, | |
| "loss": 4.0364, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.812873007479036, | |
| "grad_norm": 17.696247100830078, | |
| "learning_rate": 8.903281473391152e-07, | |
| "loss": 3.3641, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.8158948402205938, | |
| "grad_norm": 16.840299606323242, | |
| "learning_rate": 8.62644646515427e-07, | |
| "loss": 5.7446, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.8189166729621515, | |
| "grad_norm": 13.25534725189209, | |
| "learning_rate": 8.353576789934436e-07, | |
| "loss": 3.3763, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.8219385057037093, | |
| "grad_norm": 19.88932991027832, | |
| "learning_rate": 8.084698600165797e-07, | |
| "loss": 3.5133, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.8249603384452671, | |
| "grad_norm": 17.921199798583984, | |
| "learning_rate": 7.819837665729596e-07, | |
| "loss": 4.1018, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.8279821711868248, | |
| "grad_norm": 29.57664680480957, | |
| "learning_rate": 7.559019371484521e-07, | |
| "loss": 3.3378, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.8310040039283826, | |
| "grad_norm": 17.720863342285156, | |
| "learning_rate": 7.302268714833622e-07, | |
| "loss": 4.1487, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.8340258366699403, | |
| "grad_norm": 17.34684944152832, | |
| "learning_rate": 7.049610303328541e-07, | |
| "loss": 3.5199, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.8370476694114981, | |
| "grad_norm": 16.739910125732422, | |
| "learning_rate": 6.80106835231113e-07, | |
| "loss": 4.2899, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.8400695021530559, | |
| "grad_norm": 17.1294002532959, | |
| "learning_rate": 6.556666682592494e-07, | |
| "loss": 3.3016, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.8430913348946136, | |
| "grad_norm": 14.801079750061035, | |
| "learning_rate": 6.316428718170037e-07, | |
| "loss": 2.4169, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8461131676361714, | |
| "grad_norm": 19.354856491088867, | |
| "learning_rate": 6.080377483982425e-07, | |
| "loss": 3.2883, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.849135000377729, | |
| "grad_norm": 17.925838470458984, | |
| "learning_rate": 5.848535603702798e-07, | |
| "loss": 3.3497, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8521568331192868, | |
| "grad_norm": 20.340959548950195, | |
| "learning_rate": 5.62092529757054e-07, | |
| "loss": 6.4132, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8551786658608446, | |
| "grad_norm": 21.507797241210938, | |
| "learning_rate": 5.397568380261559e-07, | |
| "loss": 2.3404, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8582004986024023, | |
| "grad_norm": 16.9514102935791, | |
| "learning_rate": 5.178486258797555e-07, | |
| "loss": 4.0876, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8612223313439601, | |
| "grad_norm": 14.505171775817871, | |
| "learning_rate": 4.963699930494365e-07, | |
| "loss": 3.3715, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8642441640855179, | |
| "grad_norm": 22.551313400268555, | |
| "learning_rate": 4.75322998094942e-07, | |
| "loss": 4.2347, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8672659968270756, | |
| "grad_norm": 20.145078659057617, | |
| "learning_rate": 4.5470965820689384e-07, | |
| "loss": 2.5903, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8702878295686334, | |
| "grad_norm": 17.447914123535156, | |
| "learning_rate": 4.345319490134453e-07, | |
| "loss": 3.0177, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8733096623101911, | |
| "grad_norm": 16.10365104675293, | |
| "learning_rate": 4.147918043909405e-07, | |
| "loss": 4.764, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8763314950517489, | |
| "grad_norm": 19.066129684448242, | |
| "learning_rate": 3.9549111627856794e-07, | |
| "loss": 4.7699, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8793533277933067, | |
| "grad_norm": 19.604887008666992, | |
| "learning_rate": 3.766317344970288e-07, | |
| "loss": 4.1165, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8823751605348644, | |
| "grad_norm": 17.465734481811523, | |
| "learning_rate": 3.582154665712473e-07, | |
| "loss": 2.4443, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8853969932764222, | |
| "grad_norm": 22.400236129760742, | |
| "learning_rate": 3.402440775571364e-07, | |
| "loss": 4.0664, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.88841882601798, | |
| "grad_norm": 21.420312881469727, | |
| "learning_rate": 3.227192898724252e-07, | |
| "loss": 5.7203, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8914406587595377, | |
| "grad_norm": 23.331478118896484, | |
| "learning_rate": 3.056427831315878e-07, | |
| "loss": 3.367, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8944624915010955, | |
| "grad_norm": 21.29648208618164, | |
| "learning_rate": 2.890161939848535e-07, | |
| "loss": 4.1604, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8974843242426531, | |
| "grad_norm": 15.172201156616211, | |
| "learning_rate": 2.72841115961357e-07, | |
| "loss": 4.2335, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.9005061569842109, | |
| "grad_norm": 16.736038208007812, | |
| "learning_rate": 2.5711909931640633e-07, | |
| "loss": 3.9793, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.9035279897257686, | |
| "grad_norm": 22.6779727935791, | |
| "learning_rate": 2.418516508829e-07, | |
| "loss": 2.4922, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.9065498224673264, | |
| "grad_norm": 32.2912712097168, | |
| "learning_rate": 2.270402339269162e-07, | |
| "loss": 5.6454, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9095716552088842, | |
| "grad_norm": 18.107574462890625, | |
| "learning_rate": 2.126862680074643e-07, | |
| "loss": 5.0056, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.9125934879504419, | |
| "grad_norm": 32.63033676147461, | |
| "learning_rate": 1.9879112884043317e-07, | |
| "loss": 2.5369, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.9156153206919997, | |
| "grad_norm": 18.089956283569336, | |
| "learning_rate": 1.853561481667404e-07, | |
| "loss": 2.4556, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.9186371534335575, | |
| "grad_norm": 13.772138595581055, | |
| "learning_rate": 1.7238261362469256e-07, | |
| "loss": 3.0884, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.9216589861751152, | |
| "grad_norm": 22.537776947021484, | |
| "learning_rate": 1.5987176862657883e-07, | |
| "loss": 3.2805, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.924680818916673, | |
| "grad_norm": 30.13243865966797, | |
| "learning_rate": 1.4782481223949597e-07, | |
| "loss": 3.2507, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.9277026516582307, | |
| "grad_norm": 20.858510971069336, | |
| "learning_rate": 1.3624289907042787e-07, | |
| "loss": 4.1981, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.9307244843997885, | |
| "grad_norm": 30.669658660888672, | |
| "learning_rate": 1.2512713915559027e-07, | |
| "loss": 4.9341, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.9337463171413463, | |
| "grad_norm": 32.03891372680664, | |
| "learning_rate": 1.1447859785403359e-07, | |
| "loss": 4.8266, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.936768149882904, | |
| "grad_norm": 18.382429122924805, | |
| "learning_rate": 1.0429829574554573e-07, | |
| "loss": 3.4044, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.9397899826244618, | |
| "grad_norm": 16.341550827026367, | |
| "learning_rate": 9.458720853282977e-08, | |
| "loss": 4.1438, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.9428118153660195, | |
| "grad_norm": 32.575286865234375, | |
| "learning_rate": 8.534626694799485e-08, | |
| "loss": 5.6917, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.9458336481075772, | |
| "grad_norm": 19.515989303588867, | |
| "learning_rate": 7.657635666335317e-08, | |
| "loss": 2.5437, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.948855480849135, | |
| "grad_norm": 18.81734275817871, | |
| "learning_rate": 6.827831820653163e-08, | |
| "loss": 2.5297, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9518773135906927, | |
| "grad_norm": 20.44892120361328, | |
| "learning_rate": 6.045294687991643e-08, | |
| "loss": 5.3046, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9548991463322505, | |
| "grad_norm": 30.222261428833008, | |
| "learning_rate": 5.310099268443114e-08, | |
| "loss": 7.1585, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.9579209790738082, | |
| "grad_norm": 22.93487548828125, | |
| "learning_rate": 4.622316024765039e-08, | |
| "loss": 3.9296, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.960942811815366, | |
| "grad_norm": 20.129398345947266, | |
| "learning_rate": 3.982010875626885e-08, | |
| "loss": 3.2971, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9639646445569238, | |
| "grad_norm": 20.64815330505371, | |
| "learning_rate": 3.389245189292622e-08, | |
| "loss": 4.1501, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9669864772984815, | |
| "grad_norm": 19.435129165649414, | |
| "learning_rate": 2.8440757777385976e-08, | |
| "loss": 4.9552, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9700083100400393, | |
| "grad_norm": 17.719867706298828, | |
| "learning_rate": 2.3465548912088298e-08, | |
| "loss": 2.6329, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.973030142781597, | |
| "grad_norm": 21.178937911987305, | |
| "learning_rate": 1.896730213207132e-08, | |
| "loss": 4.0836, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9760519755231548, | |
| "grad_norm": 16.906330108642578, | |
| "learning_rate": 1.4946448559270964e-08, | |
| "loss": 2.397, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9790738082647126, | |
| "grad_norm": 23.301292419433594, | |
| "learning_rate": 1.1403373561199583e-08, | |
| "loss": 4.2365, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9820956410062703, | |
| "grad_norm": 20.07245635986328, | |
| "learning_rate": 8.338416714013254e-09, | |
| "loss": 3.444, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9851174737478281, | |
| "grad_norm": 16.27911949157715, | |
| "learning_rate": 5.751871769965056e-09, | |
| "loss": 5.5038, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9881393064893859, | |
| "grad_norm": 21.404827117919922, | |
| "learning_rate": 3.643986629253138e-09, | |
| "loss": 4.1734, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9911611392309436, | |
| "grad_norm": 32.63972473144531, | |
| "learning_rate": 2.014963316257501e-09, | |
| "loss": 4.9837, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.9941829719725013, | |
| "grad_norm": 19.831165313720703, | |
| "learning_rate": 8.649579601810454e-10, | |
| "loss": 4.9867, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.997204804714059, | |
| "grad_norm": 21.53673553466797, | |
| "learning_rate": 1.9408078008431587e-10, | |
| "loss": 3.3738, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9996222709073053, | |
| "step": 3308, | |
| "total_flos": 1.175877708593234e+19, | |
| "train_loss": 4.4771003486744005, | |
| "train_runtime": 52781.7624, | |
| "train_samples_per_second": 4.013, | |
| "train_steps_per_second": 0.063 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3309, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.175877708593234e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |