| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7920341394025604, | |
| "eval_steps": 348, | |
| "global_step": 1392, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005689900426742532, | |
| "grad_norm": 0.922553300857544, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7225, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0005689900426742532, | |
| "eval_loss": 1.6560131311416626, | |
| "eval_runtime": 17.2854, | |
| "eval_samples_per_second": 42.811, | |
| "eval_steps_per_second": 21.405, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0011379800853485065, | |
| "grad_norm": 1.0872293710708618, | |
| "learning_rate": 4e-05, | |
| "loss": 1.7777, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0017069701280227596, | |
| "grad_norm": 1.0032234191894531, | |
| "learning_rate": 6e-05, | |
| "loss": 1.6594, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.002275960170697013, | |
| "grad_norm": 0.9296952486038208, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6329, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.002844950213371266, | |
| "grad_norm": 0.8549262881278992, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6946, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0034139402560455193, | |
| "grad_norm": 0.7175059914588928, | |
| "learning_rate": 0.00012, | |
| "loss": 1.605, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.003982930298719772, | |
| "grad_norm": 0.729087233543396, | |
| "learning_rate": 0.00014, | |
| "loss": 1.7539, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.004551920341394026, | |
| "grad_norm": 0.7559539675712585, | |
| "learning_rate": 0.00016, | |
| "loss": 1.7079, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.005120910384068279, | |
| "grad_norm": 0.9097371101379395, | |
| "learning_rate": 0.00018, | |
| "loss": 1.4693, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.005689900426742532, | |
| "grad_norm": 0.7562863230705261, | |
| "learning_rate": 0.0002, | |
| "loss": 1.7192, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006258890469416785, | |
| "grad_norm": 0.8033550381660461, | |
| "learning_rate": 0.00019999974162322295, | |
| "loss": 1.6699, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0068278805120910386, | |
| "grad_norm": 0.6270872950553894, | |
| "learning_rate": 0.00019999896649422697, | |
| "loss": 1.7042, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.007396870554765292, | |
| "grad_norm": 0.6003552079200745, | |
| "learning_rate": 0.00019999767461701748, | |
| "loss": 1.672, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.007965860597439544, | |
| "grad_norm": 0.5751997232437134, | |
| "learning_rate": 0.00019999586599827042, | |
| "loss": 1.5727, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.008534850640113799, | |
| "grad_norm": 0.5488961338996887, | |
| "learning_rate": 0.00019999354064733184, | |
| "loss": 1.6477, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.009103840682788052, | |
| "grad_norm": 0.4690549671649933, | |
| "learning_rate": 0.00019999069857621807, | |
| "loss": 1.4063, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.009672830725462305, | |
| "grad_norm": 0.5245763659477234, | |
| "learning_rate": 0.00019998733979961563, | |
| "loss": 1.649, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.010241820768136558, | |
| "grad_norm": 0.4962601661682129, | |
| "learning_rate": 0.0001999834643348811, | |
| "loss": 1.5558, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.010810810810810811, | |
| "grad_norm": 0.5009298324584961, | |
| "learning_rate": 0.0001999790722020411, | |
| "loss": 1.6178, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.011379800853485065, | |
| "grad_norm": 0.5524196028709412, | |
| "learning_rate": 0.00019997416342379208, | |
| "loss": 1.6133, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011948790896159318, | |
| "grad_norm": 0.48095259070396423, | |
| "learning_rate": 0.00019996873802550043, | |
| "loss": 1.4158, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.01251778093883357, | |
| "grad_norm": 0.5575169324874878, | |
| "learning_rate": 0.00019996279603520196, | |
| "loss": 1.7057, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.013086770981507824, | |
| "grad_norm": 0.5423071384429932, | |
| "learning_rate": 0.00019995633748360223, | |
| "loss": 1.5661, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.013655761024182077, | |
| "grad_norm": 0.49561819434165955, | |
| "learning_rate": 0.00019994936240407598, | |
| "loss": 1.4119, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.01422475106685633, | |
| "grad_norm": 0.4862682521343231, | |
| "learning_rate": 0.00019994187083266716, | |
| "loss": 1.519, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.014793741109530583, | |
| "grad_norm": 0.5174720883369446, | |
| "learning_rate": 0.0001999338628080888, | |
| "loss": 1.3668, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.015362731152204837, | |
| "grad_norm": 0.5306721329689026, | |
| "learning_rate": 0.0001999253383717226, | |
| "loss": 1.6097, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.015931721194879088, | |
| "grad_norm": 0.5307742357254028, | |
| "learning_rate": 0.00019991629756761886, | |
| "loss": 1.7738, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.016500711237553343, | |
| "grad_norm": 0.6086705327033997, | |
| "learning_rate": 0.00019990674044249634, | |
| "loss": 1.7079, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.017069701280227598, | |
| "grad_norm": 0.5047173500061035, | |
| "learning_rate": 0.00019989666704574175, | |
| "loss": 1.6998, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01763869132290185, | |
| "grad_norm": 0.5041013360023499, | |
| "learning_rate": 0.00019988607742940978, | |
| "loss": 1.7047, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.018207681365576104, | |
| "grad_norm": 0.4694116413593292, | |
| "learning_rate": 0.00019987497164822263, | |
| "loss": 1.3058, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.018776671408250355, | |
| "grad_norm": 0.5069786310195923, | |
| "learning_rate": 0.0001998633497595698, | |
| "loss": 1.6603, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.01934566145092461, | |
| "grad_norm": 0.4877070486545563, | |
| "learning_rate": 0.0001998512118235078, | |
| "loss": 1.5145, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.01991465149359886, | |
| "grad_norm": 0.5028818845748901, | |
| "learning_rate": 0.0001998385579027599, | |
| "loss": 1.5016, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.020483641536273117, | |
| "grad_norm": 0.4918319880962372, | |
| "learning_rate": 0.00019982538806271566, | |
| "loss": 1.5468, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.021052631578947368, | |
| "grad_norm": 0.5177620649337769, | |
| "learning_rate": 0.00019981170237143067, | |
| "loss": 1.5555, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.021621621621621623, | |
| "grad_norm": 0.49115803837776184, | |
| "learning_rate": 0.00019979750089962629, | |
| "loss": 1.592, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.022190611664295874, | |
| "grad_norm": 0.5621944069862366, | |
| "learning_rate": 0.00019978278372068906, | |
| "loss": 1.6697, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.02275960170697013, | |
| "grad_norm": 0.49260076880455017, | |
| "learning_rate": 0.00019976755091067054, | |
| "loss": 1.4688, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02332859174964438, | |
| "grad_norm": 0.4910222589969635, | |
| "learning_rate": 0.00019975180254828688, | |
| "loss": 1.462, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.023897581792318635, | |
| "grad_norm": 0.5017576217651367, | |
| "learning_rate": 0.0001997355387149182, | |
| "loss": 1.6558, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.024466571834992887, | |
| "grad_norm": 0.5089415907859802, | |
| "learning_rate": 0.00019971875949460852, | |
| "loss": 1.6412, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.02503556187766714, | |
| "grad_norm": 0.4794662594795227, | |
| "learning_rate": 0.00019970146497406505, | |
| "loss": 1.6011, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.025604551920341393, | |
| "grad_norm": 0.5046934485435486, | |
| "learning_rate": 0.00019968365524265777, | |
| "loss": 1.6675, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.026173541963015648, | |
| "grad_norm": 0.4993690550327301, | |
| "learning_rate": 0.0001996653303924192, | |
| "loss": 1.6735, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0267425320056899, | |
| "grad_norm": 0.48856502771377563, | |
| "learning_rate": 0.00019964649051804355, | |
| "loss": 1.5536, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.027311522048364154, | |
| "grad_norm": 0.4920005202293396, | |
| "learning_rate": 0.0001996271357168866, | |
| "loss": 1.6204, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.027880512091038406, | |
| "grad_norm": 0.5342410802841187, | |
| "learning_rate": 0.00019960726608896502, | |
| "loss": 1.719, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.02844950213371266, | |
| "grad_norm": 0.5041580200195312, | |
| "learning_rate": 0.00019958688173695572, | |
| "loss": 1.7053, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029018492176386912, | |
| "grad_norm": 0.5237680077552795, | |
| "learning_rate": 0.00019956598276619562, | |
| "loss": 1.5091, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.029587482219061167, | |
| "grad_norm": 0.4911646246910095, | |
| "learning_rate": 0.0001995445692846809, | |
| "loss": 1.6085, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.030156472261735418, | |
| "grad_norm": 0.520005464553833, | |
| "learning_rate": 0.00019952264140306645, | |
| "loss": 1.4782, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.030725462304409673, | |
| "grad_norm": 0.49788954854011536, | |
| "learning_rate": 0.0001995001992346654, | |
| "loss": 1.4905, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.031294452347083924, | |
| "grad_norm": 0.5043379664421082, | |
| "learning_rate": 0.00019947724289544845, | |
| "loss": 1.6566, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.031863442389758176, | |
| "grad_norm": 0.5547715425491333, | |
| "learning_rate": 0.00019945377250404328, | |
| "loss": 1.7227, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.032432432432432434, | |
| "grad_norm": 0.5288915634155273, | |
| "learning_rate": 0.000199429788181734, | |
| "loss": 1.5921, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.033001422475106686, | |
| "grad_norm": 0.5353677868843079, | |
| "learning_rate": 0.00019940529005246048, | |
| "loss": 1.5371, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.03357041251778094, | |
| "grad_norm": 0.520143449306488, | |
| "learning_rate": 0.00019938027824281757, | |
| "loss": 1.6308, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.034139402560455195, | |
| "grad_norm": 0.50368732213974, | |
| "learning_rate": 0.0001993547528820548, | |
| "loss": 1.4645, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03470839260312945, | |
| "grad_norm": 0.5326752066612244, | |
| "learning_rate": 0.0001993287141020753, | |
| "loss": 1.5832, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.0352773826458037, | |
| "grad_norm": 0.48568812012672424, | |
| "learning_rate": 0.00019930216203743544, | |
| "loss": 1.4137, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.03584637268847795, | |
| "grad_norm": 0.4832801818847656, | |
| "learning_rate": 0.0001992750968253439, | |
| "loss": 1.4713, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.03641536273115221, | |
| "grad_norm": 0.49059394001960754, | |
| "learning_rate": 0.00019924751860566118, | |
| "loss": 1.6009, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.03698435277382646, | |
| "grad_norm": 0.5292865633964539, | |
| "learning_rate": 0.0001992194275208987, | |
| "loss": 1.6339, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.03755334281650071, | |
| "grad_norm": 0.520621120929718, | |
| "learning_rate": 0.00019919082371621811, | |
| "loss": 1.7033, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.03812233285917496, | |
| "grad_norm": 0.5552493929862976, | |
| "learning_rate": 0.0001991617073394306, | |
| "loss": 1.5704, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.03869132290184922, | |
| "grad_norm": 0.5199451446533203, | |
| "learning_rate": 0.0001991320785409961, | |
| "loss": 1.6266, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.03926031294452347, | |
| "grad_norm": 0.540593147277832, | |
| "learning_rate": 0.0001991019374740225, | |
| "loss": 1.7327, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.03982930298719772, | |
| "grad_norm": 0.5305120348930359, | |
| "learning_rate": 0.00019907128429426477, | |
| "loss": 1.6544, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.040398293029871975, | |
| "grad_norm": 0.5247764587402344, | |
| "learning_rate": 0.00019904011916012433, | |
| "loss": 1.429, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.04096728307254623, | |
| "grad_norm": 0.500156819820404, | |
| "learning_rate": 0.00019900844223264813, | |
| "loss": 1.6106, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.041536273115220484, | |
| "grad_norm": 0.49794986844062805, | |
| "learning_rate": 0.00019897625367552784, | |
| "loss": 1.5322, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.042105263157894736, | |
| "grad_norm": 0.5475789308547974, | |
| "learning_rate": 0.00019894355365509894, | |
| "loss": 1.4882, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.04267425320056899, | |
| "grad_norm": 0.5272343158721924, | |
| "learning_rate": 0.00019891034234033995, | |
| "loss": 1.5119, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.043243243243243246, | |
| "grad_norm": 0.4892237186431885, | |
| "learning_rate": 0.00019887661990287153, | |
| "loss": 1.5567, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.0438122332859175, | |
| "grad_norm": 0.528414249420166, | |
| "learning_rate": 0.00019884238651695556, | |
| "loss": 1.7716, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.04438122332859175, | |
| "grad_norm": 0.5159140229225159, | |
| "learning_rate": 0.00019880764235949427, | |
| "loss": 1.6873, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.044950213371266, | |
| "grad_norm": 0.5157197713851929, | |
| "learning_rate": 0.0001987723876100294, | |
| "loss": 1.5196, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.04551920341394026, | |
| "grad_norm": 0.518205463886261, | |
| "learning_rate": 0.00019873662245074102, | |
| "loss": 1.5238, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04608819345661451, | |
| "grad_norm": 0.5316376090049744, | |
| "learning_rate": 0.00019870034706644693, | |
| "loss": 1.4913, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.04665718349928876, | |
| "grad_norm": 0.5020834803581238, | |
| "learning_rate": 0.00019866356164460145, | |
| "loss": 1.4051, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.04722617354196301, | |
| "grad_norm": 0.4912559986114502, | |
| "learning_rate": 0.00019862626637529455, | |
| "loss": 1.4947, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.04779516358463727, | |
| "grad_norm": 0.5261936187744141, | |
| "learning_rate": 0.00019858846145125086, | |
| "loss": 1.659, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.04836415362731152, | |
| "grad_norm": 0.5002409815788269, | |
| "learning_rate": 0.00019855014706782867, | |
| "loss": 1.4743, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.048933143669985774, | |
| "grad_norm": 0.5293824672698975, | |
| "learning_rate": 0.0001985113234230189, | |
| "loss": 1.5796, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.049502133712660025, | |
| "grad_norm": 0.49084582924842834, | |
| "learning_rate": 0.00019847199071744415, | |
| "loss": 1.6052, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.05007112375533428, | |
| "grad_norm": 0.5251219868659973, | |
| "learning_rate": 0.00019843214915435758, | |
| "loss": 1.7684, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.050640113798008535, | |
| "grad_norm": 0.5003427267074585, | |
| "learning_rate": 0.0001983917989396418, | |
| "loss": 1.5715, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.051209103840682786, | |
| "grad_norm": 0.5283729434013367, | |
| "learning_rate": 0.0001983509402818081, | |
| "loss": 1.5396, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.051778093883357044, | |
| "grad_norm": 0.49652016162872314, | |
| "learning_rate": 0.00019830957339199494, | |
| "loss": 1.5353, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.052347083926031296, | |
| "grad_norm": 0.49297675490379333, | |
| "learning_rate": 0.00019826769848396727, | |
| "loss": 1.5012, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.05291607396870555, | |
| "grad_norm": 0.5100125670433044, | |
| "learning_rate": 0.0001982253157741151, | |
| "loss": 1.6194, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.0534850640113798, | |
| "grad_norm": 0.5218221545219421, | |
| "learning_rate": 0.00019818242548145265, | |
| "loss": 1.6505, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.05405405405405406, | |
| "grad_norm": 0.5490546226501465, | |
| "learning_rate": 0.000198139027827617, | |
| "loss": 1.498, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.05462304409672831, | |
| "grad_norm": 0.5228062868118286, | |
| "learning_rate": 0.00019809512303686706, | |
| "loss": 1.4592, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.05519203413940256, | |
| "grad_norm": 0.49827295541763306, | |
| "learning_rate": 0.00019805071133608242, | |
| "loss": 1.6593, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.05576102418207681, | |
| "grad_norm": 0.5081865191459656, | |
| "learning_rate": 0.0001980057929547621, | |
| "loss": 1.4226, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.05633001422475107, | |
| "grad_norm": 0.5018671751022339, | |
| "learning_rate": 0.00019796036812502347, | |
| "loss": 1.4995, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.05689900426742532, | |
| "grad_norm": 0.5807016491889954, | |
| "learning_rate": 0.00019791443708160094, | |
| "loss": 1.7405, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05746799431009957, | |
| "grad_norm": 0.5095066428184509, | |
| "learning_rate": 0.00019786800006184473, | |
| "loss": 1.4908, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.058036984352773824, | |
| "grad_norm": 0.5552268028259277, | |
| "learning_rate": 0.00019782105730571992, | |
| "loss": 1.5289, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.05860597439544808, | |
| "grad_norm": 0.47026970982551575, | |
| "learning_rate": 0.00019777360905580478, | |
| "loss": 1.3497, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.059174964438122334, | |
| "grad_norm": 0.5475593209266663, | |
| "learning_rate": 0.00019772565555728984, | |
| "loss": 1.6329, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.059743954480796585, | |
| "grad_norm": 0.5217400789260864, | |
| "learning_rate": 0.00019767719705797657, | |
| "loss": 1.6181, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.060312944523470836, | |
| "grad_norm": 0.5143265128135681, | |
| "learning_rate": 0.00019762823380827592, | |
| "loss": 1.6369, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.060881934566145095, | |
| "grad_norm": 0.501568615436554, | |
| "learning_rate": 0.0001975787660612072, | |
| "loss": 1.6871, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.061450924608819346, | |
| "grad_norm": 0.47950610518455505, | |
| "learning_rate": 0.00019752879407239685, | |
| "loss": 1.4494, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.0620199146514936, | |
| "grad_norm": 0.5488466024398804, | |
| "learning_rate": 0.0001974783181000768, | |
| "loss": 1.6457, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.06258890469416785, | |
| "grad_norm": 0.5165080428123474, | |
| "learning_rate": 0.0001974273384050835, | |
| "loss": 1.5463, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06315789473684211, | |
| "grad_norm": 0.5002058744430542, | |
| "learning_rate": 0.0001973758552508563, | |
| "loss": 1.4333, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.06372688477951635, | |
| "grad_norm": 0.4927598237991333, | |
| "learning_rate": 0.00019732386890343624, | |
| "loss": 1.5576, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.06429587482219061, | |
| "grad_norm": 0.5156055688858032, | |
| "learning_rate": 0.0001972713796314646, | |
| "loss": 1.4821, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.06486486486486487, | |
| "grad_norm": 0.5108924508094788, | |
| "learning_rate": 0.0001972183877061816, | |
| "loss": 1.502, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.06543385490753911, | |
| "grad_norm": 0.5052126049995422, | |
| "learning_rate": 0.00019716489340142483, | |
| "loss": 1.7285, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06600284495021337, | |
| "grad_norm": 0.5034211874008179, | |
| "learning_rate": 0.00019711089699362807, | |
| "loss": 1.4148, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.06657183499288763, | |
| "grad_norm": 0.5284733772277832, | |
| "learning_rate": 0.00019705639876181969, | |
| "loss": 1.5979, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.06714082503556187, | |
| "grad_norm": 0.5434923768043518, | |
| "learning_rate": 0.0001970013989876212, | |
| "loss": 1.6856, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.06770981507823613, | |
| "grad_norm": 0.48895972967147827, | |
| "learning_rate": 0.00019694589795524588, | |
| "loss": 1.5305, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.06827880512091039, | |
| "grad_norm": 0.5481955409049988, | |
| "learning_rate": 0.00019688989595149732, | |
| "loss": 1.473, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06884779516358464, | |
| "grad_norm": 0.47966116666793823, | |
| "learning_rate": 0.00019683339326576781, | |
| "loss": 1.1899, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.0694167852062589, | |
| "grad_norm": 0.5007337927818298, | |
| "learning_rate": 0.00019677639019003706, | |
| "loss": 1.4747, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.06998577524893314, | |
| "grad_norm": 0.5798030495643616, | |
| "learning_rate": 0.00019671888701887046, | |
| "loss": 1.5881, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.0705547652916074, | |
| "grad_norm": 0.5382363200187683, | |
| "learning_rate": 0.0001966608840494177, | |
| "loss": 1.6345, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.07112375533428165, | |
| "grad_norm": 0.5181685090065002, | |
| "learning_rate": 0.00019660238158141112, | |
| "loss": 1.48, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0716927453769559, | |
| "grad_norm": 0.5349889993667603, | |
| "learning_rate": 0.0001965433799171644, | |
| "loss": 1.5679, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.07226173541963016, | |
| "grad_norm": 0.496991902589798, | |
| "learning_rate": 0.00019648387936157068, | |
| "loss": 1.5596, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.07283072546230442, | |
| "grad_norm": 0.5177836418151855, | |
| "learning_rate": 0.0001964238802221012, | |
| "loss": 1.3765, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.07339971550497866, | |
| "grad_norm": 0.5253962874412537, | |
| "learning_rate": 0.00019636338280880366, | |
| "loss": 1.7268, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.07396870554765292, | |
| "grad_norm": 0.5878409743309021, | |
| "learning_rate": 0.00019630238743430058, | |
| "loss": 1.5933, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07453769559032716, | |
| "grad_norm": 0.5072840452194214, | |
| "learning_rate": 0.00019624089441378775, | |
| "loss": 1.3819, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.07510668563300142, | |
| "grad_norm": 0.5567812323570251, | |
| "learning_rate": 0.0001961789040650325, | |
| "loss": 1.5582, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.07567567567567568, | |
| "grad_norm": 0.48109254240989685, | |
| "learning_rate": 0.00019611641670837219, | |
| "loss": 1.4227, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.07624466571834992, | |
| "grad_norm": 0.5404167175292969, | |
| "learning_rate": 0.00019605343266671245, | |
| "loss": 1.6807, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.07681365576102418, | |
| "grad_norm": 0.47476792335510254, | |
| "learning_rate": 0.00019598995226552556, | |
| "loss": 1.3462, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.07738264580369844, | |
| "grad_norm": 0.4884220361709595, | |
| "learning_rate": 0.0001959259758328487, | |
| "loss": 1.5956, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.07795163584637269, | |
| "grad_norm": 0.5190904140472412, | |
| "learning_rate": 0.00019586150369928245, | |
| "loss": 1.6685, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.07852062588904694, | |
| "grad_norm": 0.513028621673584, | |
| "learning_rate": 0.0001957965361979888, | |
| "loss": 1.7023, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.07908961593172119, | |
| "grad_norm": 0.4926295578479767, | |
| "learning_rate": 0.00019573107366468962, | |
| "loss": 1.4606, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.07965860597439545, | |
| "grad_norm": 0.5009914636611938, | |
| "learning_rate": 0.00019566511643766485, | |
| "loss": 1.5636, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0802275960170697, | |
| "grad_norm": 0.54355388879776, | |
| "learning_rate": 0.00019559866485775084, | |
| "loss": 1.681, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.08079658605974395, | |
| "grad_norm": 0.5059416890144348, | |
| "learning_rate": 0.00019553171926833853, | |
| "loss": 1.6193, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.08136557610241821, | |
| "grad_norm": 0.5309209227561951, | |
| "learning_rate": 0.00019546428001537155, | |
| "loss": 1.5552, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.08193456614509247, | |
| "grad_norm": 0.4913862943649292, | |
| "learning_rate": 0.0001953963474473447, | |
| "loss": 1.5506, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.08250355618776671, | |
| "grad_norm": 0.5331928133964539, | |
| "learning_rate": 0.0001953279219153019, | |
| "loss": 1.7152, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.08307254623044097, | |
| "grad_norm": 0.5169084072113037, | |
| "learning_rate": 0.00019525900377283457, | |
| "loss": 1.6177, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.08364153627311523, | |
| "grad_norm": 0.5159075856208801, | |
| "learning_rate": 0.00019518959337607957, | |
| "loss": 1.5652, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.08421052631578947, | |
| "grad_norm": 0.5606206655502319, | |
| "learning_rate": 0.0001951196910837177, | |
| "loss": 1.6821, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.08477951635846373, | |
| "grad_norm": 0.47890591621398926, | |
| "learning_rate": 0.0001950492972569715, | |
| "loss": 1.5041, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.08534850640113797, | |
| "grad_norm": 0.5077673196792603, | |
| "learning_rate": 0.0001949784122596035, | |
| "loss": 1.5837, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08591749644381223, | |
| "grad_norm": 0.5021458268165588, | |
| "learning_rate": 0.00019490703645791454, | |
| "loss": 1.5813, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.08648648648648649, | |
| "grad_norm": 0.5000331997871399, | |
| "learning_rate": 0.00019483517022074156, | |
| "loss": 1.5686, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.08705547652916074, | |
| "grad_norm": 0.5121405124664307, | |
| "learning_rate": 0.0001947628139194559, | |
| "loss": 1.4329, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.087624466571835, | |
| "grad_norm": 0.5058543682098389, | |
| "learning_rate": 0.00019468996792796137, | |
| "loss": 1.36, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.08819345661450925, | |
| "grad_norm": 0.5810546875, | |
| "learning_rate": 0.00019461663262269213, | |
| "loss": 1.3764, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.0887624466571835, | |
| "grad_norm": 0.5015589594841003, | |
| "learning_rate": 0.00019454280838261106, | |
| "loss": 1.4966, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.08933143669985776, | |
| "grad_norm": 0.5284256339073181, | |
| "learning_rate": 0.0001944684955892075, | |
| "loss": 1.4944, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.089900426742532, | |
| "grad_norm": 0.49957889318466187, | |
| "learning_rate": 0.0001943936946264955, | |
| "loss": 1.4641, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.09046941678520626, | |
| "grad_norm": 0.5073912143707275, | |
| "learning_rate": 0.00019431840588101157, | |
| "loss": 1.3371, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.09103840682788052, | |
| "grad_norm": 0.5323196649551392, | |
| "learning_rate": 0.00019424262974181313, | |
| "loss": 1.5312, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09160739687055476, | |
| "grad_norm": 0.5276457071304321, | |
| "learning_rate": 0.00019416636660047595, | |
| "loss": 1.64, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.09217638691322902, | |
| "grad_norm": 0.49499741196632385, | |
| "learning_rate": 0.0001940896168510926, | |
| "loss": 1.3689, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.09274537695590328, | |
| "grad_norm": 0.5169721245765686, | |
| "learning_rate": 0.00019401238089027017, | |
| "loss": 1.5352, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.09331436699857752, | |
| "grad_norm": 0.48859354853630066, | |
| "learning_rate": 0.0001939346591171281, | |
| "loss": 1.4584, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.09388335704125178, | |
| "grad_norm": 0.5150989890098572, | |
| "learning_rate": 0.00019385645193329654, | |
| "loss": 1.5178, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.09445234708392602, | |
| "grad_norm": 0.48626863956451416, | |
| "learning_rate": 0.00019377775974291383, | |
| "loss": 1.3689, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.09502133712660028, | |
| "grad_norm": 0.5352733731269836, | |
| "learning_rate": 0.0001936985829526247, | |
| "loss": 1.5953, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.09559032716927454, | |
| "grad_norm": 0.5061799883842468, | |
| "learning_rate": 0.00019361892197157797, | |
| "loss": 1.6339, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.09615931721194879, | |
| "grad_norm": 0.5095758438110352, | |
| "learning_rate": 0.0001935387772114246, | |
| "loss": 1.5116, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.09672830725462304, | |
| "grad_norm": 0.4948934316635132, | |
| "learning_rate": 0.00019345814908631556, | |
| "loss": 1.3963, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0972972972972973, | |
| "grad_norm": 0.5632720589637756, | |
| "learning_rate": 0.0001933770380128995, | |
| "loss": 1.618, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.09786628733997155, | |
| "grad_norm": 0.5013827681541443, | |
| "learning_rate": 0.00019329544441032076, | |
| "loss": 1.4847, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.0984352773826458, | |
| "grad_norm": 0.512117326259613, | |
| "learning_rate": 0.0001932133687002172, | |
| "loss": 1.4346, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.09900426742532005, | |
| "grad_norm": 0.5385090708732605, | |
| "learning_rate": 0.00019313081130671798, | |
| "loss": 1.6694, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.09957325746799431, | |
| "grad_norm": 0.5616840720176697, | |
| "learning_rate": 0.00019304777265644133, | |
| "loss": 1.5638, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.10014224751066857, | |
| "grad_norm": 0.5222409963607788, | |
| "learning_rate": 0.0001929642531784925, | |
| "loss": 1.6203, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.10071123755334281, | |
| "grad_norm": 0.5733211040496826, | |
| "learning_rate": 0.00019288025330446126, | |
| "loss": 1.6952, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.10128022759601707, | |
| "grad_norm": 0.5625792741775513, | |
| "learning_rate": 0.00019279577346842, | |
| "loss": 1.6639, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.10184921763869133, | |
| "grad_norm": 0.5778010487556458, | |
| "learning_rate": 0.0001927108141069213, | |
| "loss": 1.5719, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.10241820768136557, | |
| "grad_norm": 0.5034694671630859, | |
| "learning_rate": 0.00019262537565899564, | |
| "loss": 1.4461, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.10298719772403983, | |
| "grad_norm": 0.5446426272392273, | |
| "learning_rate": 0.0001925394585661492, | |
| "loss": 1.4904, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.10355618776671409, | |
| "grad_norm": 0.47503742575645447, | |
| "learning_rate": 0.00019245306327236172, | |
| "loss": 1.5012, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.10412517780938833, | |
| "grad_norm": 0.5337246656417847, | |
| "learning_rate": 0.00019236619022408387, | |
| "loss": 1.4175, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.10469416785206259, | |
| "grad_norm": 0.5157039165496826, | |
| "learning_rate": 0.00019227883987023523, | |
| "loss": 1.6435, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 0.5278623700141907, | |
| "learning_rate": 0.00019219101266220188, | |
| "loss": 1.6746, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1058321479374111, | |
| "grad_norm": 0.4916015565395355, | |
| "learning_rate": 0.000192102709053834, | |
| "loss": 1.4584, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.10640113798008535, | |
| "grad_norm": 0.5512337684631348, | |
| "learning_rate": 0.00019201392950144363, | |
| "loss": 1.6313, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.1069701280227596, | |
| "grad_norm": 0.506673276424408, | |
| "learning_rate": 0.0001919246744638023, | |
| "loss": 1.4842, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.10753911806543386, | |
| "grad_norm": 0.49428772926330566, | |
| "learning_rate": 0.00019183494440213857, | |
| "loss": 1.4246, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "grad_norm": 0.5020580887794495, | |
| "learning_rate": 0.0001917447397801357, | |
| "loss": 1.6966, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.10867709815078236, | |
| "grad_norm": 0.5004864931106567, | |
| "learning_rate": 0.00019165406106392928, | |
| "loss": 1.3144, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.10924608819345662, | |
| "grad_norm": 0.47853466868400574, | |
| "learning_rate": 0.00019156290872210488, | |
| "loss": 1.3321, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.10981507823613086, | |
| "grad_norm": 0.4940144121646881, | |
| "learning_rate": 0.00019147128322569533, | |
| "loss": 1.2719, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.11038406827880512, | |
| "grad_norm": 0.5355538725852966, | |
| "learning_rate": 0.00019137918504817878, | |
| "loss": 1.4551, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.11095305832147938, | |
| "grad_norm": 0.5604861378669739, | |
| "learning_rate": 0.00019128661466547576, | |
| "loss": 1.6109, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.11152204836415362, | |
| "grad_norm": 0.5061023235321045, | |
| "learning_rate": 0.000191193572555947, | |
| "loss": 1.511, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.11209103840682788, | |
| "grad_norm": 0.5125574469566345, | |
| "learning_rate": 0.0001911000592003909, | |
| "loss": 1.4209, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.11266002844950214, | |
| "grad_norm": 0.5150197744369507, | |
| "learning_rate": 0.00019100607508204114, | |
| "loss": 1.6323, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.11322901849217638, | |
| "grad_norm": 0.5164692997932434, | |
| "learning_rate": 0.0001909116206865639, | |
| "loss": 1.5086, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.11379800853485064, | |
| "grad_norm": 0.5399172306060791, | |
| "learning_rate": 0.00019081669650205564, | |
| "loss": 1.5051, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11436699857752489, | |
| "grad_norm": 0.49494683742523193, | |
| "learning_rate": 0.0001907213030190405, | |
| "loss": 1.5123, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.11493598862019914, | |
| "grad_norm": 0.5344505906105042, | |
| "learning_rate": 0.00019062544073046768, | |
| "loss": 1.5364, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1155049786628734, | |
| "grad_norm": 0.5201467871665955, | |
| "learning_rate": 0.00019052911013170892, | |
| "loss": 1.5027, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.11607396870554765, | |
| "grad_norm": 0.5991513729095459, | |
| "learning_rate": 0.00019043231172055603, | |
| "loss": 1.6402, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1166429587482219, | |
| "grad_norm": 0.5526711940765381, | |
| "learning_rate": 0.00019033504599721827, | |
| "loss": 1.6166, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.11721194879089616, | |
| "grad_norm": 0.493965208530426, | |
| "learning_rate": 0.00019023731346431972, | |
| "loss": 1.3099, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.11778093883357041, | |
| "grad_norm": 0.5043678879737854, | |
| "learning_rate": 0.00019013911462689668, | |
| "loss": 1.3328, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.11834992887624467, | |
| "grad_norm": 0.518515944480896, | |
| "learning_rate": 0.00019004044999239517, | |
| "loss": 1.453, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.11891891891891893, | |
| "grad_norm": 0.547725260257721, | |
| "learning_rate": 0.00018994132007066816, | |
| "loss": 1.552, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.11948790896159317, | |
| "grad_norm": 0.5498734712600708, | |
| "learning_rate": 0.0001898417253739731, | |
| "loss": 1.6076, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.12005689900426743, | |
| "grad_norm": 0.5087684392929077, | |
| "learning_rate": 0.00018974166641696908, | |
| "loss": 1.3459, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.12062588904694167, | |
| "grad_norm": 0.49864476919174194, | |
| "learning_rate": 0.00018964114371671428, | |
| "loss": 1.502, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.12119487908961593, | |
| "grad_norm": 0.49818646907806396, | |
| "learning_rate": 0.0001895401577926634, | |
| "loss": 1.5047, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.12176386913229019, | |
| "grad_norm": 0.5151641964912415, | |
| "learning_rate": 0.00018943870916666476, | |
| "loss": 1.5276, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.12233285917496443, | |
| "grad_norm": 0.5294698476791382, | |
| "learning_rate": 0.00018933679836295777, | |
| "loss": 1.4735, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.12290184921763869, | |
| "grad_norm": 0.5169737339019775, | |
| "learning_rate": 0.0001892344259081701, | |
| "loss": 1.6458, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.12347083926031295, | |
| "grad_norm": 0.5262957811355591, | |
| "learning_rate": 0.000189131592331315, | |
| "loss": 1.6239, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.1240398293029872, | |
| "grad_norm": 0.5043689012527466, | |
| "learning_rate": 0.00018902829816378876, | |
| "loss": 1.5785, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.12460881934566145, | |
| "grad_norm": 0.5032008290290833, | |
| "learning_rate": 0.00018892454393936754, | |
| "loss": 1.4075, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.1251778093883357, | |
| "grad_norm": 0.5261518359184265, | |
| "learning_rate": 0.00018882033019420504, | |
| "loss": 1.4251, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12574679943100997, | |
| "grad_norm": 0.5519723296165466, | |
| "learning_rate": 0.00018871565746682949, | |
| "loss": 1.6654, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.12631578947368421, | |
| "grad_norm": 0.5465745329856873, | |
| "learning_rate": 0.0001886105262981409, | |
| "loss": 1.5489, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.12688477951635846, | |
| "grad_norm": 0.6040769219398499, | |
| "learning_rate": 0.00018850493723140835, | |
| "loss": 1.6205, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.1274537695590327, | |
| "grad_norm": 0.5207870006561279, | |
| "learning_rate": 0.0001883988908122671, | |
| "loss": 1.5843, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.12802275960170698, | |
| "grad_norm": 0.5130170583724976, | |
| "learning_rate": 0.00018829238758871574, | |
| "loss": 1.5384, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.12859174964438122, | |
| "grad_norm": 0.5100380182266235, | |
| "learning_rate": 0.00018818542811111354, | |
| "loss": 1.5026, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.12916073968705546, | |
| "grad_norm": 0.5047493577003479, | |
| "learning_rate": 0.00018807801293217735, | |
| "loss": 1.4774, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.12972972972972974, | |
| "grad_norm": 0.5392350554466248, | |
| "learning_rate": 0.0001879701426069789, | |
| "loss": 1.2986, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.13029871977240398, | |
| "grad_norm": 0.4927089810371399, | |
| "learning_rate": 0.00018786181769294203, | |
| "loss": 1.3298, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.13086770981507823, | |
| "grad_norm": 0.5079994797706604, | |
| "learning_rate": 0.0001877530387498395, | |
| "loss": 1.4027, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1314366998577525, | |
| "grad_norm": 0.5074231624603271, | |
| "learning_rate": 0.00018764380633979035, | |
| "loss": 1.6176, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.13200568990042674, | |
| "grad_norm": 0.5501790642738342, | |
| "learning_rate": 0.00018753412102725698, | |
| "loss": 1.3795, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.132574679943101, | |
| "grad_norm": 0.5117084383964539, | |
| "learning_rate": 0.00018742398337904213, | |
| "loss": 1.4731, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.13314366998577526, | |
| "grad_norm": 0.5027900338172913, | |
| "learning_rate": 0.00018731339396428607, | |
| "loss": 1.5399, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.1337126600284495, | |
| "grad_norm": 0.5187605619430542, | |
| "learning_rate": 0.00018720235335446342, | |
| "loss": 1.5111, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.13428165007112375, | |
| "grad_norm": 0.5272188782691956, | |
| "learning_rate": 0.00018709086212338058, | |
| "loss": 1.5717, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.13485064011379802, | |
| "grad_norm": 0.5339289903640747, | |
| "learning_rate": 0.00018697892084717238, | |
| "loss": 1.4529, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.13541963015647226, | |
| "grad_norm": 0.5382213592529297, | |
| "learning_rate": 0.00018686653010429937, | |
| "loss": 1.5727, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.1359886201991465, | |
| "grad_norm": 0.5148522257804871, | |
| "learning_rate": 0.00018675369047554475, | |
| "loss": 1.5683, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.13655761024182078, | |
| "grad_norm": 0.5300989747047424, | |
| "learning_rate": 0.00018664040254401121, | |
| "loss": 1.6485, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13712660028449503, | |
| "grad_norm": 0.5400955080986023, | |
| "learning_rate": 0.00018652666689511824, | |
| "loss": 1.5095, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.13769559032716927, | |
| "grad_norm": 0.49695253372192383, | |
| "learning_rate": 0.0001864124841165988, | |
| "loss": 1.3692, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.13826458036984351, | |
| "grad_norm": 0.5431788563728333, | |
| "learning_rate": 0.00018629785479849656, | |
| "loss": 1.5774, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.1388335704125178, | |
| "grad_norm": 0.5125901103019714, | |
| "learning_rate": 0.00018618277953316245, | |
| "loss": 1.3545, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.13940256045519203, | |
| "grad_norm": 0.5172457695007324, | |
| "learning_rate": 0.0001860672589152521, | |
| "loss": 1.5196, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.13997155049786628, | |
| "grad_norm": 0.5287220478057861, | |
| "learning_rate": 0.00018595129354172235, | |
| "loss": 1.7279, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.14054054054054055, | |
| "grad_norm": 0.5728311538696289, | |
| "learning_rate": 0.00018583488401182843, | |
| "loss": 1.5514, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.1411095305832148, | |
| "grad_norm": 0.5267804861068726, | |
| "learning_rate": 0.0001857180309271207, | |
| "loss": 1.5115, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.14167852062588904, | |
| "grad_norm": 0.5459727644920349, | |
| "learning_rate": 0.00018560073489144166, | |
| "loss": 1.5057, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.1422475106685633, | |
| "grad_norm": 0.5065287947654724, | |
| "learning_rate": 0.00018548299651092269, | |
| "loss": 1.4906, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14281650071123755, | |
| "grad_norm": 0.5647059082984924, | |
| "learning_rate": 0.00018536481639398107, | |
| "loss": 1.5447, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.1433854907539118, | |
| "grad_norm": 0.5164194703102112, | |
| "learning_rate": 0.00018524619515131679, | |
| "loss": 1.6922, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.14395448079658607, | |
| "grad_norm": 0.5288499593734741, | |
| "learning_rate": 0.0001851271333959093, | |
| "loss": 1.5596, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.14452347083926032, | |
| "grad_norm": 0.509348452091217, | |
| "learning_rate": 0.00018500763174301448, | |
| "loss": 1.6263, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.14509246088193456, | |
| "grad_norm": 0.5377824902534485, | |
| "learning_rate": 0.00018488769081016133, | |
| "loss": 1.4711, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.14566145092460883, | |
| "grad_norm": 0.5068728923797607, | |
| "learning_rate": 0.00018476731121714894, | |
| "loss": 1.6706, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.14623044096728308, | |
| "grad_norm": 0.5097038745880127, | |
| "learning_rate": 0.0001846464935860431, | |
| "loss": 1.5841, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.14679943100995732, | |
| "grad_norm": 0.5391016006469727, | |
| "learning_rate": 0.0001845252385411732, | |
| "loss": 1.6935, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.14736842105263157, | |
| "grad_norm": 0.5154038667678833, | |
| "learning_rate": 0.00018440354670912906, | |
| "loss": 1.3827, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.14793741109530584, | |
| "grad_norm": 0.5789750814437866, | |
| "learning_rate": 0.00018428141871875743, | |
| "loss": 1.545, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14850640113798008, | |
| "grad_norm": 0.5456128716468811, | |
| "learning_rate": 0.00018415885520115915, | |
| "loss": 1.5359, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.14907539118065433, | |
| "grad_norm": 0.6158856749534607, | |
| "learning_rate": 0.00018403585678968551, | |
| "loss": 1.7601, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.1496443812233286, | |
| "grad_norm": 0.4721933603286743, | |
| "learning_rate": 0.00018391242411993516, | |
| "loss": 1.3328, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.15021337126600284, | |
| "grad_norm": 0.5242535471916199, | |
| "learning_rate": 0.00018378855782975084, | |
| "loss": 1.3359, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.1507823613086771, | |
| "grad_norm": 0.5116239190101624, | |
| "learning_rate": 0.000183664258559216, | |
| "loss": 1.218, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.15135135135135136, | |
| "grad_norm": 0.5715349316596985, | |
| "learning_rate": 0.0001835395269506515, | |
| "loss": 1.7737, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.1519203413940256, | |
| "grad_norm": 0.5294284224510193, | |
| "learning_rate": 0.0001834143636486124, | |
| "loss": 1.7273, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.15248933143669985, | |
| "grad_norm": 0.5225195288658142, | |
| "learning_rate": 0.0001832887692998845, | |
| "loss": 1.5397, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.15305832147937412, | |
| "grad_norm": 0.5032251477241516, | |
| "learning_rate": 0.00018316274455348105, | |
| "loss": 1.4483, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.15362731152204837, | |
| "grad_norm": 0.5733814835548401, | |
| "learning_rate": 0.00018303629006063943, | |
| "loss": 1.5798, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1541963015647226, | |
| "grad_norm": 0.5273986458778381, | |
| "learning_rate": 0.0001829094064748177, | |
| "loss": 1.6515, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.15476529160739688, | |
| "grad_norm": 0.563911497592926, | |
| "learning_rate": 0.00018278209445169135, | |
| "loss": 1.6408, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.15533428165007113, | |
| "grad_norm": 0.5052376985549927, | |
| "learning_rate": 0.00018265435464914973, | |
| "loss": 1.3572, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.15590327169274537, | |
| "grad_norm": 0.5052018761634827, | |
| "learning_rate": 0.0001825261877272928, | |
| "loss": 1.5019, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.15647226173541964, | |
| "grad_norm": 0.4795508086681366, | |
| "learning_rate": 0.00018239759434842773, | |
| "loss": 1.0659, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.1570412517780939, | |
| "grad_norm": 0.5224232077598572, | |
| "learning_rate": 0.00018226857517706537, | |
| "loss": 1.6048, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.15761024182076813, | |
| "grad_norm": 0.5337119698524475, | |
| "learning_rate": 0.00018213913087991685, | |
| "loss": 1.4884, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.15817923186344238, | |
| "grad_norm": 0.48973479866981506, | |
| "learning_rate": 0.0001820092621258902, | |
| "loss": 1.3599, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.15874822190611665, | |
| "grad_norm": 0.4995887577533722, | |
| "learning_rate": 0.0001818789695860868, | |
| "loss": 1.5088, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.1593172119487909, | |
| "grad_norm": 0.513390064239502, | |
| "learning_rate": 0.00018174825393379798, | |
| "loss": 1.5376, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15988620199146514, | |
| "grad_norm": 0.5285114645957947, | |
| "learning_rate": 0.00018161711584450152, | |
| "loss": 1.706, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.1604551920341394, | |
| "grad_norm": 0.5384095907211304, | |
| "learning_rate": 0.00018148555599585816, | |
| "loss": 1.474, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.16102418207681365, | |
| "grad_norm": 0.5326551795005798, | |
| "learning_rate": 0.0001813535750677081, | |
| "loss": 1.4764, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.1615931721194879, | |
| "grad_norm": 0.538357675075531, | |
| "learning_rate": 0.0001812211737420675, | |
| "loss": 1.7382, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.16216216216216217, | |
| "grad_norm": 0.5192847847938538, | |
| "learning_rate": 0.00018108835270312488, | |
| "loss": 1.5809, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.16273115220483642, | |
| "grad_norm": 0.5059441328048706, | |
| "learning_rate": 0.00018095511263723768, | |
| "loss": 1.3315, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.16330014224751066, | |
| "grad_norm": 0.542091429233551, | |
| "learning_rate": 0.00018082145423292868, | |
| "loss": 1.394, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.16386913229018493, | |
| "grad_norm": 0.5587398409843445, | |
| "learning_rate": 0.00018068737818088248, | |
| "loss": 1.5478, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.16443812233285918, | |
| "grad_norm": 0.5091587901115417, | |
| "learning_rate": 0.00018055288517394174, | |
| "loss": 1.4298, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.16500711237553342, | |
| "grad_norm": 0.5347201228141785, | |
| "learning_rate": 0.00018041797590710398, | |
| "loss": 1.4504, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1655761024182077, | |
| "grad_norm": 0.5370376110076904, | |
| "learning_rate": 0.00018028265107751756, | |
| "loss": 1.6061, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.16614509246088194, | |
| "grad_norm": 0.5322532057762146, | |
| "learning_rate": 0.00018014691138447834, | |
| "loss": 1.5102, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.16671408250355618, | |
| "grad_norm": 0.4970771074295044, | |
| "learning_rate": 0.00018001075752942605, | |
| "loss": 1.3017, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.16728307254623045, | |
| "grad_norm": 0.5143032670021057, | |
| "learning_rate": 0.00017987419021594053, | |
| "loss": 1.5115, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.1678520625889047, | |
| "grad_norm": 0.4978564977645874, | |
| "learning_rate": 0.00017973721014973823, | |
| "loss": 1.33, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 0.5085217356681824, | |
| "learning_rate": 0.00017959981803866856, | |
| "loss": 1.3251, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.1689900426742532, | |
| "grad_norm": 0.522738516330719, | |
| "learning_rate": 0.0001794620145927101, | |
| "loss": 1.3305, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.16955903271692746, | |
| "grad_norm": 0.506791353225708, | |
| "learning_rate": 0.00017932380052396702, | |
| "loss": 1.5626, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.1701280227596017, | |
| "grad_norm": 0.541067898273468, | |
| "learning_rate": 0.0001791851765466655, | |
| "loss": 1.6446, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.17069701280227595, | |
| "grad_norm": 0.5105940103530884, | |
| "learning_rate": 0.0001790461433771498, | |
| "loss": 1.5842, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17126600284495022, | |
| "grad_norm": 0.49997130036354065, | |
| "learning_rate": 0.00017890670173387885, | |
| "loss": 1.5844, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.17183499288762447, | |
| "grad_norm": 0.5258059501647949, | |
| "learning_rate": 0.00017876685233742226, | |
| "loss": 1.5576, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.1724039829302987, | |
| "grad_norm": 0.5664198398590088, | |
| "learning_rate": 0.00017862659591045673, | |
| "loss": 1.4313, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.17297297297297298, | |
| "grad_norm": 0.5197086930274963, | |
| "learning_rate": 0.00017848593317776234, | |
| "loss": 1.4374, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.17354196301564723, | |
| "grad_norm": 0.5377213954925537, | |
| "learning_rate": 0.0001783448648662188, | |
| "loss": 1.3973, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.17411095305832147, | |
| "grad_norm": 0.4912850260734558, | |
| "learning_rate": 0.00017820339170480156, | |
| "loss": 1.3055, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.17467994310099574, | |
| "grad_norm": 0.5148215293884277, | |
| "learning_rate": 0.00017806151442457827, | |
| "loss": 1.5493, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.17524893314367, | |
| "grad_norm": 0.5305980443954468, | |
| "learning_rate": 0.0001779192337587048, | |
| "loss": 1.6176, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.17581792318634423, | |
| "grad_norm": 0.5322251319885254, | |
| "learning_rate": 0.0001777765504424215, | |
| "loss": 1.6621, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.1763869132290185, | |
| "grad_norm": 0.5405860543251038, | |
| "learning_rate": 0.00017763346521304955, | |
| "loss": 1.5951, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.17695590327169275, | |
| "grad_norm": 0.5762712359428406, | |
| "learning_rate": 0.00017748997880998691, | |
| "loss": 1.4609, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.177524893314367, | |
| "grad_norm": 0.5313809514045715, | |
| "learning_rate": 0.0001773460919747047, | |
| "loss": 1.4488, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.17809388335704124, | |
| "grad_norm": 0.5385677814483643, | |
| "learning_rate": 0.00017720180545074322, | |
| "loss": 1.5543, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.1786628733997155, | |
| "grad_norm": 0.5349786877632141, | |
| "learning_rate": 0.00017705711998370824, | |
| "loss": 1.5848, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.17923186344238975, | |
| "grad_norm": 0.5395460724830627, | |
| "learning_rate": 0.00017691203632126706, | |
| "loss": 1.5344, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.179800853485064, | |
| "grad_norm": 0.5073065757751465, | |
| "learning_rate": 0.0001767665552131446, | |
| "loss": 1.4227, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.18036984352773827, | |
| "grad_norm": 0.5242070555686951, | |
| "learning_rate": 0.00017662067741111974, | |
| "loss": 1.5054, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.18093883357041252, | |
| "grad_norm": 0.5271447896957397, | |
| "learning_rate": 0.00017647440366902117, | |
| "loss": 1.5675, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.18150782361308676, | |
| "grad_norm": 0.5302979946136475, | |
| "learning_rate": 0.00017632773474272363, | |
| "loss": 1.4631, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.18207681365576103, | |
| "grad_norm": 0.5438220500946045, | |
| "learning_rate": 0.00017618067139014404, | |
| "loss": 1.4737, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.18264580369843528, | |
| "grad_norm": 0.5002385377883911, | |
| "learning_rate": 0.0001760332143712375, | |
| "loss": 1.3976, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.18321479374110952, | |
| "grad_norm": 0.5478991866111755, | |
| "learning_rate": 0.00017588536444799338, | |
| "loss": 1.527, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.1837837837837838, | |
| "grad_norm": 0.5406285524368286, | |
| "learning_rate": 0.0001757371223844314, | |
| "loss": 1.4453, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.18435277382645804, | |
| "grad_norm": 0.5226593613624573, | |
| "learning_rate": 0.00017558848894659771, | |
| "loss": 1.5309, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.18492176386913228, | |
| "grad_norm": 0.5488921999931335, | |
| "learning_rate": 0.0001754394649025609, | |
| "loss": 1.6993, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.18549075391180656, | |
| "grad_norm": 0.5268238186836243, | |
| "learning_rate": 0.000175290051022408, | |
| "loss": 1.4578, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.1860597439544808, | |
| "grad_norm": 0.5236526727676392, | |
| "learning_rate": 0.00017514024807824055, | |
| "loss": 1.5276, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.18662873399715504, | |
| "grad_norm": 0.5280612707138062, | |
| "learning_rate": 0.00017499005684417057, | |
| "loss": 1.5191, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.18719772403982932, | |
| "grad_norm": 0.5311048030853271, | |
| "learning_rate": 0.0001748394780963166, | |
| "loss": 1.6317, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.18776671408250356, | |
| "grad_norm": 0.5343871712684631, | |
| "learning_rate": 0.0001746885126127997, | |
| "loss": 1.6759, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1883357041251778, | |
| "grad_norm": 0.5824495553970337, | |
| "learning_rate": 0.00017453716117373937, | |
| "loss": 1.5064, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.18890469416785205, | |
| "grad_norm": 0.5165912508964539, | |
| "learning_rate": 0.0001743854245612495, | |
| "loss": 1.413, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.18947368421052632, | |
| "grad_norm": 0.5721679329872131, | |
| "learning_rate": 0.0001742333035594345, | |
| "loss": 1.3518, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.19004267425320057, | |
| "grad_norm": 0.5547354817390442, | |
| "learning_rate": 0.00017408079895438498, | |
| "loss": 1.7325, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.1906116642958748, | |
| "grad_norm": 0.5567200779914856, | |
| "learning_rate": 0.00017392791153417398, | |
| "loss": 1.6179, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.19118065433854908, | |
| "grad_norm": 0.5186401009559631, | |
| "learning_rate": 0.00017377464208885265, | |
| "loss": 1.3499, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.19174964438122333, | |
| "grad_norm": 0.5111268758773804, | |
| "learning_rate": 0.00017362099141044626, | |
| "loss": 1.2942, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.19231863442389757, | |
| "grad_norm": 0.5359705090522766, | |
| "learning_rate": 0.0001734669602929502, | |
| "loss": 1.552, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.19288762446657184, | |
| "grad_norm": 0.5835704803466797, | |
| "learning_rate": 0.0001733125495323257, | |
| "loss": 1.3161, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.1934566145092461, | |
| "grad_norm": 0.5223122835159302, | |
| "learning_rate": 0.00017315775992649584, | |
| "loss": 1.5189, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.19402560455192033, | |
| "grad_norm": 0.5331559777259827, | |
| "learning_rate": 0.0001730025922753415, | |
| "loss": 1.7263, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.1945945945945946, | |
| "grad_norm": 0.54593425989151, | |
| "learning_rate": 0.00017284704738069698, | |
| "loss": 1.5158, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.19516358463726885, | |
| "grad_norm": 0.5385016202926636, | |
| "learning_rate": 0.000172691126046346, | |
| "loss": 1.5762, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.1957325746799431, | |
| "grad_norm": 0.4981791079044342, | |
| "learning_rate": 0.00017253482907801773, | |
| "loss": 1.3606, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.19630156472261737, | |
| "grad_norm": 0.5046445727348328, | |
| "learning_rate": 0.00017237815728338217, | |
| "loss": 1.382, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.1968705547652916, | |
| "grad_norm": 0.5692354440689087, | |
| "learning_rate": 0.00017222111147204645, | |
| "loss": 1.6214, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.19743954480796586, | |
| "grad_norm": 0.5191353559494019, | |
| "learning_rate": 0.00017206369245555036, | |
| "loss": 1.459, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.1980085348506401, | |
| "grad_norm": 0.5159747004508972, | |
| "learning_rate": 0.0001719059010473623, | |
| "loss": 1.6057, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.1980085348506401, | |
| "eval_loss": 1.506325602531433, | |
| "eval_runtime": 16.4362, | |
| "eval_samples_per_second": 45.023, | |
| "eval_steps_per_second": 22.511, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.19857752489331437, | |
| "grad_norm": 0.5306143164634705, | |
| "learning_rate": 0.00017174773806287496, | |
| "loss": 1.5776, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.19914651493598862, | |
| "grad_norm": 0.5569584369659424, | |
| "learning_rate": 0.00017158920431940117, | |
| "loss": 1.5926, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.19971550497866286, | |
| "grad_norm": 0.5538038611412048, | |
| "learning_rate": 0.0001714303006361697, | |
| "loss": 1.6146, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.20028449502133713, | |
| "grad_norm": 0.5369197130203247, | |
| "learning_rate": 0.00017127102783432097, | |
| "loss": 1.514, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.20085348506401138, | |
| "grad_norm": 0.6111621856689453, | |
| "learning_rate": 0.00017111138673690283, | |
| "loss": 1.3508, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.20142247510668562, | |
| "grad_norm": 0.5350061655044556, | |
| "learning_rate": 0.0001709513781688664, | |
| "loss": 1.5506, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2019914651493599, | |
| "grad_norm": 0.5226223468780518, | |
| "learning_rate": 0.00017079100295706154, | |
| "loss": 1.55, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.20256045519203414, | |
| "grad_norm": 0.5834634304046631, | |
| "learning_rate": 0.0001706302619302329, | |
| "loss": 1.6025, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.20312944523470838, | |
| "grad_norm": 0.564756453037262, | |
| "learning_rate": 0.0001704691559190155, | |
| "loss": 1.5174, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.20369843527738266, | |
| "grad_norm": 0.5217262506484985, | |
| "learning_rate": 0.00017030768575593025, | |
| "loss": 1.4321, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.2042674253200569, | |
| "grad_norm": 0.5270060896873474, | |
| "learning_rate": 0.0001701458522753801, | |
| "loss": 1.6006, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.20483641536273114, | |
| "grad_norm": 0.5722881555557251, | |
| "learning_rate": 0.00016998365631364527, | |
| "loss": 1.7025, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.20540540540540542, | |
| "grad_norm": 0.5267907977104187, | |
| "learning_rate": 0.00016982109870887908, | |
| "loss": 1.5108, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.20597439544807966, | |
| "grad_norm": 0.5428017973899841, | |
| "learning_rate": 0.00016965818030110382, | |
| "loss": 1.6343, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.2065433854907539, | |
| "grad_norm": 0.5151480436325073, | |
| "learning_rate": 0.0001694949019322061, | |
| "loss": 1.5242, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.20711237553342818, | |
| "grad_norm": 0.5217251181602478, | |
| "learning_rate": 0.00016933126444593273, | |
| "loss": 1.54, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.20768136557610242, | |
| "grad_norm": 0.5215661525726318, | |
| "learning_rate": 0.00016916726868788622, | |
| "loss": 1.5131, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.20825035561877667, | |
| "grad_norm": 0.5087475776672363, | |
| "learning_rate": 0.00016900291550552048, | |
| "loss": 1.6782, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2088193456614509, | |
| "grad_norm": 0.5366347432136536, | |
| "learning_rate": 0.0001688382057481364, | |
| "loss": 1.5821, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.20938833570412518, | |
| "grad_norm": 0.5469174385070801, | |
| "learning_rate": 0.00016867314026687753, | |
| "loss": 1.8795, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.20995732574679943, | |
| "grad_norm": 0.5702829957008362, | |
| "learning_rate": 0.00016850771991472563, | |
| "loss": 1.4382, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.5792803764343262, | |
| "learning_rate": 0.0001683419455464962, | |
| "loss": 1.6934, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.21109530583214794, | |
| "grad_norm": 0.5423445701599121, | |
| "learning_rate": 0.0001681758180188342, | |
| "loss": 1.5408, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.2116642958748222, | |
| "grad_norm": 0.5211445093154907, | |
| "learning_rate": 0.00016800933819020956, | |
| "loss": 1.5354, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.21223328591749643, | |
| "grad_norm": 0.5631567239761353, | |
| "learning_rate": 0.0001678425069209127, | |
| "loss": 1.6356, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2128022759601707, | |
| "grad_norm": 0.5736171007156372, | |
| "learning_rate": 0.0001676753250730501, | |
| "loss": 1.6202, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.21337126600284495, | |
| "grad_norm": 0.5194095373153687, | |
| "learning_rate": 0.00016750779351053994, | |
| "loss": 1.4419, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2139402560455192, | |
| "grad_norm": 0.5220928192138672, | |
| "learning_rate": 0.0001673399130991075, | |
| "loss": 1.4182, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.21450924608819347, | |
| "grad_norm": 0.5223848819732666, | |
| "learning_rate": 0.00016717168470628077, | |
| "loss": 1.5831, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.2150782361308677, | |
| "grad_norm": 0.5400263071060181, | |
| "learning_rate": 0.00016700310920138596, | |
| "loss": 1.579, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.21564722617354196, | |
| "grad_norm": 0.5276429653167725, | |
| "learning_rate": 0.00016683418745554299, | |
| "loss": 1.4674, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.21621621621621623, | |
| "grad_norm": 0.5498270392417908, | |
| "learning_rate": 0.000166664920341661, | |
| "loss": 1.8171, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.21678520625889047, | |
| "grad_norm": 0.5207138657569885, | |
| "learning_rate": 0.00016649530873443375, | |
| "loss": 1.3337, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.21735419630156472, | |
| "grad_norm": 0.5555972456932068, | |
| "learning_rate": 0.00016632535351033533, | |
| "loss": 1.5634, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.217923186344239, | |
| "grad_norm": 0.5569733381271362, | |
| "learning_rate": 0.00016615505554761533, | |
| "loss": 1.6649, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.21849217638691323, | |
| "grad_norm": 0.5526515245437622, | |
| "learning_rate": 0.00016598441572629458, | |
| "loss": 1.2708, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.21906116642958748, | |
| "grad_norm": 0.5405237674713135, | |
| "learning_rate": 0.0001658134349281604, | |
| "loss": 1.5085, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.21963015647226172, | |
| "grad_norm": 0.5164327621459961, | |
| "learning_rate": 0.00016564211403676213, | |
| "loss": 1.4096, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.220199146514936, | |
| "grad_norm": 0.535915195941925, | |
| "learning_rate": 0.0001654704539374066, | |
| "loss": 1.5407, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.22076813655761024, | |
| "grad_norm": 0.5589139461517334, | |
| "learning_rate": 0.0001652984555171534, | |
| "loss": 1.5837, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.22133712660028448, | |
| "grad_norm": 0.5141209959983826, | |
| "learning_rate": 0.00016512611966481056, | |
| "loss": 1.377, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.22190611664295876, | |
| "grad_norm": 0.514789879322052, | |
| "learning_rate": 0.00016495344727092973, | |
| "loss": 1.5191, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.222475106685633, | |
| "grad_norm": 0.5353395342826843, | |
| "learning_rate": 0.00016478043922780157, | |
| "loss": 1.5026, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.22304409672830725, | |
| "grad_norm": 0.5318089127540588, | |
| "learning_rate": 0.00016460709642945133, | |
| "loss": 1.5277, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.22361308677098152, | |
| "grad_norm": 0.5722904205322266, | |
| "learning_rate": 0.00016443341977163408, | |
| "loss": 1.3433, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.22418207681365576, | |
| "grad_norm": 0.542008101940155, | |
| "learning_rate": 0.0001642594101518301, | |
| "loss": 1.5241, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.22475106685633, | |
| "grad_norm": 0.5351589918136597, | |
| "learning_rate": 0.00016408506846924035, | |
| "loss": 1.6335, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.22532005689900428, | |
| "grad_norm": 0.5150931477546692, | |
| "learning_rate": 0.00016391039562478157, | |
| "loss": 1.5412, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.22588904694167852, | |
| "grad_norm": 0.5498356819152832, | |
| "learning_rate": 0.00016373539252108202, | |
| "loss": 1.5062, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.22645803698435277, | |
| "grad_norm": 0.5373052358627319, | |
| "learning_rate": 0.0001635600600624763, | |
| "loss": 1.6658, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.22702702702702704, | |
| "grad_norm": 0.5198200941085815, | |
| "learning_rate": 0.00016338439915500127, | |
| "loss": 1.3554, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.22759601706970128, | |
| "grad_norm": 0.5517953038215637, | |
| "learning_rate": 0.00016320841070639083, | |
| "loss": 1.5403, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.22816500711237553, | |
| "grad_norm": 0.5407613515853882, | |
| "learning_rate": 0.00016303209562607154, | |
| "loss": 1.5033, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.22873399715504977, | |
| "grad_norm": 0.5271732211112976, | |
| "learning_rate": 0.00016285545482515792, | |
| "loss": 1.4554, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.22930298719772405, | |
| "grad_norm": 0.5387139916419983, | |
| "learning_rate": 0.0001626784892164475, | |
| "loss": 1.7347, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.2298719772403983, | |
| "grad_norm": 0.5222678780555725, | |
| "learning_rate": 0.00016250119971441637, | |
| "loss": 1.4489, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.23044096728307253, | |
| "grad_norm": 0.5498174428939819, | |
| "learning_rate": 0.00016232358723521436, | |
| "loss": 1.6047, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2310099573257468, | |
| "grad_norm": 0.5119244456291199, | |
| "learning_rate": 0.0001621456526966603, | |
| "loss": 1.5818, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.23157894736842105, | |
| "grad_norm": 0.5584565997123718, | |
| "learning_rate": 0.00016196739701823716, | |
| "loss": 1.6863, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2321479374110953, | |
| "grad_norm": 0.5125292539596558, | |
| "learning_rate": 0.00016178882112108752, | |
| "loss": 1.4137, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.23271692745376957, | |
| "grad_norm": 0.518551230430603, | |
| "learning_rate": 0.00016160992592800872, | |
| "loss": 1.304, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.2332859174964438, | |
| "grad_norm": 0.5396437048912048, | |
| "learning_rate": 0.00016143071236344797, | |
| "loss": 1.6118, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.23385490753911806, | |
| "grad_norm": 0.6036053895950317, | |
| "learning_rate": 0.0001612511813534978, | |
| "loss": 1.5618, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.23442389758179233, | |
| "grad_norm": 0.5274645686149597, | |
| "learning_rate": 0.00016107133382589105, | |
| "loss": 1.5238, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.23499288762446657, | |
| "grad_norm": 0.5649259090423584, | |
| "learning_rate": 0.00016089117070999616, | |
| "loss": 1.4841, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.23556187766714082, | |
| "grad_norm": 0.5350419282913208, | |
| "learning_rate": 0.0001607106929368125, | |
| "loss": 1.4252, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.2361308677098151, | |
| "grad_norm": 0.5421844124794006, | |
| "learning_rate": 0.00016052990143896535, | |
| "loss": 1.3899, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.23669985775248933, | |
| "grad_norm": 0.5462636947631836, | |
| "learning_rate": 0.0001603487971507012, | |
| "loss": 1.6417, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.23726884779516358, | |
| "grad_norm": 0.564430832862854, | |
| "learning_rate": 0.00016016738100788297, | |
| "loss": 1.6418, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.23783783783783785, | |
| "grad_norm": 0.5399342179298401, | |
| "learning_rate": 0.00015998565394798492, | |
| "loss": 1.3624, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.2384068278805121, | |
| "grad_norm": 0.5136001706123352, | |
| "learning_rate": 0.00015980361691008815, | |
| "loss": 1.3956, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.23897581792318634, | |
| "grad_norm": 0.5325256586074829, | |
| "learning_rate": 0.00015962127083487548, | |
| "loss": 1.2396, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.23954480796586058, | |
| "grad_norm": 0.5132279396057129, | |
| "learning_rate": 0.00015943861666462675, | |
| "loss": 1.4461, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.24011379800853486, | |
| "grad_norm": 0.5597640872001648, | |
| "learning_rate": 0.0001592556553432139, | |
| "loss": 1.5031, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.2406827880512091, | |
| "grad_norm": 0.5563086271286011, | |
| "learning_rate": 0.00015907238781609606, | |
| "loss": 1.4839, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.24125177809388335, | |
| "grad_norm": 0.557904839515686, | |
| "learning_rate": 0.00015888881503031468, | |
| "loss": 1.6277, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.24182076813655762, | |
| "grad_norm": 0.5795301198959351, | |
| "learning_rate": 0.00015870493793448864, | |
| "loss": 1.4073, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.24238975817923186, | |
| "grad_norm": 0.5133345127105713, | |
| "learning_rate": 0.00015852075747880938, | |
| "loss": 1.3689, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.2429587482219061, | |
| "grad_norm": 0.5455712676048279, | |
| "learning_rate": 0.00015833627461503595, | |
| "loss": 1.6118, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.24352773826458038, | |
| "grad_norm": 0.5585681796073914, | |
| "learning_rate": 0.00015815149029649013, | |
| "loss": 1.5628, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.24409672830725462, | |
| "grad_norm": 0.5475082397460938, | |
| "learning_rate": 0.0001579664054780514, | |
| "loss": 1.5907, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.24466571834992887, | |
| "grad_norm": 0.530405580997467, | |
| "learning_rate": 0.0001577810211161522, | |
| "loss": 1.5324, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.24523470839260314, | |
| "grad_norm": 0.5662998557090759, | |
| "learning_rate": 0.00015759533816877275, | |
| "loss": 1.2456, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.24580369843527738, | |
| "grad_norm": 0.6249381303787231, | |
| "learning_rate": 0.0001574093575954363, | |
| "loss": 1.4694, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.24637268847795163, | |
| "grad_norm": 0.5382659435272217, | |
| "learning_rate": 0.00015722308035720408, | |
| "loss": 1.6025, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2469416785206259, | |
| "grad_norm": 0.5415714383125305, | |
| "learning_rate": 0.00015703650741667036, | |
| "loss": 1.3643, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.24751066856330015, | |
| "grad_norm": 0.540256917476654, | |
| "learning_rate": 0.0001568496397379574, | |
| "loss": 1.4577, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.2480796586059744, | |
| "grad_norm": 0.5126465559005737, | |
| "learning_rate": 0.0001566624782867106, | |
| "loss": 1.5512, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.24864864864864866, | |
| "grad_norm": 0.5520801544189453, | |
| "learning_rate": 0.0001564750240300934, | |
| "loss": 1.6545, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.2492176386913229, | |
| "grad_norm": 0.5290027260780334, | |
| "learning_rate": 0.00015628727793678233, | |
| "loss": 1.5391, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.24978662873399715, | |
| "grad_norm": 0.5835967659950256, | |
| "learning_rate": 0.00015609924097696203, | |
| "loss": 1.4657, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.2503556187766714, | |
| "grad_norm": 0.5586689710617065, | |
| "learning_rate": 0.00015591091412232012, | |
| "loss": 1.5222, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.25092460881934564, | |
| "grad_norm": 0.5292929410934448, | |
| "learning_rate": 0.00015572229834604235, | |
| "loss": 1.4726, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.25149359886201994, | |
| "grad_norm": 0.5165523290634155, | |
| "learning_rate": 0.00015553339462280748, | |
| "loss": 1.4154, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.2520625889046942, | |
| "grad_norm": 0.5475851893424988, | |
| "learning_rate": 0.00015534420392878211, | |
| "loss": 1.5885, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.25263157894736843, | |
| "grad_norm": 0.5540974736213684, | |
| "learning_rate": 0.00015515472724161598, | |
| "loss": 1.4529, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.2532005689900427, | |
| "grad_norm": 0.5251240730285645, | |
| "learning_rate": 0.00015496496554043653, | |
| "loss": 1.3794, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.2537695590327169, | |
| "grad_norm": 0.5751416683197021, | |
| "learning_rate": 0.00015477491980584417, | |
| "loss": 1.5417, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.25433854907539116, | |
| "grad_norm": 0.5411546230316162, | |
| "learning_rate": 0.00015458459101990693, | |
| "loss": 1.6787, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.2549075391180654, | |
| "grad_norm": 0.5817191004753113, | |
| "learning_rate": 0.00015439398016615558, | |
| "loss": 1.5382, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.2554765291607397, | |
| "grad_norm": 0.505901038646698, | |
| "learning_rate": 0.00015420308822957848, | |
| "loss": 1.3885, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.25604551920341395, | |
| "grad_norm": 0.5091856718063354, | |
| "learning_rate": 0.00015401191619661658, | |
| "loss": 1.4067, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2566145092460882, | |
| "grad_norm": 0.5677408576011658, | |
| "learning_rate": 0.00015382046505515803, | |
| "loss": 1.5578, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.25718349928876244, | |
| "grad_norm": 0.5270281434059143, | |
| "learning_rate": 0.00015362873579453348, | |
| "loss": 1.3921, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.2577524893314367, | |
| "grad_norm": 0.5784454345703125, | |
| "learning_rate": 0.00015343672940551067, | |
| "loss": 1.5433, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.25832147937411093, | |
| "grad_norm": 0.5490661859512329, | |
| "learning_rate": 0.00015324444688028947, | |
| "loss": 1.4543, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.25889046941678523, | |
| "grad_norm": 0.5555963516235352, | |
| "learning_rate": 0.00015305188921249665, | |
| "loss": 1.3882, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.2594594594594595, | |
| "grad_norm": 0.5918729305267334, | |
| "learning_rate": 0.0001528590573971808, | |
| "loss": 1.6544, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.2600284495021337, | |
| "grad_norm": 0.5301398038864136, | |
| "learning_rate": 0.00015266595243080714, | |
| "loss": 1.6201, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.26059743954480796, | |
| "grad_norm": 0.5327576994895935, | |
| "learning_rate": 0.0001524725753112525, | |
| "loss": 1.6861, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2611664295874822, | |
| "grad_norm": 0.5090361833572388, | |
| "learning_rate": 0.00015227892703780003, | |
| "loss": 1.2298, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.26173541963015645, | |
| "grad_norm": 0.5667193531990051, | |
| "learning_rate": 0.00015208500861113401, | |
| "loss": 1.4061, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.26230440967283075, | |
| "grad_norm": 0.5170226097106934, | |
| "learning_rate": 0.00015189082103333484, | |
| "loss": 1.3402, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.262873399715505, | |
| "grad_norm": 0.5260865688323975, | |
| "learning_rate": 0.0001516963653078737, | |
| "loss": 1.4571, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.26344238975817924, | |
| "grad_norm": 0.5484414100646973, | |
| "learning_rate": 0.00015150164243960752, | |
| "loss": 1.4822, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.2640113798008535, | |
| "grad_norm": 0.5555655360221863, | |
| "learning_rate": 0.00015130665343477358, | |
| "loss": 1.4383, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.26458036984352773, | |
| "grad_norm": 0.5628737211227417, | |
| "learning_rate": 0.0001511113993009845, | |
| "loss": 1.6092, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.265149359886202, | |
| "grad_norm": 0.5401899814605713, | |
| "learning_rate": 0.00015091588104722297, | |
| "loss": 1.4347, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.2657183499288762, | |
| "grad_norm": 0.5575911998748779, | |
| "learning_rate": 0.00015072009968383656, | |
| "loss": 1.6627, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.2662873399715505, | |
| "grad_norm": 0.539851725101471, | |
| "learning_rate": 0.00015052405622253235, | |
| "loss": 1.5648, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.26685633001422476, | |
| "grad_norm": 0.5497231483459473, | |
| "learning_rate": 0.00015032775167637193, | |
| "loss": 1.5671, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.267425320056899, | |
| "grad_norm": 0.5294174551963806, | |
| "learning_rate": 0.00015013118705976602, | |
| "loss": 1.4519, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.26799431009957325, | |
| "grad_norm": 0.5508366227149963, | |
| "learning_rate": 0.00014993436338846925, | |
| "loss": 1.2089, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.2685633001422475, | |
| "grad_norm": 0.530941903591156, | |
| "learning_rate": 0.00014973728167957498, | |
| "loss": 1.2298, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.26913229018492174, | |
| "grad_norm": 0.572995126247406, | |
| "learning_rate": 0.00014953994295150986, | |
| "loss": 1.5102, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.26970128022759604, | |
| "grad_norm": 0.5313156843185425, | |
| "learning_rate": 0.00014934234822402883, | |
| "loss": 1.3345, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 0.5710895657539368, | |
| "learning_rate": 0.0001491444985182097, | |
| "loss": 1.4461, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.27083926031294453, | |
| "grad_norm": 0.5655211210250854, | |
| "learning_rate": 0.00014894639485644784, | |
| "loss": 1.6591, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.2714082503556188, | |
| "grad_norm": 0.5507573485374451, | |
| "learning_rate": 0.00014874803826245089, | |
| "loss": 1.3442, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.271977240398293, | |
| "grad_norm": 0.5628292560577393, | |
| "learning_rate": 0.00014854942976123367, | |
| "loss": 1.6926, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.27254623044096726, | |
| "grad_norm": 0.5278828740119934, | |
| "learning_rate": 0.00014835057037911268, | |
| "loss": 1.3193, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.27311522048364156, | |
| "grad_norm": 0.550122857093811, | |
| "learning_rate": 0.0001481514611437008, | |
| "loss": 1.4085, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2736842105263158, | |
| "grad_norm": 0.5174803733825684, | |
| "learning_rate": 0.00014795210308390211, | |
| "loss": 1.2066, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.27425320056899005, | |
| "grad_norm": 0.5421956777572632, | |
| "learning_rate": 0.00014775249722990646, | |
| "loss": 1.4261, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.2748221906116643, | |
| "grad_norm": 0.5158098936080933, | |
| "learning_rate": 0.00014755264461318416, | |
| "loss": 1.277, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.27539118065433854, | |
| "grad_norm": 0.5564343929290771, | |
| "learning_rate": 0.0001473525462664808, | |
| "loss": 1.5075, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.2759601706970128, | |
| "grad_norm": 0.5485411882400513, | |
| "learning_rate": 0.0001471522032238116, | |
| "loss": 1.4847, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.27652916073968703, | |
| "grad_norm": 0.5449703931808472, | |
| "learning_rate": 0.00014695161652045641, | |
| "loss": 1.6162, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.27709815078236133, | |
| "grad_norm": 0.5641449093818665, | |
| "learning_rate": 0.00014675078719295415, | |
| "loss": 1.3614, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.2776671408250356, | |
| "grad_norm": 0.5554978251457214, | |
| "learning_rate": 0.00014654971627909747, | |
| "loss": 1.5019, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.2782361308677098, | |
| "grad_norm": 0.5530039668083191, | |
| "learning_rate": 0.0001463484048179275, | |
| "loss": 1.5116, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.27880512091038406, | |
| "grad_norm": 0.5324894189834595, | |
| "learning_rate": 0.00014614685384972835, | |
| "loss": 1.3575, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2793741109530583, | |
| "grad_norm": 0.5472353100776672, | |
| "learning_rate": 0.0001459450644160218, | |
| "loss": 1.5364, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.27994310099573255, | |
| "grad_norm": 0.5706241130828857, | |
| "learning_rate": 0.00014574303755956195, | |
| "loss": 1.5958, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.28051209103840685, | |
| "grad_norm": 0.5553603768348694, | |
| "learning_rate": 0.00014554077432432975, | |
| "loss": 1.5664, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.2810810810810811, | |
| "grad_norm": 0.542325496673584, | |
| "learning_rate": 0.00014533827575552766, | |
| "loss": 1.4275, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.28165007112375534, | |
| "grad_norm": 0.6180648803710938, | |
| "learning_rate": 0.00014513554289957424, | |
| "loss": 1.3948, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2822190611664296, | |
| "grad_norm": 0.6009839177131653, | |
| "learning_rate": 0.0001449325768040987, | |
| "loss": 1.6545, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.28278805120910383, | |
| "grad_norm": 0.58924800157547, | |
| "learning_rate": 0.00014472937851793557, | |
| "loss": 1.3284, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.2833570412517781, | |
| "grad_norm": 0.5391841530799866, | |
| "learning_rate": 0.0001445259490911192, | |
| "loss": 1.3593, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.2839260312944524, | |
| "grad_norm": 0.562134325504303, | |
| "learning_rate": 0.0001443222895748784, | |
| "loss": 1.4458, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.2844950213371266, | |
| "grad_norm": 0.5663224458694458, | |
| "learning_rate": 0.000144118401021631, | |
| "loss": 1.5136, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.28506401137980086, | |
| "grad_norm": 0.5762481689453125, | |
| "learning_rate": 0.00014391428448497825, | |
| "loss": 1.5841, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.2856330014224751, | |
| "grad_norm": 0.5568172931671143, | |
| "learning_rate": 0.00014370994101969967, | |
| "loss": 1.5863, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.28620199146514935, | |
| "grad_norm": 0.5461404323577881, | |
| "learning_rate": 0.00014350537168174738, | |
| "loss": 1.4175, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.2867709815078236, | |
| "grad_norm": 0.5522152781486511, | |
| "learning_rate": 0.00014330057752824068, | |
| "loss": 1.5865, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.28733997155049784, | |
| "grad_norm": 0.5333879590034485, | |
| "learning_rate": 0.00014309555961746067, | |
| "loss": 1.4804, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.28790896159317214, | |
| "grad_norm": 0.5656757354736328, | |
| "learning_rate": 0.00014289031900884463, | |
| "loss": 1.4009, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.2884779516358464, | |
| "grad_norm": 0.55275559425354, | |
| "learning_rate": 0.00014268485676298078, | |
| "loss": 1.3477, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.28904694167852063, | |
| "grad_norm": 0.5528755784034729, | |
| "learning_rate": 0.00014247917394160254, | |
| "loss": 1.6965, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.2896159317211949, | |
| "grad_norm": 0.5423591732978821, | |
| "learning_rate": 0.00014227327160758316, | |
| "loss": 1.3725, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.2901849217638691, | |
| "grad_norm": 0.5610995292663574, | |
| "learning_rate": 0.00014206715082493032, | |
| "loss": 1.5135, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.29075391180654336, | |
| "grad_norm": 0.550565242767334, | |
| "learning_rate": 0.00014186081265878047, | |
| "loss": 1.2824, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.29132290184921766, | |
| "grad_norm": 0.5238208174705505, | |
| "learning_rate": 0.00014165425817539343, | |
| "loss": 1.3519, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.2918918918918919, | |
| "grad_norm": 0.5561342835426331, | |
| "learning_rate": 0.00014144748844214684, | |
| "loss": 1.4381, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.29246088193456615, | |
| "grad_norm": 0.5522477030754089, | |
| "learning_rate": 0.0001412405045275306, | |
| "loss": 1.5873, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.2930298719772404, | |
| "grad_norm": 0.5491191744804382, | |
| "learning_rate": 0.0001410333075011415, | |
| "loss": 1.4527, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.29359886201991464, | |
| "grad_norm": 0.5521331429481506, | |
| "learning_rate": 0.00014082589843367752, | |
| "loss": 1.6342, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.2941678520625889, | |
| "grad_norm": 0.5632197856903076, | |
| "learning_rate": 0.0001406182783969324, | |
| "loss": 1.4758, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.29473684210526313, | |
| "grad_norm": 0.5883782505989075, | |
| "learning_rate": 0.00014041044846379, | |
| "loss": 1.4963, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.29530583214793743, | |
| "grad_norm": 0.5621269941329956, | |
| "learning_rate": 0.00014020240970821893, | |
| "loss": 1.6292, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.2958748221906117, | |
| "grad_norm": 0.5850755572319031, | |
| "learning_rate": 0.00013999416320526685, | |
| "loss": 1.5853, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2964438122332859, | |
| "grad_norm": 0.5468763113021851, | |
| "learning_rate": 0.00013978571003105502, | |
| "loss": 1.4112, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.29701280227596016, | |
| "grad_norm": 0.5954291820526123, | |
| "learning_rate": 0.00013957705126277253, | |
| "loss": 1.4785, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.2975817923186344, | |
| "grad_norm": 0.5438716411590576, | |
| "learning_rate": 0.00013936818797867102, | |
| "loss": 1.6543, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.29815078236130865, | |
| "grad_norm": 0.5444651246070862, | |
| "learning_rate": 0.00013915912125805893, | |
| "loss": 1.5327, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.29871977240398295, | |
| "grad_norm": 0.5755301117897034, | |
| "learning_rate": 0.00013894985218129602, | |
| "loss": 1.5734, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2992887624466572, | |
| "grad_norm": 0.5267385244369507, | |
| "learning_rate": 0.0001387403818297876, | |
| "loss": 1.5172, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.29985775248933144, | |
| "grad_norm": 0.5721412301063538, | |
| "learning_rate": 0.00013853071128597924, | |
| "loss": 1.617, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.3004267425320057, | |
| "grad_norm": 0.547497570514679, | |
| "learning_rate": 0.00013832084163335084, | |
| "loss": 1.4242, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.30099573257467993, | |
| "grad_norm": 0.5331338047981262, | |
| "learning_rate": 0.00013811077395641135, | |
| "loss": 1.2921, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.3015647226173542, | |
| "grad_norm": 0.5468523502349854, | |
| "learning_rate": 0.00013790050934069296, | |
| "loss": 1.3264, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3021337126600285, | |
| "grad_norm": 0.538796067237854, | |
| "learning_rate": 0.00013769004887274547, | |
| "loss": 1.4284, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.3027027027027027, | |
| "grad_norm": 0.5727618932723999, | |
| "learning_rate": 0.0001374793936401309, | |
| "loss": 1.509, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.30327169274537696, | |
| "grad_norm": 0.5127109289169312, | |
| "learning_rate": 0.00013726854473141765, | |
| "loss": 1.3145, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.3038406827880512, | |
| "grad_norm": 0.5412492156028748, | |
| "learning_rate": 0.00013705750323617495, | |
| "loss": 1.4385, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.30440967283072545, | |
| "grad_norm": 0.6073004603385925, | |
| "learning_rate": 0.0001368462702449672, | |
| "loss": 1.585, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.3049786628733997, | |
| "grad_norm": 0.6075984239578247, | |
| "learning_rate": 0.00013663484684934836, | |
| "loss": 1.6782, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.30554765291607394, | |
| "grad_norm": 0.5950874090194702, | |
| "learning_rate": 0.0001364232341418564, | |
| "loss": 1.6634, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.30611664295874824, | |
| "grad_norm": 0.5442619323730469, | |
| "learning_rate": 0.00013621143321600746, | |
| "loss": 1.6321, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3066856330014225, | |
| "grad_norm": 0.5568251609802246, | |
| "learning_rate": 0.00013599944516629045, | |
| "loss": 1.3718, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.30725462304409673, | |
| "grad_norm": 0.5321120023727417, | |
| "learning_rate": 0.00013578727108816104, | |
| "loss": 1.3387, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.307823613086771, | |
| "grad_norm": 0.6142572164535522, | |
| "learning_rate": 0.00013557491207803635, | |
| "loss": 1.4013, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.3083926031294452, | |
| "grad_norm": 0.5809832811355591, | |
| "learning_rate": 0.0001353623692332891, | |
| "loss": 1.2896, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.30896159317211946, | |
| "grad_norm": 0.5262885689735413, | |
| "learning_rate": 0.00013514964365224206, | |
| "loss": 1.4799, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.30953058321479376, | |
| "grad_norm": 0.5609673261642456, | |
| "learning_rate": 0.00013493673643416218, | |
| "loss": 1.461, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.310099573257468, | |
| "grad_norm": 0.5489050149917603, | |
| "learning_rate": 0.0001347236486792551, | |
| "loss": 1.3912, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.31066856330014225, | |
| "grad_norm": 0.55717533826828, | |
| "learning_rate": 0.0001345103814886593, | |
| "loss": 1.4207, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.3112375533428165, | |
| "grad_norm": 0.5326306819915771, | |
| "learning_rate": 0.00013429693596444067, | |
| "loss": 1.563, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.31180654338549074, | |
| "grad_norm": 0.5783535838127136, | |
| "learning_rate": 0.00013408331320958648, | |
| "loss": 1.4829, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.312375533428165, | |
| "grad_norm": 0.5628453493118286, | |
| "learning_rate": 0.00013386951432799987, | |
| "loss": 1.4815, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.3129445234708393, | |
| "grad_norm": 0.5468215346336365, | |
| "learning_rate": 0.00013365554042449427, | |
| "loss": 1.3575, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.31351351351351353, | |
| "grad_norm": 0.5711040496826172, | |
| "learning_rate": 0.00013344139260478732, | |
| "loss": 1.5833, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.3140825035561878, | |
| "grad_norm": 0.5313072204589844, | |
| "learning_rate": 0.00013322707197549555, | |
| "loss": 1.5447, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.314651493598862, | |
| "grad_norm": 0.6006999015808105, | |
| "learning_rate": 0.00013301257964412844, | |
| "loss": 1.747, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.31522048364153626, | |
| "grad_norm": 0.6007615923881531, | |
| "learning_rate": 0.00013279791671908268, | |
| "loss": 1.5486, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 0.553854763507843, | |
| "learning_rate": 0.00013258308430963664, | |
| "loss": 1.4473, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.31635846372688475, | |
| "grad_norm": 0.5920282006263733, | |
| "learning_rate": 0.00013236808352594433, | |
| "loss": 1.4883, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.31692745376955905, | |
| "grad_norm": 0.5819621682167053, | |
| "learning_rate": 0.00013215291547903006, | |
| "loss": 1.4925, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.3174964438122333, | |
| "grad_norm": 0.5728132128715515, | |
| "learning_rate": 0.0001319375812807823, | |
| "loss": 1.3921, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.31806543385490754, | |
| "grad_norm": 0.6309751868247986, | |
| "learning_rate": 0.0001317220820439481, | |
| "loss": 1.6893, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.3186344238975818, | |
| "grad_norm": 0.5545490384101868, | |
| "learning_rate": 0.00013150641888212756, | |
| "loss": 1.4053, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.31920341394025603, | |
| "grad_norm": 0.5476984977722168, | |
| "learning_rate": 0.00013129059290976767, | |
| "loss": 1.3499, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.3197724039829303, | |
| "grad_norm": 0.5255653262138367, | |
| "learning_rate": 0.00013107460524215678, | |
| "loss": 1.318, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.3203413940256046, | |
| "grad_norm": 0.649142861366272, | |
| "learning_rate": 0.0001308584569954189, | |
| "loss": 1.6503, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.3209103840682788, | |
| "grad_norm": 0.5934924483299255, | |
| "learning_rate": 0.0001306421492865077, | |
| "loss": 1.5933, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.32147937411095306, | |
| "grad_norm": 0.5277055501937866, | |
| "learning_rate": 0.00013042568323320107, | |
| "loss": 1.4174, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3220483641536273, | |
| "grad_norm": 0.5566196441650391, | |
| "learning_rate": 0.00013020905995409497, | |
| "loss": 1.4713, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.32261735419630155, | |
| "grad_norm": 0.5719363689422607, | |
| "learning_rate": 0.00012999228056859784, | |
| "loss": 1.5238, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.3231863442389758, | |
| "grad_norm": 0.5720301866531372, | |
| "learning_rate": 0.00012977534619692494, | |
| "loss": 1.5374, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.3237553342816501, | |
| "grad_norm": 0.5727265477180481, | |
| "learning_rate": 0.0001295582579600923, | |
| "loss": 1.4789, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.32432432432432434, | |
| "grad_norm": 0.5553936958312988, | |
| "learning_rate": 0.00012934101697991115, | |
| "loss": 1.2535, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3248933143669986, | |
| "grad_norm": 0.5490901470184326, | |
| "learning_rate": 0.00012912362437898192, | |
| "loss": 1.4513, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.32546230440967283, | |
| "grad_norm": 0.5691761374473572, | |
| "learning_rate": 0.0001289060812806886, | |
| "loss": 1.5947, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.3260312944523471, | |
| "grad_norm": 0.5883947610855103, | |
| "learning_rate": 0.00012868838880919294, | |
| "loss": 1.3175, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.3266002844950213, | |
| "grad_norm": 0.5340852737426758, | |
| "learning_rate": 0.00012847054808942847, | |
| "loss": 1.1903, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.32716927453769556, | |
| "grad_norm": 0.5509372353553772, | |
| "learning_rate": 0.0001282525602470949, | |
| "loss": 1.5289, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.32773826458036986, | |
| "grad_norm": 0.5860341191291809, | |
| "learning_rate": 0.00012803442640865208, | |
| "loss": 1.6618, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.3283072546230441, | |
| "grad_norm": 0.540502667427063, | |
| "learning_rate": 0.00012781614770131442, | |
| "loss": 1.5062, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.32887624466571835, | |
| "grad_norm": 0.5500742793083191, | |
| "learning_rate": 0.00012759772525304492, | |
| "loss": 1.6137, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.3294452347083926, | |
| "grad_norm": 0.550717830657959, | |
| "learning_rate": 0.00012737916019254933, | |
| "loss": 1.6204, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.33001422475106684, | |
| "grad_norm": 0.5424780249595642, | |
| "learning_rate": 0.00012716045364927035, | |
| "loss": 1.3499, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3305832147937411, | |
| "grad_norm": 0.5449280142784119, | |
| "learning_rate": 0.0001269416067533818, | |
| "loss": 1.518, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.3311522048364154, | |
| "grad_norm": 0.5500824451446533, | |
| "learning_rate": 0.0001267226206357828, | |
| "loss": 1.6019, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.33172119487908963, | |
| "grad_norm": 0.5455232262611389, | |
| "learning_rate": 0.00012650349642809197, | |
| "loss": 1.5048, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.3322901849217639, | |
| "grad_norm": 0.5600374937057495, | |
| "learning_rate": 0.00012628423526264134, | |
| "loss": 1.4539, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.3328591749644381, | |
| "grad_norm": 0.5611444115638733, | |
| "learning_rate": 0.0001260648382724708, | |
| "loss": 1.4871, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.33342816500711236, | |
| "grad_norm": 0.5722511410713196, | |
| "learning_rate": 0.00012584530659132215, | |
| "loss": 1.4491, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.3339971550497866, | |
| "grad_norm": 0.5913495421409607, | |
| "learning_rate": 0.00012562564135363313, | |
| "loss": 1.136, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.3345661450924609, | |
| "grad_norm": 0.578739583492279, | |
| "learning_rate": 0.00012540584369453162, | |
| "loss": 1.3503, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.33513513513513515, | |
| "grad_norm": 0.5618348717689514, | |
| "learning_rate": 0.00012518591474982985, | |
| "loss": 1.5827, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.3357041251778094, | |
| "grad_norm": 0.5958595871925354, | |
| "learning_rate": 0.00012496585565601853, | |
| "loss": 1.6305, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.33627311522048364, | |
| "grad_norm": 0.5362867116928101, | |
| "learning_rate": 0.00012474566755026073, | |
| "loss": 1.416, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 0.5598848462104797, | |
| "learning_rate": 0.00012452535157038641, | |
| "loss": 1.4456, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.33741109530583213, | |
| "grad_norm": 0.5422506332397461, | |
| "learning_rate": 0.00012430490885488617, | |
| "loss": 1.3472, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.3379800853485064, | |
| "grad_norm": 0.5901892781257629, | |
| "learning_rate": 0.00012408434054290561, | |
| "loss": 1.5748, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.3385490753911807, | |
| "grad_norm": 0.5219245553016663, | |
| "learning_rate": 0.00012386364777423932, | |
| "loss": 1.3369, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3391180654338549, | |
| "grad_norm": 0.5885049104690552, | |
| "learning_rate": 0.00012364283168932495, | |
| "loss": 1.5212, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.33968705547652916, | |
| "grad_norm": 0.5666311383247375, | |
| "learning_rate": 0.0001234218934292376, | |
| "loss": 1.5041, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.3402560455192034, | |
| "grad_norm": 0.6065592765808105, | |
| "learning_rate": 0.0001232008341356835, | |
| "loss": 1.5489, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.34082503556187765, | |
| "grad_norm": 0.6251218914985657, | |
| "learning_rate": 0.0001229796549509944, | |
| "loss": 1.5043, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.3413940256045519, | |
| "grad_norm": 0.562077522277832, | |
| "learning_rate": 0.00012275835701812163, | |
| "loss": 1.547, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3419630156472262, | |
| "grad_norm": 0.5375682711601257, | |
| "learning_rate": 0.00012253694148063013, | |
| "loss": 1.3999, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.34253200568990044, | |
| "grad_norm": 0.583003044128418, | |
| "learning_rate": 0.0001223154094826925, | |
| "loss": 1.641, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.3431009957325747, | |
| "grad_norm": 0.619719922542572, | |
| "learning_rate": 0.00012209376216908328, | |
| "loss": 1.5772, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.34366998577524893, | |
| "grad_norm": 0.5548385977745056, | |
| "learning_rate": 0.00012187200068517277, | |
| "loss": 1.4802, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.3442389758179232, | |
| "grad_norm": 0.5717220902442932, | |
| "learning_rate": 0.00012165012617692143, | |
| "loss": 1.533, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.3448079658605974, | |
| "grad_norm": 0.5915637016296387, | |
| "learning_rate": 0.00012142813979087356, | |
| "loss": 1.4618, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.34537695590327167, | |
| "grad_norm": 0.5780906081199646, | |
| "learning_rate": 0.00012120604267415172, | |
| "loss": 1.428, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.34594594594594597, | |
| "grad_norm": 0.6107869744300842, | |
| "learning_rate": 0.0001209838359744507, | |
| "loss": 1.6056, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.3465149359886202, | |
| "grad_norm": 0.5807276368141174, | |
| "learning_rate": 0.0001207615208400315, | |
| "loss": 1.4344, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.34708392603129445, | |
| "grad_norm": 0.5761096477508545, | |
| "learning_rate": 0.00012053909841971547, | |
| "loss": 1.6409, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3476529160739687, | |
| "grad_norm": 0.5648180246353149, | |
| "learning_rate": 0.00012031656986287835, | |
| "loss": 1.5207, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.34822190611664294, | |
| "grad_norm": 0.5846616625785828, | |
| "learning_rate": 0.00012009393631944439, | |
| "loss": 1.709, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.3487908961593172, | |
| "grad_norm": 0.5779747366905212, | |
| "learning_rate": 0.00011987119893988035, | |
| "loss": 1.5626, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.3493598862019915, | |
| "grad_norm": 0.5634474158287048, | |
| "learning_rate": 0.00011964835887518955, | |
| "loss": 1.645, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.34992887624466573, | |
| "grad_norm": 0.5536413788795471, | |
| "learning_rate": 0.00011942541727690593, | |
| "loss": 1.4927, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.35049786628734, | |
| "grad_norm": 0.5312451720237732, | |
| "learning_rate": 0.00011920237529708811, | |
| "loss": 1.3328, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.3510668563300142, | |
| "grad_norm": 0.5960412621498108, | |
| "learning_rate": 0.00011897923408831346, | |
| "loss": 1.5827, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.35163584637268847, | |
| "grad_norm": 0.598399817943573, | |
| "learning_rate": 0.00011875599480367215, | |
| "loss": 1.5477, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.3522048364153627, | |
| "grad_norm": 0.517993688583374, | |
| "learning_rate": 0.00011853265859676108, | |
| "loss": 1.3741, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.352773826458037, | |
| "grad_norm": 0.5564917922019958, | |
| "learning_rate": 0.00011830922662167803, | |
| "loss": 1.3112, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.35334281650071125, | |
| "grad_norm": 0.5626814961433411, | |
| "learning_rate": 0.00011808570003301566, | |
| "loss": 1.5272, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.3539118065433855, | |
| "grad_norm": 0.6245387196540833, | |
| "learning_rate": 0.00011786207998585559, | |
| "loss": 1.433, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.35448079658605974, | |
| "grad_norm": 0.5711420178413391, | |
| "learning_rate": 0.00011763836763576237, | |
| "loss": 1.4975, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.355049786628734, | |
| "grad_norm": 0.5550587177276611, | |
| "learning_rate": 0.00011741456413877749, | |
| "loss": 1.3973, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.35561877667140823, | |
| "grad_norm": 0.583817183971405, | |
| "learning_rate": 0.00011719067065141352, | |
| "loss": 1.4535, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3561877667140825, | |
| "grad_norm": 0.5912776589393616, | |
| "learning_rate": 0.00011696668833064795, | |
| "loss": 1.5161, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.3567567567567568, | |
| "grad_norm": 0.615287184715271, | |
| "learning_rate": 0.0001167426183339174, | |
| "loss": 1.6331, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.357325746799431, | |
| "grad_norm": 0.5431495308876038, | |
| "learning_rate": 0.00011651846181911161, | |
| "loss": 1.5279, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.35789473684210527, | |
| "grad_norm": 0.5510687232017517, | |
| "learning_rate": 0.00011629421994456723, | |
| "loss": 1.5859, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.3584637268847795, | |
| "grad_norm": 0.5746335983276367, | |
| "learning_rate": 0.0001160698938690622, | |
| "loss": 1.4053, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.35903271692745375, | |
| "grad_norm": 0.5783334374427795, | |
| "learning_rate": 0.00011584548475180943, | |
| "loss": 1.6259, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.359601706970128, | |
| "grad_norm": 0.5857696533203125, | |
| "learning_rate": 0.00011562099375245108, | |
| "loss": 1.4625, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.3601706970128023, | |
| "grad_norm": 0.580596387386322, | |
| "learning_rate": 0.00011539642203105232, | |
| "loss": 1.511, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.36073968705547654, | |
| "grad_norm": 0.5730242729187012, | |
| "learning_rate": 0.00011517177074809546, | |
| "loss": 1.6307, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.3613086770981508, | |
| "grad_norm": 0.567469596862793, | |
| "learning_rate": 0.0001149470410644741, | |
| "loss": 1.5477, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.36187766714082503, | |
| "grad_norm": 0.5704171061515808, | |
| "learning_rate": 0.00011472223414148675, | |
| "loss": 1.4716, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.3624466571834993, | |
| "grad_norm": 0.5398246645927429, | |
| "learning_rate": 0.00011449735114083127, | |
| "loss": 1.6304, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.3630156472261735, | |
| "grad_norm": 0.5576680898666382, | |
| "learning_rate": 0.0001142723932245985, | |
| "loss": 1.4775, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.3635846372688478, | |
| "grad_norm": 0.5728341341018677, | |
| "learning_rate": 0.00011404736155526645, | |
| "loss": 1.6101, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.36415362731152207, | |
| "grad_norm": 0.54744553565979, | |
| "learning_rate": 0.00011382225729569436, | |
| "loss": 1.2536, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3647226173541963, | |
| "grad_norm": 0.5593659281730652, | |
| "learning_rate": 0.00011359708160911641, | |
| "loss": 1.4138, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.36529160739687055, | |
| "grad_norm": 0.5415304899215698, | |
| "learning_rate": 0.00011337183565913599, | |
| "loss": 1.5221, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.3658605974395448, | |
| "grad_norm": 0.5653886198997498, | |
| "learning_rate": 0.00011314652060971955, | |
| "loss": 1.5221, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.36642958748221904, | |
| "grad_norm": 0.5842243432998657, | |
| "learning_rate": 0.00011292113762519061, | |
| "loss": 1.501, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.3669985775248933, | |
| "grad_norm": 0.5919954180717468, | |
| "learning_rate": 0.00011269568787022376, | |
| "loss": 1.5444, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.3675675675675676, | |
| "grad_norm": 0.5867476463317871, | |
| "learning_rate": 0.00011247017250983865, | |
| "loss": 1.4897, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.36813655761024183, | |
| "grad_norm": 0.5661168098449707, | |
| "learning_rate": 0.00011224459270939384, | |
| "loss": 1.3373, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.3687055476529161, | |
| "grad_norm": 0.5516852736473083, | |
| "learning_rate": 0.00011201894963458106, | |
| "loss": 1.6209, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.3692745376955903, | |
| "grad_norm": 0.615533709526062, | |
| "learning_rate": 0.00011179324445141883, | |
| "loss": 1.369, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.36984352773826457, | |
| "grad_norm": 0.5543255805969238, | |
| "learning_rate": 0.00011156747832624679, | |
| "loss": 1.3172, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3704125177809388, | |
| "grad_norm": 0.5759336352348328, | |
| "learning_rate": 0.00011134165242571938, | |
| "loss": 1.5896, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.3709815078236131, | |
| "grad_norm": 0.5587149858474731, | |
| "learning_rate": 0.00011111576791679994, | |
| "loss": 1.5963, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.37155049786628735, | |
| "grad_norm": 0.5666396617889404, | |
| "learning_rate": 0.00011088982596675475, | |
| "loss": 1.5253, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.3721194879089616, | |
| "grad_norm": 0.5888431668281555, | |
| "learning_rate": 0.00011066382774314683, | |
| "loss": 1.4419, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.37268847795163584, | |
| "grad_norm": 0.5519063472747803, | |
| "learning_rate": 0.00011043777441383006, | |
| "loss": 1.5396, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.3732574679943101, | |
| "grad_norm": 0.5812383890151978, | |
| "learning_rate": 0.00011021166714694297, | |
| "loss": 1.2045, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.37382645803698433, | |
| "grad_norm": 0.5881744623184204, | |
| "learning_rate": 0.000109985507110903, | |
| "loss": 1.4078, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.37439544807965863, | |
| "grad_norm": 0.5681930184364319, | |
| "learning_rate": 0.00010975929547440016, | |
| "loss": 1.4739, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.3749644381223329, | |
| "grad_norm": 0.5596330165863037, | |
| "learning_rate": 0.0001095330334063911, | |
| "loss": 1.4085, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.3755334281650071, | |
| "grad_norm": 0.5785601139068604, | |
| "learning_rate": 0.00010930672207609306, | |
| "loss": 1.4087, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.37610241820768137, | |
| "grad_norm": 0.5467891097068787, | |
| "learning_rate": 0.00010908036265297794, | |
| "loss": 1.6924, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.3766714082503556, | |
| "grad_norm": 0.5449764132499695, | |
| "learning_rate": 0.00010885395630676607, | |
| "loss": 1.5254, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.37724039829302985, | |
| "grad_norm": 0.5570394396781921, | |
| "learning_rate": 0.00010862750420742031, | |
| "loss": 1.4218, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.3778093883357041, | |
| "grad_norm": 0.5946861505508423, | |
| "learning_rate": 0.00010840100752513996, | |
| "loss": 1.6474, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.3783783783783784, | |
| "grad_norm": 0.545051097869873, | |
| "learning_rate": 0.00010817446743035462, | |
| "loss": 1.459, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.37894736842105264, | |
| "grad_norm": 0.5713635683059692, | |
| "learning_rate": 0.00010794788509371829, | |
| "loss": 1.44, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.3795163584637269, | |
| "grad_norm": 0.5865978598594666, | |
| "learning_rate": 0.00010772126168610325, | |
| "loss": 1.5968, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.38008534850640113, | |
| "grad_norm": 0.5625496506690979, | |
| "learning_rate": 0.00010749459837859408, | |
| "loss": 1.4018, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.3806543385490754, | |
| "grad_norm": 0.5960560441017151, | |
| "learning_rate": 0.00010726789634248137, | |
| "loss": 1.5808, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.3812233285917496, | |
| "grad_norm": 0.6137279868125916, | |
| "learning_rate": 0.00010704115674925604, | |
| "loss": 1.212, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3817923186344239, | |
| "grad_norm": 0.5478764772415161, | |
| "learning_rate": 0.00010681438077060291, | |
| "loss": 1.4701, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.38236130867709817, | |
| "grad_norm": 0.6135146021842957, | |
| "learning_rate": 0.000106587569578395, | |
| "loss": 1.5428, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.3829302987197724, | |
| "grad_norm": 0.5707561373710632, | |
| "learning_rate": 0.00010636072434468714, | |
| "loss": 1.5299, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.38349928876244666, | |
| "grad_norm": 0.529769778251648, | |
| "learning_rate": 0.00010613384624171016, | |
| "loss": 1.4161, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.3840682788051209, | |
| "grad_norm": 0.5672623515129089, | |
| "learning_rate": 0.00010590693644186474, | |
| "loss": 1.5084, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.38463726884779514, | |
| "grad_norm": 0.5277720093727112, | |
| "learning_rate": 0.00010567999611771528, | |
| "loss": 1.2255, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.38520625889046944, | |
| "grad_norm": 0.5478918552398682, | |
| "learning_rate": 0.00010545302644198405, | |
| "loss": 1.3878, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.3857752489331437, | |
| "grad_norm": 0.5412498712539673, | |
| "learning_rate": 0.00010522602858754487, | |
| "loss": 1.5586, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.38634423897581793, | |
| "grad_norm": 0.5770754814147949, | |
| "learning_rate": 0.00010499900372741718, | |
| "loss": 1.3127, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.3869132290184922, | |
| "grad_norm": 0.5917402505874634, | |
| "learning_rate": 0.00010477195303476011, | |
| "loss": 1.3799, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3874822190611664, | |
| "grad_norm": 0.5400240421295166, | |
| "learning_rate": 0.00010454487768286612, | |
| "loss": 1.2999, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.38805120910384067, | |
| "grad_norm": 0.5468504428863525, | |
| "learning_rate": 0.00010431777884515514, | |
| "loss": 1.3114, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.3886201991465149, | |
| "grad_norm": 0.5608039498329163, | |
| "learning_rate": 0.00010409065769516856, | |
| "loss": 1.3888, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.3891891891891892, | |
| "grad_norm": 0.5961167216300964, | |
| "learning_rate": 0.00010386351540656292, | |
| "loss": 1.5431, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.38975817923186346, | |
| "grad_norm": 0.5718376040458679, | |
| "learning_rate": 0.00010363635315310414, | |
| "loss": 1.521, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.3903271692745377, | |
| "grad_norm": 0.5798651576042175, | |
| "learning_rate": 0.00010340917210866118, | |
| "loss": 1.519, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.39089615931721194, | |
| "grad_norm": 0.5611982941627502, | |
| "learning_rate": 0.00010318197344720018, | |
| "loss": 1.499, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.3914651493598862, | |
| "grad_norm": 0.571074366569519, | |
| "learning_rate": 0.00010295475834277831, | |
| "loss": 1.4738, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.39203413940256043, | |
| "grad_norm": 0.5722329020500183, | |
| "learning_rate": 0.00010272752796953766, | |
| "loss": 1.6584, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.39260312944523473, | |
| "grad_norm": 0.5674881935119629, | |
| "learning_rate": 0.00010250028350169931, | |
| "loss": 1.5507, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.393172119487909, | |
| "grad_norm": 0.5546680688858032, | |
| "learning_rate": 0.00010227302611355712, | |
| "loss": 1.297, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.3937411095305832, | |
| "grad_norm": 0.5614904165267944, | |
| "learning_rate": 0.00010204575697947168, | |
| "loss": 1.4416, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.39431009957325747, | |
| "grad_norm": 0.5829195380210876, | |
| "learning_rate": 0.00010181847727386433, | |
| "loss": 1.5031, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.3948790896159317, | |
| "grad_norm": 0.5744046568870544, | |
| "learning_rate": 0.00010159118817121105, | |
| "loss": 1.4576, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.39544807965860596, | |
| "grad_norm": 0.572902262210846, | |
| "learning_rate": 0.00010136389084603637, | |
| "loss": 1.5078, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3960170697012802, | |
| "grad_norm": 0.5696277618408203, | |
| "learning_rate": 0.00010113658647290723, | |
| "loss": 1.4636, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.3960170697012802, | |
| "eval_loss": 1.4791862964630127, | |
| "eval_runtime": 15.3322, | |
| "eval_samples_per_second": 48.265, | |
| "eval_steps_per_second": 24.132, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.3965860597439545, | |
| "grad_norm": 0.5474138855934143, | |
| "learning_rate": 0.0001009092762264271, | |
| "loss": 1.4683, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.39715504978662874, | |
| "grad_norm": 0.6160016059875488, | |
| "learning_rate": 0.00010068196128122975, | |
| "loss": 1.6705, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.397724039829303, | |
| "grad_norm": 0.5745415687561035, | |
| "learning_rate": 0.00010045464281197327, | |
| "loss": 1.5104, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.39829302987197723, | |
| "grad_norm": 0.5802525281906128, | |
| "learning_rate": 0.0001002273219933339, | |
| "loss": 1.4029, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3988620199146515, | |
| "grad_norm": 0.5592519044876099, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6325, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.3994310099573257, | |
| "grad_norm": 0.6051873564720154, | |
| "learning_rate": 9.977267800666613e-05, | |
| "loss": 1.688, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5836036205291748, | |
| "learning_rate": 9.954535718802675e-05, | |
| "loss": 1.3107, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.40056899004267427, | |
| "grad_norm": 0.5733322501182556, | |
| "learning_rate": 9.931803871877028e-05, | |
| "loss": 1.7469, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.4011379800853485, | |
| "grad_norm": 0.5718969106674194, | |
| "learning_rate": 9.909072377357294e-05, | |
| "loss": 1.3822, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.40170697012802276, | |
| "grad_norm": 0.5877561569213867, | |
| "learning_rate": 9.88634135270928e-05, | |
| "loss": 1.6344, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.402275960170697, | |
| "grad_norm": 0.5636436939239502, | |
| "learning_rate": 9.863610915396365e-05, | |
| "loss": 1.5552, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.40284495021337124, | |
| "grad_norm": 0.5809296369552612, | |
| "learning_rate": 9.840881182878895e-05, | |
| "loss": 1.3633, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.40341394025604554, | |
| "grad_norm": 0.5500168204307556, | |
| "learning_rate": 9.81815227261357e-05, | |
| "loss": 1.4063, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.4039829302987198, | |
| "grad_norm": 0.5806904435157776, | |
| "learning_rate": 9.795424302052836e-05, | |
| "loss": 1.5629, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.40455192034139403, | |
| "grad_norm": 0.5868257880210876, | |
| "learning_rate": 9.77269738864429e-05, | |
| "loss": 1.3655, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.4051209103840683, | |
| "grad_norm": 0.5417432188987732, | |
| "learning_rate": 9.749971649830071e-05, | |
| "loss": 1.4914, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4056899004267425, | |
| "grad_norm": 0.6012546420097351, | |
| "learning_rate": 9.727247203046234e-05, | |
| "loss": 1.5365, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.40625889046941677, | |
| "grad_norm": 0.5691578388214111, | |
| "learning_rate": 9.704524165722174e-05, | |
| "loss": 1.5959, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.406827880512091, | |
| "grad_norm": 0.5487850904464722, | |
| "learning_rate": 9.681802655279986e-05, | |
| "loss": 1.4469, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.4073968705547653, | |
| "grad_norm": 0.6280918121337891, | |
| "learning_rate": 9.659082789133884e-05, | |
| "loss": 1.338, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.40796586059743956, | |
| "grad_norm": 0.5909377932548523, | |
| "learning_rate": 9.63636468468959e-05, | |
| "loss": 1.6272, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.4085348506401138, | |
| "grad_norm": 0.6044595837593079, | |
| "learning_rate": 9.613648459343708e-05, | |
| "loss": 1.5717, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.40910384068278804, | |
| "grad_norm": 0.5833640098571777, | |
| "learning_rate": 9.590934230483149e-05, | |
| "loss": 1.4213, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.4096728307254623, | |
| "grad_norm": 0.6057854890823364, | |
| "learning_rate": 9.568222115484488e-05, | |
| "loss": 1.4861, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.41024182076813653, | |
| "grad_norm": 0.5813032984733582, | |
| "learning_rate": 9.54551223171339e-05, | |
| "loss": 1.5329, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.41081081081081083, | |
| "grad_norm": 0.5498741865158081, | |
| "learning_rate": 9.522804696523991e-05, | |
| "loss": 1.4457, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4113798008534851, | |
| "grad_norm": 0.5357645750045776, | |
| "learning_rate": 9.500099627258282e-05, | |
| "loss": 1.2792, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.4119487908961593, | |
| "grad_norm": 0.5478993654251099, | |
| "learning_rate": 9.477397141245519e-05, | |
| "loss": 1.5071, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.41251778093883357, | |
| "grad_norm": 0.5776642560958862, | |
| "learning_rate": 9.454697355801598e-05, | |
| "loss": 1.3664, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.4130867709815078, | |
| "grad_norm": 0.6283994913101196, | |
| "learning_rate": 9.432000388228473e-05, | |
| "loss": 1.3994, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.41365576102418206, | |
| "grad_norm": 0.6153956651687622, | |
| "learning_rate": 9.409306355813529e-05, | |
| "loss": 1.2524, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.41422475106685636, | |
| "grad_norm": 0.5952728986740112, | |
| "learning_rate": 9.386615375828984e-05, | |
| "loss": 1.5941, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.4147937411095306, | |
| "grad_norm": 0.5799689292907715, | |
| "learning_rate": 9.36392756553129e-05, | |
| "loss": 1.3113, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.41536273115220484, | |
| "grad_norm": 0.5933107733726501, | |
| "learning_rate": 9.341243042160503e-05, | |
| "loss": 1.6378, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4159317211948791, | |
| "grad_norm": 0.5808780789375305, | |
| "learning_rate": 9.318561922939711e-05, | |
| "loss": 1.663, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.41650071123755333, | |
| "grad_norm": 0.5661304593086243, | |
| "learning_rate": 9.295884325074398e-05, | |
| "loss": 1.4145, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4170697012802276, | |
| "grad_norm": 0.577038049697876, | |
| "learning_rate": 9.273210365751862e-05, | |
| "loss": 1.4288, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.4176386913229018, | |
| "grad_norm": 0.5904839038848877, | |
| "learning_rate": 9.250540162140597e-05, | |
| "loss": 1.5257, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4182076813655761, | |
| "grad_norm": 0.5645294785499573, | |
| "learning_rate": 9.227873831389677e-05, | |
| "loss": 1.4073, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.41877667140825037, | |
| "grad_norm": 0.5541549921035767, | |
| "learning_rate": 9.205211490628173e-05, | |
| "loss": 1.3965, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.4193456614509246, | |
| "grad_norm": 0.6137387752532959, | |
| "learning_rate": 9.18255325696454e-05, | |
| "loss": 1.3849, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.41991465149359886, | |
| "grad_norm": 0.588316798210144, | |
| "learning_rate": 9.159899247486004e-05, | |
| "loss": 1.4989, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.4204836415362731, | |
| "grad_norm": 0.567848265171051, | |
| "learning_rate": 9.13724957925797e-05, | |
| "loss": 1.4989, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.554695188999176, | |
| "learning_rate": 9.114604369323395e-05, | |
| "loss": 1.4509, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.42162162162162165, | |
| "grad_norm": 0.5507339835166931, | |
| "learning_rate": 9.091963734702208e-05, | |
| "loss": 1.364, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.4221906116642959, | |
| "grad_norm": 0.569786012172699, | |
| "learning_rate": 9.069327792390695e-05, | |
| "loss": 1.5775, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.42275960170697013, | |
| "grad_norm": 0.563234806060791, | |
| "learning_rate": 9.046696659360894e-05, | |
| "loss": 1.4557, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.4233285917496444, | |
| "grad_norm": 0.5537723302841187, | |
| "learning_rate": 9.024070452559986e-05, | |
| "loss": 1.443, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.4238975817923186, | |
| "grad_norm": 0.5786699056625366, | |
| "learning_rate": 9.001449288909702e-05, | |
| "loss": 1.2683, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.42446657183499287, | |
| "grad_norm": 0.55182945728302, | |
| "learning_rate": 8.978833285305705e-05, | |
| "loss": 1.4565, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.42503556187766717, | |
| "grad_norm": 0.5818150043487549, | |
| "learning_rate": 8.956222558616998e-05, | |
| "loss": 1.6502, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.4256045519203414, | |
| "grad_norm": 0.6044638752937317, | |
| "learning_rate": 8.933617225685319e-05, | |
| "loss": 1.4631, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.42617354196301566, | |
| "grad_norm": 0.568188488483429, | |
| "learning_rate": 8.91101740332453e-05, | |
| "loss": 1.5997, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.4267425320056899, | |
| "grad_norm": 0.5530648231506348, | |
| "learning_rate": 8.888423208320008e-05, | |
| "loss": 1.174, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.42731152204836415, | |
| "grad_norm": 0.5782289505004883, | |
| "learning_rate": 8.865834757428064e-05, | |
| "loss": 1.5198, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.4278805120910384, | |
| "grad_norm": 0.5685307383537292, | |
| "learning_rate": 8.843252167375322e-05, | |
| "loss": 1.5545, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.42844950213371263, | |
| "grad_norm": 0.5832937359809875, | |
| "learning_rate": 8.820675554858115e-05, | |
| "loss": 1.5776, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.42901849217638693, | |
| "grad_norm": 0.6279184818267822, | |
| "learning_rate": 8.7981050365419e-05, | |
| "loss": 1.5975, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.4295874822190612, | |
| "grad_norm": 0.5440697073936462, | |
| "learning_rate": 8.775540729060618e-05, | |
| "loss": 1.3772, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.4301564722617354, | |
| "grad_norm": 0.6341460347175598, | |
| "learning_rate": 8.752982749016139e-05, | |
| "loss": 1.573, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.43072546230440967, | |
| "grad_norm": 0.5840321779251099, | |
| "learning_rate": 8.730431212977625e-05, | |
| "loss": 1.5051, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.4312944523470839, | |
| "grad_norm": 0.5965592265129089, | |
| "learning_rate": 8.70788623748094e-05, | |
| "loss": 1.5323, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.43186344238975816, | |
| "grad_norm": 0.5905702710151672, | |
| "learning_rate": 8.68534793902805e-05, | |
| "loss": 1.4051, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.43243243243243246, | |
| "grad_norm": 0.5640906691551208, | |
| "learning_rate": 8.662816434086404e-05, | |
| "loss": 1.6614, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4330014224751067, | |
| "grad_norm": 0.5574825406074524, | |
| "learning_rate": 8.64029183908836e-05, | |
| "loss": 1.3464, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.43357041251778095, | |
| "grad_norm": 0.5866842865943909, | |
| "learning_rate": 8.617774270430566e-05, | |
| "loss": 1.4531, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.4341394025604552, | |
| "grad_norm": 0.6260978579521179, | |
| "learning_rate": 8.595263844473353e-05, | |
| "loss": 1.4005, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.43470839260312943, | |
| "grad_norm": 0.5732872486114502, | |
| "learning_rate": 8.572760677540154e-05, | |
| "loss": 1.366, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.4352773826458037, | |
| "grad_norm": 0.5682139992713928, | |
| "learning_rate": 8.550264885916877e-05, | |
| "loss": 1.359, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.435846372688478, | |
| "grad_norm": 0.5898922085762024, | |
| "learning_rate": 8.527776585851328e-05, | |
| "loss": 1.5197, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.4364153627311522, | |
| "grad_norm": 0.5902604460716248, | |
| "learning_rate": 8.505295893552594e-05, | |
| "loss": 1.4844, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.43698435277382647, | |
| "grad_norm": 0.6057772040367126, | |
| "learning_rate": 8.482822925190452e-05, | |
| "loss": 1.5739, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.4375533428165007, | |
| "grad_norm": 0.5546793341636658, | |
| "learning_rate": 8.460357796894773e-05, | |
| "loss": 1.4748, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.43812233285917496, | |
| "grad_norm": 0.5493602156639099, | |
| "learning_rate": 8.437900624754895e-05, | |
| "loss": 1.3922, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4386913229018492, | |
| "grad_norm": 0.5499581098556519, | |
| "learning_rate": 8.415451524819058e-05, | |
| "loss": 1.574, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.43926031294452345, | |
| "grad_norm": 0.5515440702438354, | |
| "learning_rate": 8.393010613093781e-05, | |
| "loss": 1.3672, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.43982930298719775, | |
| "grad_norm": 0.5613058805465698, | |
| "learning_rate": 8.370578005543278e-05, | |
| "loss": 1.3815, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.440398293029872, | |
| "grad_norm": 0.5643707513809204, | |
| "learning_rate": 8.348153818088844e-05, | |
| "loss": 1.5947, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.44096728307254623, | |
| "grad_norm": 0.6310828924179077, | |
| "learning_rate": 8.325738166608263e-05, | |
| "loss": 1.5413, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4415362731152205, | |
| "grad_norm": 0.6655511856079102, | |
| "learning_rate": 8.303331166935209e-05, | |
| "loss": 1.5198, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.4421052631578947, | |
| "grad_norm": 0.5539633631706238, | |
| "learning_rate": 8.280932934858652e-05, | |
| "loss": 1.4308, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.44267425320056897, | |
| "grad_norm": 0.5974248647689819, | |
| "learning_rate": 8.25854358612225e-05, | |
| "loss": 1.537, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.44324324324324327, | |
| "grad_norm": 0.5987525582313538, | |
| "learning_rate": 8.236163236423767e-05, | |
| "loss": 1.5318, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.4438122332859175, | |
| "grad_norm": 0.5623188018798828, | |
| "learning_rate": 8.213792001414445e-05, | |
| "loss": 1.6016, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.44438122332859176, | |
| "grad_norm": 0.5642153024673462, | |
| "learning_rate": 8.191429996698436e-05, | |
| "loss": 1.4452, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.444950213371266, | |
| "grad_norm": 0.6042040586471558, | |
| "learning_rate": 8.1690773378322e-05, | |
| "loss": 1.6325, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.44551920341394025, | |
| "grad_norm": 0.5777531862258911, | |
| "learning_rate": 8.146734140323896e-05, | |
| "loss": 1.6388, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.4460881934566145, | |
| "grad_norm": 0.5600481629371643, | |
| "learning_rate": 8.124400519632788e-05, | |
| "loss": 1.5077, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.4466571834992888, | |
| "grad_norm": 0.5644223690032959, | |
| "learning_rate": 8.102076591168655e-05, | |
| "loss": 1.4056, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.44722617354196303, | |
| "grad_norm": 0.6023853421211243, | |
| "learning_rate": 8.079762470291191e-05, | |
| "loss": 1.713, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.4477951635846373, | |
| "grad_norm": 0.5626102685928345, | |
| "learning_rate": 8.05745827230941e-05, | |
| "loss": 1.5125, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.4483641536273115, | |
| "grad_norm": 0.5824998617172241, | |
| "learning_rate": 8.035164112481048e-05, | |
| "loss": 1.4695, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.44893314366998577, | |
| "grad_norm": 0.5714951157569885, | |
| "learning_rate": 8.01288010601197e-05, | |
| "loss": 1.4452, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.44950213371266, | |
| "grad_norm": 0.5934897065162659, | |
| "learning_rate": 7.990606368055564e-05, | |
| "loss": 1.5389, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.45007112375533426, | |
| "grad_norm": 0.5794687867164612, | |
| "learning_rate": 7.968343013712167e-05, | |
| "loss": 1.4127, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.45064011379800856, | |
| "grad_norm": 0.5628656148910522, | |
| "learning_rate": 7.946090158028455e-05, | |
| "loss": 1.4798, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.4512091038406828, | |
| "grad_norm": 0.5794563293457031, | |
| "learning_rate": 7.923847915996851e-05, | |
| "loss": 1.5584, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.45177809388335705, | |
| "grad_norm": 0.5685121417045593, | |
| "learning_rate": 7.901616402554933e-05, | |
| "loss": 1.51, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.4523470839260313, | |
| "grad_norm": 0.568209171295166, | |
| "learning_rate": 7.87939573258483e-05, | |
| "loss": 1.5588, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.45291607396870553, | |
| "grad_norm": 0.5683977603912354, | |
| "learning_rate": 7.857186020912647e-05, | |
| "loss": 1.4482, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.4534850640113798, | |
| "grad_norm": 0.5802903771400452, | |
| "learning_rate": 7.834987382307861e-05, | |
| "loss": 1.5827, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.4540540540540541, | |
| "grad_norm": 0.5780924558639526, | |
| "learning_rate": 7.812799931482721e-05, | |
| "loss": 1.4595, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.4546230440967283, | |
| "grad_norm": 0.5929847359657288, | |
| "learning_rate": 7.790623783091677e-05, | |
| "loss": 1.5512, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.45519203413940257, | |
| "grad_norm": 0.5519236326217651, | |
| "learning_rate": 7.768459051730752e-05, | |
| "loss": 1.4239, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4557610241820768, | |
| "grad_norm": 0.5426004528999329, | |
| "learning_rate": 7.74630585193699e-05, | |
| "loss": 1.3005, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.45633001422475106, | |
| "grad_norm": 0.6065943241119385, | |
| "learning_rate": 7.724164298187838e-05, | |
| "loss": 1.3966, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.4568990042674253, | |
| "grad_norm": 0.5971605777740479, | |
| "learning_rate": 7.70203450490056e-05, | |
| "loss": 1.5944, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.45746799431009955, | |
| "grad_norm": 0.5548596978187561, | |
| "learning_rate": 7.679916586431654e-05, | |
| "loss": 1.4323, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.45803698435277385, | |
| "grad_norm": 0.5478107929229736, | |
| "learning_rate": 7.657810657076243e-05, | |
| "loss": 1.3819, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.4586059743954481, | |
| "grad_norm": 0.5837447047233582, | |
| "learning_rate": 7.635716831067505e-05, | |
| "loss": 1.3941, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.45917496443812233, | |
| "grad_norm": 0.5920546650886536, | |
| "learning_rate": 7.613635222576072e-05, | |
| "loss": 1.5395, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.4597439544807966, | |
| "grad_norm": 0.6047683358192444, | |
| "learning_rate": 7.59156594570944e-05, | |
| "loss": 1.4169, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.4603129445234708, | |
| "grad_norm": 0.5774646401405334, | |
| "learning_rate": 7.569509114511386e-05, | |
| "loss": 1.5108, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.46088193456614507, | |
| "grad_norm": 0.5855366587638855, | |
| "learning_rate": 7.547464842961362e-05, | |
| "loss": 1.6545, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.46145092460881937, | |
| "grad_norm": 0.5752539038658142, | |
| "learning_rate": 7.52543324497393e-05, | |
| "loss": 1.6431, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.4620199146514936, | |
| "grad_norm": 0.5689989328384399, | |
| "learning_rate": 7.503414434398151e-05, | |
| "loss": 1.2883, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.46258890469416786, | |
| "grad_norm": 0.6341901421546936, | |
| "learning_rate": 7.481408525017013e-05, | |
| "loss": 1.4223, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.4631578947368421, | |
| "grad_norm": 0.6005092263221741, | |
| "learning_rate": 7.459415630546842e-05, | |
| "loss": 1.5522, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.46372688477951635, | |
| "grad_norm": 0.6249240636825562, | |
| "learning_rate": 7.437435864636691e-05, | |
| "loss": 1.5459, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.4642958748221906, | |
| "grad_norm": 0.5745651125907898, | |
| "learning_rate": 7.415469340867787e-05, | |
| "loss": 1.6287, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.4648648648648649, | |
| "grad_norm": 0.5915263891220093, | |
| "learning_rate": 7.393516172752919e-05, | |
| "loss": 1.4738, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.46543385490753914, | |
| "grad_norm": 0.5895527601242065, | |
| "learning_rate": 7.371576473735867e-05, | |
| "loss": 1.6939, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.4660028449502134, | |
| "grad_norm": 0.5770692825317383, | |
| "learning_rate": 7.349650357190807e-05, | |
| "loss": 1.4264, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.4665718349928876, | |
| "grad_norm": 0.6085241436958313, | |
| "learning_rate": 7.327737936421721e-05, | |
| "loss": 1.5019, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.46714082503556187, | |
| "grad_norm": 0.5652032494544983, | |
| "learning_rate": 7.305839324661823e-05, | |
| "loss": 1.3324, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.4677098150782361, | |
| "grad_norm": 0.5609267950057983, | |
| "learning_rate": 7.283954635072968e-05, | |
| "loss": 1.3902, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.46827880512091036, | |
| "grad_norm": 0.5592348575592041, | |
| "learning_rate": 7.262083980745069e-05, | |
| "loss": 1.4362, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.46884779516358466, | |
| "grad_norm": 0.5790618658065796, | |
| "learning_rate": 7.240227474695509e-05, | |
| "loss": 1.4753, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.4694167852062589, | |
| "grad_norm": 0.5804809927940369, | |
| "learning_rate": 7.218385229868559e-05, | |
| "loss": 1.2719, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.46998577524893315, | |
| "grad_norm": 0.5487887859344482, | |
| "learning_rate": 7.196557359134794e-05, | |
| "loss": 1.3212, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.4705547652916074, | |
| "grad_norm": 0.5842025876045227, | |
| "learning_rate": 7.174743975290513e-05, | |
| "loss": 1.5622, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.47112375533428164, | |
| "grad_norm": 0.580644428730011, | |
| "learning_rate": 7.152945191057154e-05, | |
| "loss": 1.4567, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.4716927453769559, | |
| "grad_norm": 0.5735095739364624, | |
| "learning_rate": 7.131161119080712e-05, | |
| "loss": 1.4547, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.4722617354196302, | |
| "grad_norm": 0.5592243671417236, | |
| "learning_rate": 7.109391871931142e-05, | |
| "loss": 1.3144, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4728307254623044, | |
| "grad_norm": 0.581495463848114, | |
| "learning_rate": 7.087637562101813e-05, | |
| "loss": 1.5145, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.47339971550497867, | |
| "grad_norm": 0.5653107762336731, | |
| "learning_rate": 7.065898302008886e-05, | |
| "loss": 1.388, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.4739687055476529, | |
| "grad_norm": 0.5776169300079346, | |
| "learning_rate": 7.04417420399077e-05, | |
| "loss": 1.5059, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.47453769559032716, | |
| "grad_norm": 0.556419312953949, | |
| "learning_rate": 7.02246538030751e-05, | |
| "loss": 1.3933, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.4751066856330014, | |
| "grad_norm": 0.5605750679969788, | |
| "learning_rate": 7.000771943140218e-05, | |
| "loss": 1.4677, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.4756756756756757, | |
| "grad_norm": 0.5609278678894043, | |
| "learning_rate": 6.979094004590507e-05, | |
| "loss": 1.4526, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.47624466571834995, | |
| "grad_norm": 0.5990177392959595, | |
| "learning_rate": 6.957431676679896e-05, | |
| "loss": 1.6215, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.4768136557610242, | |
| "grad_norm": 0.5737520456314087, | |
| "learning_rate": 6.935785071349228e-05, | |
| "loss": 1.3985, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.47738264580369844, | |
| "grad_norm": 0.5521170496940613, | |
| "learning_rate": 6.914154300458115e-05, | |
| "loss": 1.6527, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.4779516358463727, | |
| "grad_norm": 0.5809024572372437, | |
| "learning_rate": 6.892539475784326e-05, | |
| "loss": 1.5697, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4785206258890469, | |
| "grad_norm": 0.6158897876739502, | |
| "learning_rate": 6.870940709023237e-05, | |
| "loss": 1.48, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.47908961593172117, | |
| "grad_norm": 0.5950735807418823, | |
| "learning_rate": 6.849358111787246e-05, | |
| "loss": 1.3335, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.47965860597439547, | |
| "grad_norm": 0.5788929462432861, | |
| "learning_rate": 6.82779179560519e-05, | |
| "loss": 1.4746, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.4802275960170697, | |
| "grad_norm": 0.6169467568397522, | |
| "learning_rate": 6.806241871921777e-05, | |
| "loss": 1.2997, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.48079658605974396, | |
| "grad_norm": 0.5850261449813843, | |
| "learning_rate": 6.784708452096998e-05, | |
| "loss": 1.2293, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.4813655761024182, | |
| "grad_norm": 0.5514947772026062, | |
| "learning_rate": 6.763191647405568e-05, | |
| "loss": 1.3825, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.48193456614509245, | |
| "grad_norm": 0.5753430128097534, | |
| "learning_rate": 6.741691569036338e-05, | |
| "loss": 1.5195, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.4825035561877667, | |
| "grad_norm": 0.5876197814941406, | |
| "learning_rate": 6.720208328091732e-05, | |
| "loss": 1.4453, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.483072546230441, | |
| "grad_norm": 0.5744032859802246, | |
| "learning_rate": 6.69874203558716e-05, | |
| "loss": 1.4914, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.48364153627311524, | |
| "grad_norm": 0.5800637006759644, | |
| "learning_rate": 6.677292802450447e-05, | |
| "loss": 1.4932, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4842105263157895, | |
| "grad_norm": 0.5554024577140808, | |
| "learning_rate": 6.655860739521271e-05, | |
| "loss": 1.1795, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.4847795163584637, | |
| "grad_norm": 0.5711913704872131, | |
| "learning_rate": 6.634445957550577e-05, | |
| "loss": 1.486, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.48534850640113797, | |
| "grad_norm": 0.5684107542037964, | |
| "learning_rate": 6.613048567200013e-05, | |
| "loss": 1.3984, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.4859174964438122, | |
| "grad_norm": 0.5672001242637634, | |
| "learning_rate": 6.591668679041359e-05, | |
| "loss": 1.4811, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.4864864864864865, | |
| "grad_norm": 0.5804989337921143, | |
| "learning_rate": 6.570306403555937e-05, | |
| "loss": 1.3624, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.48705547652916076, | |
| "grad_norm": 0.6067745089530945, | |
| "learning_rate": 6.548961851134072e-05, | |
| "loss": 1.4192, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.487624466571835, | |
| "grad_norm": 0.576329231262207, | |
| "learning_rate": 6.527635132074493e-05, | |
| "loss": 1.6314, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.48819345661450925, | |
| "grad_norm": 0.5863393545150757, | |
| "learning_rate": 6.506326356583781e-05, | |
| "loss": 1.5669, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.4887624466571835, | |
| "grad_norm": 0.6074771285057068, | |
| "learning_rate": 6.485035634775796e-05, | |
| "loss": 1.3334, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.48933143669985774, | |
| "grad_norm": 0.5837851166725159, | |
| "learning_rate": 6.463763076671091e-05, | |
| "loss": 1.607, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.489900426742532, | |
| "grad_norm": 0.5989742875099182, | |
| "learning_rate": 6.442508792196369e-05, | |
| "loss": 1.4518, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.4904694167852063, | |
| "grad_norm": 0.5692201852798462, | |
| "learning_rate": 6.4212728911839e-05, | |
| "loss": 1.3878, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.4910384068278805, | |
| "grad_norm": 0.6134719252586365, | |
| "learning_rate": 6.400055483370957e-05, | |
| "loss": 1.5154, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.49160739687055477, | |
| "grad_norm": 0.5494038462638855, | |
| "learning_rate": 6.378856678399255e-05, | |
| "loss": 1.2968, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.492176386913229, | |
| "grad_norm": 0.5780492424964905, | |
| "learning_rate": 6.357676585814366e-05, | |
| "loss": 1.5766, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.49274537695590326, | |
| "grad_norm": 0.5398704409599304, | |
| "learning_rate": 6.336515315065168e-05, | |
| "loss": 1.4446, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.4933143669985775, | |
| "grad_norm": 0.5509852170944214, | |
| "learning_rate": 6.315372975503285e-05, | |
| "loss": 1.4465, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.4938833570412518, | |
| "grad_norm": 0.6671035885810852, | |
| "learning_rate": 6.294249676382508e-05, | |
| "loss": 1.706, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.49445234708392605, | |
| "grad_norm": 0.579408586025238, | |
| "learning_rate": 6.273145526858236e-05, | |
| "loss": 1.5695, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.4950213371266003, | |
| "grad_norm": 0.571058988571167, | |
| "learning_rate": 6.252060635986911e-05, | |
| "loss": 1.3541, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.49559032716927454, | |
| "grad_norm": 0.5792422890663147, | |
| "learning_rate": 6.230995112725454e-05, | |
| "loss": 1.4329, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.4961593172119488, | |
| "grad_norm": 0.5893927216529846, | |
| "learning_rate": 6.209949065930706e-05, | |
| "loss": 1.4674, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.496728307254623, | |
| "grad_norm": 0.5954142212867737, | |
| "learning_rate": 6.188922604358865e-05, | |
| "loss": 1.4462, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.4972972972972973, | |
| "grad_norm": 0.6741952896118164, | |
| "learning_rate": 6.16791583666492e-05, | |
| "loss": 1.6458, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.49786628733997157, | |
| "grad_norm": 0.6125763654708862, | |
| "learning_rate": 6.146928871402081e-05, | |
| "loss": 1.5387, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.4984352773826458, | |
| "grad_norm": 0.5839952230453491, | |
| "learning_rate": 6.12596181702124e-05, | |
| "loss": 1.6821, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.49900426742532006, | |
| "grad_norm": 0.5859706401824951, | |
| "learning_rate": 6.1050147818704e-05, | |
| "loss": 1.4713, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.4995732574679943, | |
| "grad_norm": 0.5910811424255371, | |
| "learning_rate": 6.0840878741941057e-05, | |
| "loss": 1.59, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5001422475106686, | |
| "grad_norm": 0.6297405958175659, | |
| "learning_rate": 6.063181202132901e-05, | |
| "loss": 1.5881, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.5007112375533428, | |
| "grad_norm": 0.5714183449745178, | |
| "learning_rate": 6.0422948737227504e-05, | |
| "loss": 1.5894, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5012802275960171, | |
| "grad_norm": 0.5969492197036743, | |
| "learning_rate": 6.0214289968945004e-05, | |
| "loss": 1.6697, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.5018492176386913, | |
| "grad_norm": 0.5817530155181885, | |
| "learning_rate": 6.000583679473315e-05, | |
| "loss": 1.5806, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5024182076813656, | |
| "grad_norm": 0.5869944095611572, | |
| "learning_rate": 5.979759029178107e-05, | |
| "loss": 1.4565, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.5029871977240399, | |
| "grad_norm": 0.5745888948440552, | |
| "learning_rate": 5.958955153621004e-05, | |
| "loss": 1.5645, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5035561877667141, | |
| "grad_norm": 0.549628734588623, | |
| "learning_rate": 5.938172160306765e-05, | |
| "loss": 1.5017, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5041251778093884, | |
| "grad_norm": 0.5471094250679016, | |
| "learning_rate": 5.9174101566322504e-05, | |
| "loss": 1.2781, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5046941678520626, | |
| "grad_norm": 0.5772054195404053, | |
| "learning_rate": 5.896669249885851e-05, | |
| "loss": 1.386, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.5052631578947369, | |
| "grad_norm": 0.6143761873245239, | |
| "learning_rate": 5.875949547246939e-05, | |
| "loss": 1.5432, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.505832147937411, | |
| "grad_norm": 0.5768917202949524, | |
| "learning_rate": 5.8552511557853204e-05, | |
| "loss": 1.6945, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.5064011379800853, | |
| "grad_norm": 0.5644556283950806, | |
| "learning_rate": 5.8345741824606617e-05, | |
| "loss": 1.5163, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5069701280227596, | |
| "grad_norm": 0.6083329916000366, | |
| "learning_rate": 5.813918734121955e-05, | |
| "loss": 1.7979, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.5075391180654338, | |
| "grad_norm": 0.5543102025985718, | |
| "learning_rate": 5.7932849175069705e-05, | |
| "loss": 1.5558, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.5081081081081081, | |
| "grad_norm": 0.6090741753578186, | |
| "learning_rate": 5.7726728392416874e-05, | |
| "loss": 1.6233, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.5086770981507823, | |
| "grad_norm": 0.556496798992157, | |
| "learning_rate": 5.7520826058397525e-05, | |
| "loss": 1.5755, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5092460881934566, | |
| "grad_norm": 0.6258504986763, | |
| "learning_rate": 5.731514323701927e-05, | |
| "loss": 1.6054, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.5098150782361308, | |
| "grad_norm": 0.6283307671546936, | |
| "learning_rate": 5.7109680991155364e-05, | |
| "loss": 1.8276, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5103840682788051, | |
| "grad_norm": 0.5817832946777344, | |
| "learning_rate": 5.690444038253935e-05, | |
| "loss": 1.6388, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.5109530583214794, | |
| "grad_norm": 0.5892955660820007, | |
| "learning_rate": 5.669942247175933e-05, | |
| "loss": 1.2641, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5115220483641536, | |
| "grad_norm": 0.5834968686103821, | |
| "learning_rate": 5.649462831825265e-05, | |
| "loss": 1.4207, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.5120910384068279, | |
| "grad_norm": 0.5753495693206787, | |
| "learning_rate": 5.629005898030035e-05, | |
| "loss": 1.4724, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5126600284495021, | |
| "grad_norm": 0.6050419211387634, | |
| "learning_rate": 5.608571551502175e-05, | |
| "loss": 1.7189, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.5132290184921764, | |
| "grad_norm": 0.5946124196052551, | |
| "learning_rate": 5.588159897836902e-05, | |
| "loss": 1.3803, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.5137980085348507, | |
| "grad_norm": 0.5731397867202759, | |
| "learning_rate": 5.56777104251216e-05, | |
| "loss": 1.7426, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.5143669985775249, | |
| "grad_norm": 0.5813397169113159, | |
| "learning_rate": 5.5474050908880814e-05, | |
| "loss": 1.4898, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.5149359886201992, | |
| "grad_norm": 0.5610973834991455, | |
| "learning_rate": 5.5270621482064465e-05, | |
| "loss": 1.4937, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5155049786628734, | |
| "grad_norm": 0.5550079941749573, | |
| "learning_rate": 5.50674231959013e-05, | |
| "loss": 1.3543, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.5160739687055477, | |
| "grad_norm": 0.596593976020813, | |
| "learning_rate": 5.4864457100425783e-05, | |
| "loss": 1.5856, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.5166429587482219, | |
| "grad_norm": 0.6018926501274109, | |
| "learning_rate": 5.4661724244472355e-05, | |
| "loss": 1.5092, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5172119487908962, | |
| "grad_norm": 0.6650524735450745, | |
| "learning_rate": 5.4459225675670264e-05, | |
| "loss": 1.7059, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.5177809388335705, | |
| "grad_norm": 0.5858013033866882, | |
| "learning_rate": 5.425696244043807e-05, | |
| "loss": 1.4591, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5183499288762446, | |
| "grad_norm": 0.555473268032074, | |
| "learning_rate": 5.405493558397824e-05, | |
| "loss": 1.401, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.518918918918919, | |
| "grad_norm": 0.6246885061264038, | |
| "learning_rate": 5.385314615027168e-05, | |
| "loss": 1.4415, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5194879089615931, | |
| "grad_norm": 0.608062207698822, | |
| "learning_rate": 5.365159518207252e-05, | |
| "loss": 1.4239, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.5200568990042674, | |
| "grad_norm": 0.5979565382003784, | |
| "learning_rate": 5.345028372090256e-05, | |
| "loss": 1.4656, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5206258890469416, | |
| "grad_norm": 0.6553084254264832, | |
| "learning_rate": 5.324921280704589e-05, | |
| "loss": 1.4609, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5211948790896159, | |
| "grad_norm": 0.5839146971702576, | |
| "learning_rate": 5.304838347954363e-05, | |
| "loss": 1.5546, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.5217638691322902, | |
| "grad_norm": 0.5618466734886169, | |
| "learning_rate": 5.284779677618841e-05, | |
| "loss": 1.4078, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.5223328591749644, | |
| "grad_norm": 0.6020224690437317, | |
| "learning_rate": 5.264745373351923e-05, | |
| "loss": 1.568, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.5229018492176387, | |
| "grad_norm": 0.6049513220787048, | |
| "learning_rate": 5.244735538681584e-05, | |
| "loss": 1.3196, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.5234708392603129, | |
| "grad_norm": 0.5781171917915344, | |
| "learning_rate": 5.224750277009358e-05, | |
| "loss": 1.5366, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5240398293029872, | |
| "grad_norm": 0.6478269696235657, | |
| "learning_rate": 5.204789691609793e-05, | |
| "loss": 1.5281, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.5246088193456615, | |
| "grad_norm": 0.598915696144104, | |
| "learning_rate": 5.184853885629921e-05, | |
| "loss": 1.5734, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.5251778093883357, | |
| "grad_norm": 0.589694619178772, | |
| "learning_rate": 5.1649429620887334e-05, | |
| "loss": 1.4307, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.52574679943101, | |
| "grad_norm": 0.5483283996582031, | |
| "learning_rate": 5.145057023876634e-05, | |
| "loss": 1.4334, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 0.5908382534980774, | |
| "learning_rate": 5.125196173754914e-05, | |
| "loss": 1.588, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5268847795163585, | |
| "grad_norm": 0.5898739695549011, | |
| "learning_rate": 5.105360514355222e-05, | |
| "loss": 1.5685, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.5274537695590327, | |
| "grad_norm": 0.6079673171043396, | |
| "learning_rate": 5.0855501481790305e-05, | |
| "loss": 1.4421, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.528022759601707, | |
| "grad_norm": 0.5824552178382874, | |
| "learning_rate": 5.0657651775971146e-05, | |
| "loss": 1.3472, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.5285917496443813, | |
| "grad_norm": 0.5997583866119385, | |
| "learning_rate": 5.046005704849015e-05, | |
| "loss": 1.6292, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.5291607396870555, | |
| "grad_norm": 0.5740709900856018, | |
| "learning_rate": 5.026271832042506e-05, | |
| "loss": 1.4085, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5297297297297298, | |
| "grad_norm": 0.5683955550193787, | |
| "learning_rate": 5.0065636611530767e-05, | |
| "loss": 1.4722, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.530298719772404, | |
| "grad_norm": 0.5909097790718079, | |
| "learning_rate": 4.986881294023397e-05, | |
| "loss": 1.5688, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.5308677098150782, | |
| "grad_norm": 0.5723986029624939, | |
| "learning_rate": 4.967224832362807e-05, | |
| "loss": 1.718, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.5314366998577524, | |
| "grad_norm": 0.6397773623466492, | |
| "learning_rate": 4.947594377746769e-05, | |
| "loss": 1.5896, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.5320056899004267, | |
| "grad_norm": 0.6130902171134949, | |
| "learning_rate": 4.9279900316163466e-05, | |
| "loss": 1.5974, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.532574679943101, | |
| "grad_norm": 0.5888193845748901, | |
| "learning_rate": 4.908411895277704e-05, | |
| "loss": 1.569, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5331436699857752, | |
| "grad_norm": 0.5966805219650269, | |
| "learning_rate": 4.8888600699015496e-05, | |
| "loss": 1.4014, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.5337126600284495, | |
| "grad_norm": 0.6131336092948914, | |
| "learning_rate": 4.869334656522644e-05, | |
| "loss": 1.5619, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.5342816500711237, | |
| "grad_norm": 0.5846887826919556, | |
| "learning_rate": 4.849835756039254e-05, | |
| "loss": 1.5674, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.534850640113798, | |
| "grad_norm": 0.5879199504852295, | |
| "learning_rate": 4.830363469212631e-05, | |
| "loss": 1.6148, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5354196301564723, | |
| "grad_norm": 0.6081675887107849, | |
| "learning_rate": 4.8109178966665194e-05, | |
| "loss": 1.5329, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.5359886201991465, | |
| "grad_norm": 0.5982802510261536, | |
| "learning_rate": 4.791499138886603e-05, | |
| "loss": 1.5198, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.5365576102418208, | |
| "grad_norm": 0.5899128913879395, | |
| "learning_rate": 4.7721072962199975e-05, | |
| "loss": 1.331, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.537126600284495, | |
| "grad_norm": 0.6289139986038208, | |
| "learning_rate": 4.7527424688747535e-05, | |
| "loss": 1.3543, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.5376955903271693, | |
| "grad_norm": 0.5747124552726746, | |
| "learning_rate": 4.733404756919287e-05, | |
| "loss": 1.2679, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5382645803698435, | |
| "grad_norm": 0.5888437032699585, | |
| "learning_rate": 4.7140942602819236e-05, | |
| "loss": 1.3506, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.5388335704125178, | |
| "grad_norm": 0.6044580936431885, | |
| "learning_rate": 4.694811078750338e-05, | |
| "loss": 1.5955, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.5394025604551921, | |
| "grad_norm": 0.6149877905845642, | |
| "learning_rate": 4.6755553119710524e-05, | |
| "loss": 1.5836, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.5399715504978663, | |
| "grad_norm": 0.6135841012001038, | |
| "learning_rate": 4.656327059448937e-05, | |
| "loss": 1.4659, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.5868760943412781, | |
| "learning_rate": 4.637126420546653e-05, | |
| "loss": 1.3821, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5411095305832148, | |
| "grad_norm": 0.6109480261802673, | |
| "learning_rate": 4.6179534944842e-05, | |
| "loss": 1.5173, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.5416785206258891, | |
| "grad_norm": 0.6133657693862915, | |
| "learning_rate": 4.5988083803383464e-05, | |
| "loss": 1.6325, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.5422475106685632, | |
| "grad_norm": 0.593211829662323, | |
| "learning_rate": 4.57969117704215e-05, | |
| "loss": 1.361, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.5428165007112375, | |
| "grad_norm": 0.5881854891777039, | |
| "learning_rate": 4.560601983384447e-05, | |
| "loss": 1.3796, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.5433854907539118, | |
| "grad_norm": 0.65924471616745, | |
| "learning_rate": 4.5415408980093096e-05, | |
| "loss": 1.5899, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.543954480796586, | |
| "grad_norm": 0.6201925277709961, | |
| "learning_rate": 4.522508019415587e-05, | |
| "loss": 1.536, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.5445234708392603, | |
| "grad_norm": 0.5619149208068848, | |
| "learning_rate": 4.50350344595635e-05, | |
| "loss": 1.3624, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.5450924608819345, | |
| "grad_norm": 0.5680489540100098, | |
| "learning_rate": 4.484527275838404e-05, | |
| "loss": 1.4247, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.5456614509246088, | |
| "grad_norm": 0.5449238419532776, | |
| "learning_rate": 4.4655796071217937e-05, | |
| "loss": 1.3423, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.5462304409672831, | |
| "grad_norm": 0.6032193899154663, | |
| "learning_rate": 4.446660537719256e-05, | |
| "loss": 1.6294, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5467994310099573, | |
| "grad_norm": 0.5516905784606934, | |
| "learning_rate": 4.427770165395766e-05, | |
| "loss": 1.3738, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.5473684210526316, | |
| "grad_norm": 0.6235291361808777, | |
| "learning_rate": 4.4089085877679904e-05, | |
| "loss": 1.4602, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.5479374110953058, | |
| "grad_norm": 0.6051345467567444, | |
| "learning_rate": 4.3900759023037974e-05, | |
| "loss": 1.3761, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.5485064011379801, | |
| "grad_norm": 0.5858922600746155, | |
| "learning_rate": 4.3712722063217693e-05, | |
| "loss": 1.5158, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.5490753911806543, | |
| "grad_norm": 0.5914279818534851, | |
| "learning_rate": 4.3524975969906636e-05, | |
| "loss": 1.3333, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5496443812233286, | |
| "grad_norm": 0.5849418044090271, | |
| "learning_rate": 4.3337521713289407e-05, | |
| "loss": 1.5459, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.5502133712660029, | |
| "grad_norm": 0.5740037560462952, | |
| "learning_rate": 4.315036026204262e-05, | |
| "loss": 1.3858, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.5507823613086771, | |
| "grad_norm": 0.5611101984977722, | |
| "learning_rate": 4.296349258332967e-05, | |
| "loss": 1.3895, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.5513513513513514, | |
| "grad_norm": 0.585473895072937, | |
| "learning_rate": 4.277691964279594e-05, | |
| "loss": 1.2682, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.5519203413940256, | |
| "grad_norm": 0.6113364100456238, | |
| "learning_rate": 4.259064240456374e-05, | |
| "loss": 1.4292, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5524893314366999, | |
| "grad_norm": 0.6335917115211487, | |
| "learning_rate": 4.2404661831227276e-05, | |
| "loss": 1.4529, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.5530583214793741, | |
| "grad_norm": 0.574226975440979, | |
| "learning_rate": 4.2218978883847835e-05, | |
| "loss": 1.5254, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.5536273115220484, | |
| "grad_norm": 0.5865671038627625, | |
| "learning_rate": 4.203359452194863e-05, | |
| "loss": 1.5265, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.5541963015647227, | |
| "grad_norm": 0.5852011442184448, | |
| "learning_rate": 4.184850970350992e-05, | |
| "loss": 1.5834, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.5547652916073968, | |
| "grad_norm": 0.6045235395431519, | |
| "learning_rate": 4.166372538496408e-05, | |
| "loss": 1.3905, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5553342816500711, | |
| "grad_norm": 0.558691143989563, | |
| "learning_rate": 4.147924252119063e-05, | |
| "loss": 1.5088, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.5559032716927453, | |
| "grad_norm": 0.5623577237129211, | |
| "learning_rate": 4.129506206551138e-05, | |
| "loss": 1.3502, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.5564722617354196, | |
| "grad_norm": 0.5946846604347229, | |
| "learning_rate": 4.1111184969685354e-05, | |
| "loss": 1.3884, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.5570412517780939, | |
| "grad_norm": 0.5882412195205688, | |
| "learning_rate": 4.0927612183903976e-05, | |
| "loss": 1.542, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.5576102418207681, | |
| "grad_norm": 0.577912449836731, | |
| "learning_rate": 4.0744344656786124e-05, | |
| "loss": 1.324, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5581792318634424, | |
| "grad_norm": 0.5644152164459229, | |
| "learning_rate": 4.056138333537326e-05, | |
| "loss": 1.2746, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.5587482219061166, | |
| "grad_norm": 0.6058292984962463, | |
| "learning_rate": 4.037872916512455e-05, | |
| "loss": 1.5404, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.5593172119487909, | |
| "grad_norm": 0.6061570644378662, | |
| "learning_rate": 4.019638308991189e-05, | |
| "loss": 1.3896, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.5598862019914651, | |
| "grad_norm": 0.6102644205093384, | |
| "learning_rate": 4.0014346052015114e-05, | |
| "loss": 1.5365, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.5604551920341394, | |
| "grad_norm": 0.5747568011283875, | |
| "learning_rate": 3.983261899211708e-05, | |
| "loss": 1.4337, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5610241820768137, | |
| "grad_norm": 0.5756990909576416, | |
| "learning_rate": 3.965120284929878e-05, | |
| "loss": 1.4752, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.5615931721194879, | |
| "grad_norm": 0.570568323135376, | |
| "learning_rate": 3.947009856103465e-05, | |
| "loss": 1.4064, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.5621621621621622, | |
| "grad_norm": 0.6102871298789978, | |
| "learning_rate": 3.928930706318752e-05, | |
| "loss": 1.5697, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.5627311522048364, | |
| "grad_norm": 0.555619478225708, | |
| "learning_rate": 3.910882929000387e-05, | |
| "loss": 1.2905, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.5633001422475107, | |
| "grad_norm": 0.6053213477134705, | |
| "learning_rate": 3.892866617410901e-05, | |
| "loss": 1.4823, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5638691322901849, | |
| "grad_norm": 0.5635027289390564, | |
| "learning_rate": 3.874881864650224e-05, | |
| "loss": 1.2325, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.5644381223328592, | |
| "grad_norm": 0.6095726490020752, | |
| "learning_rate": 3.8569287636552024e-05, | |
| "loss": 1.5359, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.5650071123755335, | |
| "grad_norm": 0.5644766092300415, | |
| "learning_rate": 3.839007407199129e-05, | |
| "loss": 1.277, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.5655761024182077, | |
| "grad_norm": 0.5609472393989563, | |
| "learning_rate": 3.821117887891249e-05, | |
| "loss": 1.2394, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.566145092460882, | |
| "grad_norm": 0.6164161562919617, | |
| "learning_rate": 3.803260298176288e-05, | |
| "loss": 1.5458, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5667140825035561, | |
| "grad_norm": 0.6040405631065369, | |
| "learning_rate": 3.7854347303339754e-05, | |
| "loss": 1.2356, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.5672830725462304, | |
| "grad_norm": 0.6196702718734741, | |
| "learning_rate": 3.767641276478563e-05, | |
| "loss": 1.5923, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.5678520625889047, | |
| "grad_norm": 0.5526005029678345, | |
| "learning_rate": 3.749880028558364e-05, | |
| "loss": 1.5057, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.5684210526315789, | |
| "grad_norm": 0.5806797742843628, | |
| "learning_rate": 3.732151078355253e-05, | |
| "loss": 1.5355, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.5689900426742532, | |
| "grad_norm": 0.5680354237556458, | |
| "learning_rate": 3.7144545174842115e-05, | |
| "loss": 1.4381, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5695590327169274, | |
| "grad_norm": 0.5921180248260498, | |
| "learning_rate": 3.6967904373928475e-05, | |
| "loss": 1.3444, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.5701280227596017, | |
| "grad_norm": 0.5849342942237854, | |
| "learning_rate": 3.6791589293609184e-05, | |
| "loss": 1.3836, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.5706970128022759, | |
| "grad_norm": 0.5548643469810486, | |
| "learning_rate": 3.661560084499874e-05, | |
| "loss": 1.4809, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.5712660028449502, | |
| "grad_norm": 0.5976467132568359, | |
| "learning_rate": 3.64399399375237e-05, | |
| "loss": 1.4543, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.5718349928876245, | |
| "grad_norm": 0.588699996471405, | |
| "learning_rate": 3.6264607478918037e-05, | |
| "loss": 1.4448, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5724039829302987, | |
| "grad_norm": 0.5786314606666565, | |
| "learning_rate": 3.608960437521844e-05, | |
| "loss": 1.769, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.572972972972973, | |
| "grad_norm": 0.6124690771102905, | |
| "learning_rate": 3.591493153075966e-05, | |
| "loss": 1.6527, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.5735419630156472, | |
| "grad_norm": 0.5587359070777893, | |
| "learning_rate": 3.5740589848169894e-05, | |
| "loss": 1.2819, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.5741109530583215, | |
| "grad_norm": 0.6170410513877869, | |
| "learning_rate": 3.556658022836594e-05, | |
| "loss": 1.5858, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.5746799431009957, | |
| "grad_norm": 0.5927881002426147, | |
| "learning_rate": 3.5392903570548694e-05, | |
| "loss": 1.6321, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.57524893314367, | |
| "grad_norm": 0.5902583599090576, | |
| "learning_rate": 3.521956077219847e-05, | |
| "loss": 1.5162, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.5758179231863443, | |
| "grad_norm": 0.6113704442977905, | |
| "learning_rate": 3.504655272907028e-05, | |
| "loss": 1.6929, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.5763869132290185, | |
| "grad_norm": 0.5586623549461365, | |
| "learning_rate": 3.4873880335189427e-05, | |
| "loss": 1.3555, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.5769559032716928, | |
| "grad_norm": 0.5992634296417236, | |
| "learning_rate": 3.470154448284659e-05, | |
| "loss": 1.6901, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.577524893314367, | |
| "grad_norm": 0.5722742676734924, | |
| "learning_rate": 3.452954606259343e-05, | |
| "loss": 1.386, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.5780938833570413, | |
| "grad_norm": 0.6090911030769348, | |
| "learning_rate": 3.435788596323789e-05, | |
| "loss": 1.528, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.5786628733997156, | |
| "grad_norm": 0.5943465828895569, | |
| "learning_rate": 3.41865650718396e-05, | |
| "loss": 1.4567, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.5792318634423897, | |
| "grad_norm": 0.5948119163513184, | |
| "learning_rate": 3.4015584273705425e-05, | |
| "loss": 1.4926, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.579800853485064, | |
| "grad_norm": 0.6115890741348267, | |
| "learning_rate": 3.384494445238471e-05, | |
| "loss": 1.4113, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.5803698435277382, | |
| "grad_norm": 0.5682458281517029, | |
| "learning_rate": 3.367464648966471e-05, | |
| "loss": 1.514, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5809388335704125, | |
| "grad_norm": 0.5994877219200134, | |
| "learning_rate": 3.350469126556627e-05, | |
| "loss": 1.495, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.5815078236130867, | |
| "grad_norm": 0.5887535810470581, | |
| "learning_rate": 3.333507965833905e-05, | |
| "loss": 1.6428, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.582076813655761, | |
| "grad_norm": 0.5758301615715027, | |
| "learning_rate": 3.316581254445701e-05, | |
| "loss": 1.4076, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.5826458036984353, | |
| "grad_norm": 0.6117954850196838, | |
| "learning_rate": 3.299689079861408e-05, | |
| "loss": 1.4471, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.5832147937411095, | |
| "grad_norm": 0.6079879999160767, | |
| "learning_rate": 3.2828315293719245e-05, | |
| "loss": 1.485, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5837837837837838, | |
| "grad_norm": 0.5936009287834167, | |
| "learning_rate": 3.266008690089253e-05, | |
| "loss": 1.6109, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.584352773826458, | |
| "grad_norm": 0.5736754536628723, | |
| "learning_rate": 3.24922064894601e-05, | |
| "loss": 1.4451, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.5849217638691323, | |
| "grad_norm": 0.5830667018890381, | |
| "learning_rate": 3.23246749269499e-05, | |
| "loss": 1.499, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.5854907539118065, | |
| "grad_norm": 0.5929978489875793, | |
| "learning_rate": 3.2157493079087343e-05, | |
| "loss": 1.5964, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.5860597439544808, | |
| "grad_norm": 0.5748528242111206, | |
| "learning_rate": 3.1990661809790445e-05, | |
| "loss": 1.3425, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5866287339971551, | |
| "grad_norm": 0.6261157393455505, | |
| "learning_rate": 3.18241819811658e-05, | |
| "loss": 1.4458, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.5871977240398293, | |
| "grad_norm": 0.5736514925956726, | |
| "learning_rate": 3.165805445350383e-05, | |
| "loss": 1.3948, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.5877667140825036, | |
| "grad_norm": 0.6165857911109924, | |
| "learning_rate": 3.149228008527437e-05, | |
| "loss": 1.6043, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.5883357041251778, | |
| "grad_norm": 0.6109797954559326, | |
| "learning_rate": 3.132685973312251e-05, | |
| "loss": 1.5376, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.5889046941678521, | |
| "grad_norm": 0.5716987252235413, | |
| "learning_rate": 3.116179425186361e-05, | |
| "loss": 1.3554, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.5894736842105263, | |
| "grad_norm": 0.6563665866851807, | |
| "learning_rate": 3.099708449447956e-05, | |
| "loss": 1.4934, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.5900426742532006, | |
| "grad_norm": 0.6072697043418884, | |
| "learning_rate": 3.083273131211382e-05, | |
| "loss": 1.3181, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.5906116642958749, | |
| "grad_norm": 0.5769975781440735, | |
| "learning_rate": 3.066873555406727e-05, | |
| "loss": 1.5376, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.591180654338549, | |
| "grad_norm": 0.58552485704422, | |
| "learning_rate": 3.0505098067793937e-05, | |
| "loss": 1.3483, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.5917496443812233, | |
| "grad_norm": 0.6377474069595337, | |
| "learning_rate": 3.0341819698896202e-05, | |
| "loss": 1.6044, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5923186344238975, | |
| "grad_norm": 0.5746393203735352, | |
| "learning_rate": 3.017890129112094e-05, | |
| "loss": 1.5081, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.5928876244665718, | |
| "grad_norm": 0.5879509449005127, | |
| "learning_rate": 3.0016343686354775e-05, | |
| "loss": 1.7884, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.5934566145092461, | |
| "grad_norm": 0.5871498584747314, | |
| "learning_rate": 2.9854147724619886e-05, | |
| "loss": 1.4425, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.5940256045519203, | |
| "grad_norm": 0.6417199373245239, | |
| "learning_rate": 2.9692314244069764e-05, | |
| "loss": 1.4729, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.5940256045519203, | |
| "eval_loss": 1.4645270109176636, | |
| "eval_runtime": 16.2716, | |
| "eval_samples_per_second": 45.478, | |
| "eval_steps_per_second": 22.739, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.5945945945945946, | |
| "grad_norm": 0.5834308862686157, | |
| "learning_rate": 2.9530844080984565e-05, | |
| "loss": 1.4174, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.5951635846372688, | |
| "grad_norm": 0.5811535120010376, | |
| "learning_rate": 2.9369738069767107e-05, | |
| "loss": 1.2859, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.5957325746799431, | |
| "grad_norm": 0.6040303707122803, | |
| "learning_rate": 2.920899704293849e-05, | |
| "loss": 1.7526, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.5963015647226173, | |
| "grad_norm": 0.5936810970306396, | |
| "learning_rate": 2.9048621831133616e-05, | |
| "loss": 1.3031, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.5968705547652916, | |
| "grad_norm": 0.5825332999229431, | |
| "learning_rate": 2.8888613263097153e-05, | |
| "loss": 1.3483, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.5974395448079659, | |
| "grad_norm": 0.6082255244255066, | |
| "learning_rate": 2.8728972165679067e-05, | |
| "loss": 1.528, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5980085348506401, | |
| "grad_norm": 0.594572126865387, | |
| "learning_rate": 2.8569699363830316e-05, | |
| "loss": 1.5789, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.5985775248933144, | |
| "grad_norm": 0.6006420850753784, | |
| "learning_rate": 2.8410795680598846e-05, | |
| "loss": 1.5638, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.5991465149359886, | |
| "grad_norm": 0.5715523958206177, | |
| "learning_rate": 2.825226193712507e-05, | |
| "loss": 1.5222, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.5997155049786629, | |
| "grad_norm": 0.5750184059143066, | |
| "learning_rate": 2.8094098952637692e-05, | |
| "loss": 1.5154, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.6002844950213371, | |
| "grad_norm": 0.5836694240570068, | |
| "learning_rate": 2.793630754444967e-05, | |
| "loss": 1.4624, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6008534850640114, | |
| "grad_norm": 0.5644353628158569, | |
| "learning_rate": 2.7778888527953572e-05, | |
| "loss": 1.564, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6014224751066857, | |
| "grad_norm": 0.6327478885650635, | |
| "learning_rate": 2.762184271661785e-05, | |
| "loss": 1.4707, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.6019914651493599, | |
| "grad_norm": 0.5783342719078064, | |
| "learning_rate": 2.746517092198231e-05, | |
| "loss": 1.4888, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.6025604551920342, | |
| "grad_norm": 0.5796740651130676, | |
| "learning_rate": 2.730887395365397e-05, | |
| "loss": 1.5201, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.6031294452347084, | |
| "grad_norm": 0.5543321967124939, | |
| "learning_rate": 2.715295261930306e-05, | |
| "loss": 1.4378, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6036984352773827, | |
| "grad_norm": 0.6244597434997559, | |
| "learning_rate": 2.699740772465851e-05, | |
| "loss": 1.4242, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.604267425320057, | |
| "grad_norm": 0.5890554785728455, | |
| "learning_rate": 2.6842240073504165e-05, | |
| "loss": 1.4732, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.6048364153627311, | |
| "grad_norm": 0.5934953689575195, | |
| "learning_rate": 2.668745046767436e-05, | |
| "loss": 1.517, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.6054054054054054, | |
| "grad_norm": 0.5716105103492737, | |
| "learning_rate": 2.6533039707049834e-05, | |
| "loss": 1.2859, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.6059743954480796, | |
| "grad_norm": 0.5798661708831787, | |
| "learning_rate": 2.63790085895538e-05, | |
| "loss": 1.5015, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.6065433854907539, | |
| "grad_norm": 0.600385844707489, | |
| "learning_rate": 2.6225357911147385e-05, | |
| "loss": 1.4027, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.6071123755334281, | |
| "grad_norm": 0.5749977231025696, | |
| "learning_rate": 2.6072088465826038e-05, | |
| "loss": 1.5876, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.6076813655761024, | |
| "grad_norm": 0.5585724711418152, | |
| "learning_rate": 2.591920104561503e-05, | |
| "loss": 1.4756, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.6082503556187767, | |
| "grad_norm": 0.5597994327545166, | |
| "learning_rate": 2.5766696440565496e-05, | |
| "loss": 1.4621, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.6088193456614509, | |
| "grad_norm": 0.5746705532073975, | |
| "learning_rate": 2.5614575438750522e-05, | |
| "loss": 1.1686, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6093883357041252, | |
| "grad_norm": 0.6218065023422241, | |
| "learning_rate": 2.546283882626065e-05, | |
| "loss": 1.4789, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.6099573257467994, | |
| "grad_norm": 0.6003706455230713, | |
| "learning_rate": 2.5311487387200306e-05, | |
| "loss": 1.3938, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.6105263157894737, | |
| "grad_norm": 0.6234976649284363, | |
| "learning_rate": 2.516052190368341e-05, | |
| "loss": 1.4399, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.6110953058321479, | |
| "grad_norm": 0.5926810503005981, | |
| "learning_rate": 2.500994315582943e-05, | |
| "loss": 1.3032, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.6116642958748222, | |
| "grad_norm": 0.5906057953834534, | |
| "learning_rate": 2.485975192175949e-05, | |
| "loss": 1.2748, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.6122332859174965, | |
| "grad_norm": 0.5761781334877014, | |
| "learning_rate": 2.4709948977592034e-05, | |
| "loss": 1.4486, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.6128022759601707, | |
| "grad_norm": 0.6170504093170166, | |
| "learning_rate": 2.4560535097439108e-05, | |
| "loss": 1.5943, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.613371266002845, | |
| "grad_norm": 0.6018140912055969, | |
| "learning_rate": 2.4411511053402302e-05, | |
| "loss": 1.5996, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.6139402560455192, | |
| "grad_norm": 0.5538153052330017, | |
| "learning_rate": 2.4262877615568626e-05, | |
| "loss": 1.4874, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.6145092460881935, | |
| "grad_norm": 0.5843609571456909, | |
| "learning_rate": 2.411463555200667e-05, | |
| "loss": 1.269, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6150782361308678, | |
| "grad_norm": 0.5559793710708618, | |
| "learning_rate": 2.3966785628762546e-05, | |
| "loss": 1.5796, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.615647226173542, | |
| "grad_norm": 0.5636264085769653, | |
| "learning_rate": 2.381932860985596e-05, | |
| "loss": 1.2805, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.6162162162162163, | |
| "grad_norm": 0.6079363226890564, | |
| "learning_rate": 2.3672265257276383e-05, | |
| "loss": 1.5295, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.6167852062588904, | |
| "grad_norm": 0.6165335178375244, | |
| "learning_rate": 2.352559633097885e-05, | |
| "loss": 1.5551, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.6173541963015647, | |
| "grad_norm": 0.6137623190879822, | |
| "learning_rate": 2.337932258888028e-05, | |
| "loss": 1.4585, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.6179231863442389, | |
| "grad_norm": 0.5806836485862732, | |
| "learning_rate": 2.3233444786855407e-05, | |
| "loss": 1.5539, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.6184921763869132, | |
| "grad_norm": 0.5768011212348938, | |
| "learning_rate": 2.308796367873296e-05, | |
| "loss": 1.4415, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.6190611664295875, | |
| "grad_norm": 0.5644312500953674, | |
| "learning_rate": 2.294288001629177e-05, | |
| "loss": 1.4668, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.6196301564722617, | |
| "grad_norm": 0.5748885869979858, | |
| "learning_rate": 2.2798194549256792e-05, | |
| "loss": 1.3066, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.620199146514936, | |
| "grad_norm": 0.5609626770019531, | |
| "learning_rate": 2.2653908025295323e-05, | |
| "loss": 1.3779, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6207681365576102, | |
| "grad_norm": 0.5691306591033936, | |
| "learning_rate": 2.251002119001312e-05, | |
| "loss": 1.442, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.6213371266002845, | |
| "grad_norm": 0.589314877986908, | |
| "learning_rate": 2.2366534786950467e-05, | |
| "loss": 1.4482, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.6219061166429587, | |
| "grad_norm": 0.5820268392562866, | |
| "learning_rate": 2.222344955757851e-05, | |
| "loss": 1.4195, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.622475106685633, | |
| "grad_norm": 0.6211294531822205, | |
| "learning_rate": 2.2080766241295235e-05, | |
| "loss": 1.549, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.6230440967283073, | |
| "grad_norm": 0.6313804984092712, | |
| "learning_rate": 2.1938485575421752e-05, | |
| "loss": 1.6662, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.6236130867709815, | |
| "grad_norm": 0.5776501297950745, | |
| "learning_rate": 2.1796608295198462e-05, | |
| "loss": 1.3551, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.6241820768136558, | |
| "grad_norm": 0.5959988236427307, | |
| "learning_rate": 2.165513513378121e-05, | |
| "loss": 1.4321, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.62475106685633, | |
| "grad_norm": 0.5878854393959045, | |
| "learning_rate": 2.1514066822237665e-05, | |
| "loss": 1.428, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.6253200568990043, | |
| "grad_norm": 0.5653113722801208, | |
| "learning_rate": 2.137340408954329e-05, | |
| "loss": 1.3464, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.6258890469416786, | |
| "grad_norm": 0.5969840884208679, | |
| "learning_rate": 2.1233147662577767e-05, | |
| "loss": 1.4497, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6264580369843528, | |
| "grad_norm": 0.5675022602081299, | |
| "learning_rate": 2.1093298266121165e-05, | |
| "loss": 1.4289, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.6270270270270271, | |
| "grad_norm": 0.6396809816360474, | |
| "learning_rate": 2.0953856622850176e-05, | |
| "loss": 1.4908, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.6275960170697013, | |
| "grad_norm": 0.5843429565429688, | |
| "learning_rate": 2.081482345333452e-05, | |
| "loss": 1.6213, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.6281650071123756, | |
| "grad_norm": 0.5792785882949829, | |
| "learning_rate": 2.0676199476033e-05, | |
| "loss": 1.57, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.6287339971550497, | |
| "grad_norm": 0.6015857458114624, | |
| "learning_rate": 2.053798540728995e-05, | |
| "loss": 1.5818, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.629302987197724, | |
| "grad_norm": 0.5723267197608948, | |
| "learning_rate": 2.0400181961331478e-05, | |
| "loss": 1.3799, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.6298719772403983, | |
| "grad_norm": 0.6322827339172363, | |
| "learning_rate": 2.0262789850261798e-05, | |
| "loss": 1.4456, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.6304409672830725, | |
| "grad_norm": 0.6475574970245361, | |
| "learning_rate": 2.012580978405949e-05, | |
| "loss": 1.6081, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.6310099573257468, | |
| "grad_norm": 0.5577263832092285, | |
| "learning_rate": 1.9989242470573975e-05, | |
| "loss": 1.319, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 0.5767825245857239, | |
| "learning_rate": 1.9853088615521663e-05, | |
| "loss": 1.1708, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6321479374110953, | |
| "grad_norm": 0.5954525470733643, | |
| "learning_rate": 1.9717348922482458e-05, | |
| "loss": 1.3891, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.6327169274537695, | |
| "grad_norm": 0.6102060079574585, | |
| "learning_rate": 1.9582024092896033e-05, | |
| "loss": 1.3531, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.6332859174964438, | |
| "grad_norm": 0.5929975509643555, | |
| "learning_rate": 1.9447114826058233e-05, | |
| "loss": 1.5927, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.6338549075391181, | |
| "grad_norm": 0.5874947905540466, | |
| "learning_rate": 1.931262181911754e-05, | |
| "loss": 1.4828, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.6344238975817923, | |
| "grad_norm": 0.5605891346931458, | |
| "learning_rate": 1.9178545767071322e-05, | |
| "loss": 1.655, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.6349928876244666, | |
| "grad_norm": 0.5770267248153687, | |
| "learning_rate": 1.9044887362762343e-05, | |
| "loss": 1.3424, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.6355618776671408, | |
| "grad_norm": 0.639620840549469, | |
| "learning_rate": 1.8911647296875147e-05, | |
| "loss": 1.3701, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.6361308677098151, | |
| "grad_norm": 0.6051335334777832, | |
| "learning_rate": 1.87788262579325e-05, | |
| "loss": 1.3458, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.6366998577524894, | |
| "grad_norm": 0.5964054465293884, | |
| "learning_rate": 1.8646424932291896e-05, | |
| "loss": 1.5532, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.6372688477951636, | |
| "grad_norm": 0.6007367968559265, | |
| "learning_rate": 1.851444400414185e-05, | |
| "loss": 1.5373, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6378378378378379, | |
| "grad_norm": 0.5970923900604248, | |
| "learning_rate": 1.8382884155498514e-05, | |
| "loss": 1.5256, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.6384068278805121, | |
| "grad_norm": 0.6180837750434875, | |
| "learning_rate": 1.8251746066202058e-05, | |
| "loss": 1.4781, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.6389758179231864, | |
| "grad_norm": 0.6046369671821594, | |
| "learning_rate": 1.812103041391322e-05, | |
| "loss": 1.4899, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.6395448079658606, | |
| "grad_norm": 0.5703504085540771, | |
| "learning_rate": 1.799073787410982e-05, | |
| "loss": 1.5633, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.6401137980085349, | |
| "grad_norm": 0.6019449830055237, | |
| "learning_rate": 1.786086912008316e-05, | |
| "loss": 1.3685, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6406827880512092, | |
| "grad_norm": 0.5852835774421692, | |
| "learning_rate": 1.773142482293464e-05, | |
| "loss": 1.5065, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.6412517780938833, | |
| "grad_norm": 0.5664365887641907, | |
| "learning_rate": 1.7602405651572275e-05, | |
| "loss": 1.5823, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.6418207681365576, | |
| "grad_norm": 0.5778409242630005, | |
| "learning_rate": 1.747381227270718e-05, | |
| "loss": 1.4294, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.6423897581792318, | |
| "grad_norm": 0.5901049375534058, | |
| "learning_rate": 1.734564535085028e-05, | |
| "loss": 1.3996, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.6429587482219061, | |
| "grad_norm": 0.6099653244018555, | |
| "learning_rate": 1.721790554830869e-05, | |
| "loss": 1.5873, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6435277382645803, | |
| "grad_norm": 0.5981472730636597, | |
| "learning_rate": 1.7090593525182287e-05, | |
| "loss": 1.5958, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.6440967283072546, | |
| "grad_norm": 0.6043581366539001, | |
| "learning_rate": 1.6963709939360585e-05, | |
| "loss": 1.561, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.6446657183499289, | |
| "grad_norm": 0.6230269074440002, | |
| "learning_rate": 1.6837255446518964e-05, | |
| "loss": 1.4484, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.6452347083926031, | |
| "grad_norm": 0.579458475112915, | |
| "learning_rate": 1.671123070011551e-05, | |
| "loss": 1.597, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.6458036984352774, | |
| "grad_norm": 0.5982540845870972, | |
| "learning_rate": 1.6585636351387635e-05, | |
| "loss": 1.5299, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.6463726884779516, | |
| "grad_norm": 0.6335290670394897, | |
| "learning_rate": 1.646047304934851e-05, | |
| "loss": 1.6529, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.6469416785206259, | |
| "grad_norm": 0.580467164516449, | |
| "learning_rate": 1.6335741440784035e-05, | |
| "loss": 1.5459, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.6475106685633002, | |
| "grad_norm": 0.5840801000595093, | |
| "learning_rate": 1.621144217024918e-05, | |
| "loss": 1.3808, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.6480796586059744, | |
| "grad_norm": 0.592555582523346, | |
| "learning_rate": 1.608757588006483e-05, | |
| "loss": 1.5013, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.6486486486486487, | |
| "grad_norm": 0.5938240885734558, | |
| "learning_rate": 1.596414321031452e-05, | |
| "loss": 1.3971, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6492176386913229, | |
| "grad_norm": 0.5719125270843506, | |
| "learning_rate": 1.5841144798840855e-05, | |
| "loss": 1.4372, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.6497866287339972, | |
| "grad_norm": 0.6199617981910706, | |
| "learning_rate": 1.5718581281242572e-05, | |
| "loss": 1.6019, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.6503556187766714, | |
| "grad_norm": 0.594205379486084, | |
| "learning_rate": 1.5596453290870982e-05, | |
| "loss": 1.5322, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.6509246088193457, | |
| "grad_norm": 0.6198005676269531, | |
| "learning_rate": 1.5474761458826793e-05, | |
| "loss": 1.4777, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.65149359886202, | |
| "grad_norm": 0.567058265209198, | |
| "learning_rate": 1.5353506413956932e-05, | |
| "loss": 1.5108, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.6520625889046942, | |
| "grad_norm": 0.5950532555580139, | |
| "learning_rate": 1.5232688782851068e-05, | |
| "loss": 1.5038, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.6526315789473685, | |
| "grad_norm": 0.6178238987922668, | |
| "learning_rate": 1.511230918983867e-05, | |
| "loss": 1.5458, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.6532005689900426, | |
| "grad_norm": 0.5962685346603394, | |
| "learning_rate": 1.4992368256985546e-05, | |
| "loss": 1.432, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.6537695590327169, | |
| "grad_norm": 0.5979620218276978, | |
| "learning_rate": 1.4872866604090696e-05, | |
| "loss": 1.5035, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.6543385490753911, | |
| "grad_norm": 0.5819264650344849, | |
| "learning_rate": 1.475380484868325e-05, | |
| "loss": 1.4169, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6549075391180654, | |
| "grad_norm": 0.6469606757164001, | |
| "learning_rate": 1.4635183606018943e-05, | |
| "loss": 1.4442, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.6554765291607397, | |
| "grad_norm": 0.5789610147476196, | |
| "learning_rate": 1.451700348907734e-05, | |
| "loss": 1.122, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.6560455192034139, | |
| "grad_norm": 0.5797150135040283, | |
| "learning_rate": 1.4399265108558379e-05, | |
| "loss": 1.5795, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.6566145092460882, | |
| "grad_norm": 0.6014512777328491, | |
| "learning_rate": 1.4281969072879298e-05, | |
| "loss": 1.3673, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.6571834992887624, | |
| "grad_norm": 0.566061794757843, | |
| "learning_rate": 1.4165115988171596e-05, | |
| "loss": 1.4255, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.6577524893314367, | |
| "grad_norm": 0.599322497844696, | |
| "learning_rate": 1.4048706458277672e-05, | |
| "loss": 1.4538, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.658321479374111, | |
| "grad_norm": 0.6258883476257324, | |
| "learning_rate": 1.3932741084747913e-05, | |
| "loss": 1.5197, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.6588904694167852, | |
| "grad_norm": 0.603754460811615, | |
| "learning_rate": 1.3817220466837566e-05, | |
| "loss": 1.5596, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.6594594594594595, | |
| "grad_norm": 0.5680553317070007, | |
| "learning_rate": 1.3702145201503458e-05, | |
| "loss": 1.3882, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.6600284495021337, | |
| "grad_norm": 0.6317921280860901, | |
| "learning_rate": 1.3587515883401202e-05, | |
| "loss": 1.4051, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.660597439544808, | |
| "grad_norm": 0.5998348593711853, | |
| "learning_rate": 1.3473333104881792e-05, | |
| "loss": 1.5309, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.6611664295874822, | |
| "grad_norm": 0.5694242715835571, | |
| "learning_rate": 1.3359597455988803e-05, | |
| "loss": 1.4933, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.6617354196301565, | |
| "grad_norm": 0.6193349361419678, | |
| "learning_rate": 1.3246309524455291e-05, | |
| "loss": 1.5781, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.6623044096728308, | |
| "grad_norm": 0.5579991340637207, | |
| "learning_rate": 1.3133469895700634e-05, | |
| "loss": 1.3616, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.662873399715505, | |
| "grad_norm": 0.5790702104568481, | |
| "learning_rate": 1.3021079152827631e-05, | |
| "loss": 1.3994, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.6634423897581793, | |
| "grad_norm": 0.5730209946632385, | |
| "learning_rate": 1.2909137876619448e-05, | |
| "loss": 1.3269, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.6640113798008535, | |
| "grad_norm": 0.6066268086433411, | |
| "learning_rate": 1.2797646645536566e-05, | |
| "loss": 1.6239, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.6645803698435278, | |
| "grad_norm": 0.649182140827179, | |
| "learning_rate": 1.2686606035713944e-05, | |
| "loss": 1.7304, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.6651493598862019, | |
| "grad_norm": 0.6383649110794067, | |
| "learning_rate": 1.2576016620957853e-05, | |
| "loss": 1.4477, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.6657183499288762, | |
| "grad_norm": 0.5763673782348633, | |
| "learning_rate": 1.2465878972743028e-05, | |
| "loss": 1.4846, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6662873399715505, | |
| "grad_norm": 0.5865679383277893, | |
| "learning_rate": 1.2356193660209681e-05, | |
| "loss": 1.5687, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.6668563300142247, | |
| "grad_norm": 0.5898412466049194, | |
| "learning_rate": 1.2246961250160527e-05, | |
| "loss": 1.5227, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.667425320056899, | |
| "grad_norm": 0.6910015344619751, | |
| "learning_rate": 1.2138182307057987e-05, | |
| "loss": 1.245, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.6679943100995732, | |
| "grad_norm": 0.5660498142242432, | |
| "learning_rate": 1.2029857393021094e-05, | |
| "loss": 1.2887, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.6685633001422475, | |
| "grad_norm": 0.5966072082519531, | |
| "learning_rate": 1.1921987067822672e-05, | |
| "loss": 1.3417, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.6691322901849218, | |
| "grad_norm": 0.5854772329330444, | |
| "learning_rate": 1.1814571888886483e-05, | |
| "loss": 1.474, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.669701280227596, | |
| "grad_norm": 0.6000230312347412, | |
| "learning_rate": 1.1707612411284253e-05, | |
| "loss": 1.4276, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.6702702702702703, | |
| "grad_norm": 0.5757988691329956, | |
| "learning_rate": 1.1601109187732928e-05, | |
| "loss": 1.5459, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.6708392603129445, | |
| "grad_norm": 0.5930068492889404, | |
| "learning_rate": 1.149506276859167e-05, | |
| "loss": 1.4149, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.6714082503556188, | |
| "grad_norm": 0.5741011500358582, | |
| "learning_rate": 1.1389473701859121e-05, | |
| "loss": 1.2504, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.671977240398293, | |
| "grad_norm": 0.588571310043335, | |
| "learning_rate": 1.1284342533170545e-05, | |
| "loss": 1.6301, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.6725462304409673, | |
| "grad_norm": 0.5500454306602478, | |
| "learning_rate": 1.1179669805794968e-05, | |
| "loss": 1.4952, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.6731152204836416, | |
| "grad_norm": 0.5811514854431152, | |
| "learning_rate": 1.1075456060632472e-05, | |
| "loss": 1.447, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 0.6315092444419861, | |
| "learning_rate": 1.0971701836211268e-05, | |
| "loss": 1.5707, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.6742532005689901, | |
| "grad_norm": 0.6309195756912231, | |
| "learning_rate": 1.0868407668684998e-05, | |
| "loss": 1.2443, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.6748221906116643, | |
| "grad_norm": 0.5840954780578613, | |
| "learning_rate": 1.0765574091829933e-05, | |
| "loss": 1.4682, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.6753911806543386, | |
| "grad_norm": 0.6113706827163696, | |
| "learning_rate": 1.0663201637042252e-05, | |
| "loss": 1.4292, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.6759601706970128, | |
| "grad_norm": 0.6047906279563904, | |
| "learning_rate": 1.0561290833335224e-05, | |
| "loss": 1.627, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.676529160739687, | |
| "grad_norm": 0.5755859613418579, | |
| "learning_rate": 1.04598422073366e-05, | |
| "loss": 1.3449, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.6770981507823614, | |
| "grad_norm": 0.5938130021095276, | |
| "learning_rate": 1.0358856283285722e-05, | |
| "loss": 1.389, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6776671408250355, | |
| "grad_norm": 0.5975162386894226, | |
| "learning_rate": 1.0258333583030955e-05, | |
| "loss": 1.4868, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.6782361308677098, | |
| "grad_norm": 0.6362975239753723, | |
| "learning_rate": 1.0158274626026931e-05, | |
| "loss": 1.6409, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.678805120910384, | |
| "grad_norm": 0.6175844669342041, | |
| "learning_rate": 1.0058679929331827e-05, | |
| "loss": 1.4914, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.6793741109530583, | |
| "grad_norm": 0.5870533585548401, | |
| "learning_rate": 9.959550007604835e-06, | |
| "loss": 1.3655, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.6799431009957326, | |
| "grad_norm": 0.5993149280548096, | |
| "learning_rate": 9.860885373103324e-06, | |
| "loss": 1.4203, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.6805120910384068, | |
| "grad_norm": 0.5798912048339844, | |
| "learning_rate": 9.7626865356803e-06, | |
| "loss": 1.4378, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.6810810810810811, | |
| "grad_norm": 0.5729113221168518, | |
| "learning_rate": 9.664954002781745e-06, | |
| "loss": 1.4054, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.6816500711237553, | |
| "grad_norm": 0.6329131126403809, | |
| "learning_rate": 9.567688279443964e-06, | |
| "loss": 1.4381, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.6822190611664296, | |
| "grad_norm": 0.6088592410087585, | |
| "learning_rate": 9.4708898682911e-06, | |
| "loss": 1.4094, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.6827880512091038, | |
| "grad_norm": 0.5889382362365723, | |
| "learning_rate": 9.374559269532346e-06, | |
| "loss": 1.5365, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6833570412517781, | |
| "grad_norm": 0.6487043499946594, | |
| "learning_rate": 9.27869698095951e-06, | |
| "loss": 1.4747, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.6839260312944524, | |
| "grad_norm": 0.6006666421890259, | |
| "learning_rate": 9.183303497944361e-06, | |
| "loss": 1.3953, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.6844950213371266, | |
| "grad_norm": 0.5925318002700806, | |
| "learning_rate": 9.088379313436113e-06, | |
| "loss": 1.5679, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.6850640113798009, | |
| "grad_norm": 0.5964149832725525, | |
| "learning_rate": 8.993924917958874e-06, | |
| "loss": 1.4872, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.6856330014224751, | |
| "grad_norm": 0.5567532777786255, | |
| "learning_rate": 8.899940799609096e-06, | |
| "loss": 1.3922, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.6862019914651494, | |
| "grad_norm": 0.5803432464599609, | |
| "learning_rate": 8.806427444053033e-06, | |
| "loss": 1.319, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.6867709815078236, | |
| "grad_norm": 0.583640456199646, | |
| "learning_rate": 8.713385334524283e-06, | |
| "loss": 1.4564, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.6873399715504979, | |
| "grad_norm": 0.6316723227500916, | |
| "learning_rate": 8.620814951821232e-06, | |
| "loss": 1.4586, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.6879089615931722, | |
| "grad_norm": 0.5926545262336731, | |
| "learning_rate": 8.528716774304658e-06, | |
| "loss": 1.5008, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.6884779516358464, | |
| "grad_norm": 0.5738364458084106, | |
| "learning_rate": 8.43709127789517e-06, | |
| "loss": 1.3766, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6890469416785207, | |
| "grad_norm": 0.5985202193260193, | |
| "learning_rate": 8.345938936070718e-06, | |
| "loss": 1.5175, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.6896159317211948, | |
| "grad_norm": 0.6196452379226685, | |
| "learning_rate": 8.255260219864324e-06, | |
| "loss": 1.6161, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.6901849217638691, | |
| "grad_norm": 0.6303586959838867, | |
| "learning_rate": 8.16505559786146e-06, | |
| "loss": 1.6054, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.6907539118065433, | |
| "grad_norm": 0.5856702327728271, | |
| "learning_rate": 8.07532553619772e-06, | |
| "loss": 1.5131, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.6913229018492176, | |
| "grad_norm": 0.5996472239494324, | |
| "learning_rate": 7.986070498556397e-06, | |
| "loss": 1.3462, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.6918918918918919, | |
| "grad_norm": 0.6016293168067932, | |
| "learning_rate": 7.897290946166037e-06, | |
| "loss": 1.3177, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.6924608819345661, | |
| "grad_norm": 0.5805103182792664, | |
| "learning_rate": 7.808987337798158e-06, | |
| "loss": 1.1701, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.6930298719772404, | |
| "grad_norm": 0.5823555588722229, | |
| "learning_rate": 7.721160129764792e-06, | |
| "loss": 1.275, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.6935988620199146, | |
| "grad_norm": 0.5881485939025879, | |
| "learning_rate": 7.633809775916135e-06, | |
| "loss": 1.3304, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.6941678520625889, | |
| "grad_norm": 0.610157310962677, | |
| "learning_rate": 7.546936727638298e-06, | |
| "loss": 1.325, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6947368421052632, | |
| "grad_norm": 0.6015896797180176, | |
| "learning_rate": 7.460541433850788e-06, | |
| "loss": 1.4739, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.6953058321479374, | |
| "grad_norm": 0.6073941588401794, | |
| "learning_rate": 7.374624341004388e-06, | |
| "loss": 1.6308, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.6958748221906117, | |
| "grad_norm": 0.5897748470306396, | |
| "learning_rate": 7.289185893078721e-06, | |
| "loss": 1.4808, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.6964438122332859, | |
| "grad_norm": 0.6318244338035583, | |
| "learning_rate": 7.204226531579994e-06, | |
| "loss": 1.5134, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.6970128022759602, | |
| "grad_norm": 0.5809091329574585, | |
| "learning_rate": 7.119746695538765e-06, | |
| "loss": 1.4117, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6975817923186344, | |
| "grad_norm": 0.6108141541481018, | |
| "learning_rate": 7.0357468215075275e-06, | |
| "loss": 1.3201, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.6981507823613087, | |
| "grad_norm": 0.566813051700592, | |
| "learning_rate": 6.952227343558671e-06, | |
| "loss": 1.502, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.698719772403983, | |
| "grad_norm": 0.5999999046325684, | |
| "learning_rate": 6.869188693282036e-06, | |
| "loss": 1.3958, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.6992887624466572, | |
| "grad_norm": 0.6325690150260925, | |
| "learning_rate": 6.786631299782797e-06, | |
| "loss": 1.4682, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.6998577524893315, | |
| "grad_norm": 0.5865020155906677, | |
| "learning_rate": 6.704555589679262e-06, | |
| "loss": 1.4662, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.7004267425320057, | |
| "grad_norm": 0.5978051424026489, | |
| "learning_rate": 6.622961987100518e-06, | |
| "loss": 1.4549, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.70099573257468, | |
| "grad_norm": 0.6172093152999878, | |
| "learning_rate": 6.541850913684444e-06, | |
| "loss": 1.52, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.7015647226173541, | |
| "grad_norm": 0.6080772280693054, | |
| "learning_rate": 6.461222788575394e-06, | |
| "loss": 1.5765, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.7021337126600284, | |
| "grad_norm": 0.6048703193664551, | |
| "learning_rate": 6.3810780284220495e-06, | |
| "loss": 1.6723, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.7027027027027027, | |
| "grad_norm": 0.5950552225112915, | |
| "learning_rate": 6.301417047375347e-06, | |
| "loss": 1.4492, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.7032716927453769, | |
| "grad_norm": 0.5936811566352844, | |
| "learning_rate": 6.222240257086176e-06, | |
| "loss": 1.4721, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.7038406827880512, | |
| "grad_norm": 0.5990265011787415, | |
| "learning_rate": 6.143548066703475e-06, | |
| "loss": 1.3644, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.7044096728307254, | |
| "grad_norm": 0.5738005638122559, | |
| "learning_rate": 6.065340882871906e-06, | |
| "loss": 1.4847, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.7049786628733997, | |
| "grad_norm": 0.5998217463493347, | |
| "learning_rate": 5.9876191097298475e-06, | |
| "loss": 1.4917, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.705547652916074, | |
| "grad_norm": 0.5693299174308777, | |
| "learning_rate": 5.910383148907395e-06, | |
| "loss": 1.3934, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.7061166429587482, | |
| "grad_norm": 0.5998255014419556, | |
| "learning_rate": 5.8336333995240526e-06, | |
| "loss": 1.6348, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.7066856330014225, | |
| "grad_norm": 0.6056730151176453, | |
| "learning_rate": 5.757370258186889e-06, | |
| "loss": 1.4748, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.7072546230440967, | |
| "grad_norm": 0.6141417622566223, | |
| "learning_rate": 5.6815941189884315e-06, | |
| "loss": 1.4371, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.707823613086771, | |
| "grad_norm": 0.5924522280693054, | |
| "learning_rate": 5.606305373504528e-06, | |
| "loss": 1.4896, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.7083926031294452, | |
| "grad_norm": 0.5907067656517029, | |
| "learning_rate": 5.5315044107925094e-06, | |
| "loss": 1.5258, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.7089615931721195, | |
| "grad_norm": 0.5856897234916687, | |
| "learning_rate": 5.457191617388957e-06, | |
| "loss": 1.3751, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.7095305832147938, | |
| "grad_norm": 0.5863030552864075, | |
| "learning_rate": 5.383367377307857e-06, | |
| "loss": 1.2607, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.710099573257468, | |
| "grad_norm": 0.5891332626342773, | |
| "learning_rate": 5.310032072038651e-06, | |
| "loss": 1.3852, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.7106685633001423, | |
| "grad_norm": 0.5775113701820374, | |
| "learning_rate": 5.237186080544098e-06, | |
| "loss": 1.5867, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.7112375533428165, | |
| "grad_norm": 0.5843526721000671, | |
| "learning_rate": 5.164829779258451e-06, | |
| "loss": 1.5694, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.7118065433854908, | |
| "grad_norm": 0.7019409537315369, | |
| "learning_rate": 5.092963542085483e-06, | |
| "loss": 1.4444, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.712375533428165, | |
| "grad_norm": 0.6279569864273071, | |
| "learning_rate": 5.021587740396505e-06, | |
| "loss": 1.5798, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.7129445234708393, | |
| "grad_norm": 0.6179226040840149, | |
| "learning_rate": 4.950702743028535e-06, | |
| "loss": 1.3976, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.7135135135135136, | |
| "grad_norm": 0.5874016284942627, | |
| "learning_rate": 4.880308916282305e-06, | |
| "loss": 1.5384, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.7140825035561877, | |
| "grad_norm": 0.5691651701927185, | |
| "learning_rate": 4.810406623920427e-06, | |
| "loss": 1.3594, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.714651493598862, | |
| "grad_norm": 0.5660080909729004, | |
| "learning_rate": 4.740996227165462e-06, | |
| "loss": 1.5635, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.7152204836415362, | |
| "grad_norm": 0.6053207516670227, | |
| "learning_rate": 4.672078084698095e-06, | |
| "loss": 1.5981, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.7157894736842105, | |
| "grad_norm": 0.5976085066795349, | |
| "learning_rate": 4.603652552655302e-06, | |
| "loss": 1.5909, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.7163584637268848, | |
| "grad_norm": 0.547666609287262, | |
| "learning_rate": 4.53571998462845e-06, | |
| "loss": 1.3622, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.716927453769559, | |
| "grad_norm": 0.6441154479980469, | |
| "learning_rate": 4.468280731661489e-06, | |
| "loss": 1.4626, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7174964438122333, | |
| "grad_norm": 0.6044400930404663, | |
| "learning_rate": 4.4013351422491635e-06, | |
| "loss": 1.5432, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.7180654338549075, | |
| "grad_norm": 0.5658133029937744, | |
| "learning_rate": 4.334883562335157e-06, | |
| "loss": 1.4528, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.7186344238975818, | |
| "grad_norm": 0.6291137933731079, | |
| "learning_rate": 4.268926335310408e-06, | |
| "loss": 1.2975, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.719203413940256, | |
| "grad_norm": 0.5724123120307922, | |
| "learning_rate": 4.20346380201122e-06, | |
| "loss": 1.223, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.7197724039829303, | |
| "grad_norm": 0.6324455142021179, | |
| "learning_rate": 4.138496300717565e-06, | |
| "loss": 1.3516, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.7203413940256046, | |
| "grad_norm": 0.5916242599487305, | |
| "learning_rate": 4.0740241671513025e-06, | |
| "loss": 1.6546, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.7209103840682788, | |
| "grad_norm": 0.5780736804008484, | |
| "learning_rate": 4.010047734474454e-06, | |
| "loss": 1.4467, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.7214793741109531, | |
| "grad_norm": 0.580437958240509, | |
| "learning_rate": 3.946567333287566e-06, | |
| "loss": 1.2151, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.7220483641536273, | |
| "grad_norm": 0.631999135017395, | |
| "learning_rate": 3.883583291627823e-06, | |
| "loss": 1.6731, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.7226173541963016, | |
| "grad_norm": 0.5912725329399109, | |
| "learning_rate": 3.821095934967511e-06, | |
| "loss": 1.5419, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.7231863442389758, | |
| "grad_norm": 0.5841814279556274, | |
| "learning_rate": 3.759105586212275e-06, | |
| "loss": 1.36, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.7237553342816501, | |
| "grad_norm": 0.620486319065094, | |
| "learning_rate": 3.6976125656994376e-06, | |
| "loss": 1.4474, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.7243243243243244, | |
| "grad_norm": 0.5620819330215454, | |
| "learning_rate": 3.6366171911963455e-06, | |
| "loss": 1.3565, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.7248933143669986, | |
| "grad_norm": 0.6318161487579346, | |
| "learning_rate": 3.576119777898812e-06, | |
| "loss": 1.5721, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.7254623044096729, | |
| "grad_norm": 0.5643869638442993, | |
| "learning_rate": 3.516120638429332e-06, | |
| "loss": 1.3681, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.726031294452347, | |
| "grad_norm": 0.5829715132713318, | |
| "learning_rate": 3.4566200828356157e-06, | |
| "loss": 1.3699, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.7266002844950213, | |
| "grad_norm": 0.5623791813850403, | |
| "learning_rate": 3.397618418588877e-06, | |
| "loss": 1.3686, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.7271692745376956, | |
| "grad_norm": 0.5907699465751648, | |
| "learning_rate": 3.3391159505823165e-06, | |
| "loss": 1.5019, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.7277382645803698, | |
| "grad_norm": 0.5887323617935181, | |
| "learning_rate": 3.2811129811295416e-06, | |
| "loss": 1.5161, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.7283072546230441, | |
| "grad_norm": 0.6375420093536377, | |
| "learning_rate": 3.2236098099629353e-06, | |
| "loss": 1.53, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.7288762446657183, | |
| "grad_norm": 0.569848358631134, | |
| "learning_rate": 3.16660673423218e-06, | |
| "loss": 1.5462, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.7294452347083926, | |
| "grad_norm": 0.5773903727531433, | |
| "learning_rate": 3.1101040485027043e-06, | |
| "loss": 1.4332, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.7300142247510668, | |
| "grad_norm": 0.5759513974189758, | |
| "learning_rate": 3.0541020447541256e-06, | |
| "loss": 1.4906, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.7305832147937411, | |
| "grad_norm": 0.5894652009010315, | |
| "learning_rate": 2.99860101237881e-06, | |
| "loss": 1.3007, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.7311522048364154, | |
| "grad_norm": 0.5720746517181396, | |
| "learning_rate": 2.9436012381803156e-06, | |
| "loss": 1.5254, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.7317211948790896, | |
| "grad_norm": 0.6133726239204407, | |
| "learning_rate": 2.8891030063719183e-06, | |
| "loss": 1.6029, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.7322901849217639, | |
| "grad_norm": 0.6293920874595642, | |
| "learning_rate": 2.8351065985751766e-06, | |
| "loss": 1.5918, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.7328591749644381, | |
| "grad_norm": 0.5941974520683289, | |
| "learning_rate": 2.7816122938184255e-06, | |
| "loss": 1.43, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.7334281650071124, | |
| "grad_norm": 0.5790094137191772, | |
| "learning_rate": 2.7286203685354063e-06, | |
| "loss": 1.4635, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.7339971550497866, | |
| "grad_norm": 0.593591570854187, | |
| "learning_rate": 2.6761310965637833e-06, | |
| "loss": 1.554, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.7345661450924609, | |
| "grad_norm": 0.6287367939949036, | |
| "learning_rate": 2.62414474914372e-06, | |
| "loss": 1.2736, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.7351351351351352, | |
| "grad_norm": 0.586243748664856, | |
| "learning_rate": 2.5726615949165254e-06, | |
| "loss": 1.6281, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.7357041251778094, | |
| "grad_norm": 0.6094790697097778, | |
| "learning_rate": 2.5216818999232117e-06, | |
| "loss": 1.4495, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.7362731152204837, | |
| "grad_norm": 0.5789740681648254, | |
| "learning_rate": 2.4712059276031816e-06, | |
| "loss": 1.6063, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 0.5999897122383118, | |
| "learning_rate": 2.421233938792811e-06, | |
| "loss": 1.3805, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.7374110953058322, | |
| "grad_norm": 0.5815314054489136, | |
| "learning_rate": 2.3717661917241117e-06, | |
| "loss": 1.4289, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.7379800853485065, | |
| "grad_norm": 0.5862295031547546, | |
| "learning_rate": 2.322802942023461e-06, | |
| "loss": 1.4672, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.7385490753911806, | |
| "grad_norm": 0.5870852470397949, | |
| "learning_rate": 2.2743444427101525e-06, | |
| "loss": 1.5368, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.7391180654338549, | |
| "grad_norm": 0.5981742143630981, | |
| "learning_rate": 2.2263909441952226e-06, | |
| "loss": 1.4996, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.7396870554765291, | |
| "grad_norm": 0.6381643414497375, | |
| "learning_rate": 2.178942694280095e-06, | |
| "loss": 1.4773, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7402560455192034, | |
| "grad_norm": 0.5861015915870667, | |
| "learning_rate": 2.1319999381552604e-06, | |
| "loss": 1.3885, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.7408250355618776, | |
| "grad_norm": 0.5836819410324097, | |
| "learning_rate": 2.0855629183990867e-06, | |
| "loss": 1.4594, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.7413940256045519, | |
| "grad_norm": 0.57367342710495, | |
| "learning_rate": 2.039631874976533e-06, | |
| "loss": 1.5536, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.7419630156472262, | |
| "grad_norm": 0.6635316014289856, | |
| "learning_rate": 1.9942070452378836e-06, | |
| "loss": 1.3837, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.7425320056899004, | |
| "grad_norm": 0.6087902784347534, | |
| "learning_rate": 1.9492886639175922e-06, | |
| "loss": 1.5232, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.7431009957325747, | |
| "grad_norm": 0.5868977308273315, | |
| "learning_rate": 1.9048769631329399e-06, | |
| "loss": 1.5394, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.7436699857752489, | |
| "grad_norm": 0.5506064891815186, | |
| "learning_rate": 1.8609721723830132e-06, | |
| "loss": 1.3222, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.7442389758179232, | |
| "grad_norm": 0.6256322860717773, | |
| "learning_rate": 1.8175745185473714e-06, | |
| "loss": 1.6126, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.7448079658605974, | |
| "grad_norm": 0.6551912426948547, | |
| "learning_rate": 1.774684225884904e-06, | |
| "loss": 1.6678, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.7453769559032717, | |
| "grad_norm": 0.6695655584335327, | |
| "learning_rate": 1.7323015160327638e-06, | |
| "loss": 1.4653, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.745945945945946, | |
| "grad_norm": 0.5701092481613159, | |
| "learning_rate": 1.690426608005069e-06, | |
| "loss": 1.4619, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.7465149359886202, | |
| "grad_norm": 0.5779337882995605, | |
| "learning_rate": 1.6490597181919254e-06, | |
| "loss": 1.3819, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.7470839260312945, | |
| "grad_norm": 0.6129727959632874, | |
| "learning_rate": 1.6082010603582053e-06, | |
| "loss": 1.5916, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.7476529160739687, | |
| "grad_norm": 0.627040445804596, | |
| "learning_rate": 1.567850845642449e-06, | |
| "loss": 1.4437, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.748221906116643, | |
| "grad_norm": 0.5893445611000061, | |
| "learning_rate": 1.5280092825558645e-06, | |
| "loss": 1.4348, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.7487908961593173, | |
| "grad_norm": 0.5637263059616089, | |
| "learning_rate": 1.4886765769811072e-06, | |
| "loss": 1.4235, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.7493598862019915, | |
| "grad_norm": 0.5983802080154419, | |
| "learning_rate": 1.4498529321713584e-06, | |
| "loss": 1.5322, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.7499288762446658, | |
| "grad_norm": 0.6314471364021301, | |
| "learning_rate": 1.4115385487491583e-06, | |
| "loss": 1.511, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.7504978662873399, | |
| "grad_norm": 0.6288767457008362, | |
| "learning_rate": 1.3737336247054644e-06, | |
| "loss": 1.5245, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.7510668563300142, | |
| "grad_norm": 0.5633329153060913, | |
| "learning_rate": 1.3364383553985726e-06, | |
| "loss": 1.5002, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.7516358463726884, | |
| "grad_norm": 0.5696760416030884, | |
| "learning_rate": 1.2996529335530749e-06, | |
| "loss": 1.4833, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.7522048364153627, | |
| "grad_norm": 0.6070805191993713, | |
| "learning_rate": 1.2633775492589816e-06, | |
| "loss": 1.3631, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.752773826458037, | |
| "grad_norm": 0.6544724702835083, | |
| "learning_rate": 1.2276123899706227e-06, | |
| "loss": 1.3451, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.7533428165007112, | |
| "grad_norm": 0.5900003910064697, | |
| "learning_rate": 1.1923576405057258e-06, | |
| "loss": 1.4344, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.7539118065433855, | |
| "grad_norm": 0.5852407217025757, | |
| "learning_rate": 1.1576134830444619e-06, | |
| "loss": 1.3403, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.7544807965860597, | |
| "grad_norm": 0.557827353477478, | |
| "learning_rate": 1.1233800971285013e-06, | |
| "loss": 1.2692, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.755049786628734, | |
| "grad_norm": 0.5728473663330078, | |
| "learning_rate": 1.0896576596600705e-06, | |
| "loss": 1.2999, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.7556187766714082, | |
| "grad_norm": 0.6241177320480347, | |
| "learning_rate": 1.0564463449010852e-06, | |
| "loss": 1.3683, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.7561877667140825, | |
| "grad_norm": 0.5648365020751953, | |
| "learning_rate": 1.0237463244721747e-06, | |
| "loss": 1.4297, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.7567567567567568, | |
| "grad_norm": 0.590801477432251, | |
| "learning_rate": 9.915577673518695e-07, | |
| "loss": 1.5818, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.757325746799431, | |
| "grad_norm": 0.6130539178848267, | |
| "learning_rate": 9.59880839875682e-07, | |
| "loss": 1.2326, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.7578947368421053, | |
| "grad_norm": 0.5840870141983032, | |
| "learning_rate": 9.287157057352502e-07, | |
| "loss": 1.4967, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.7584637268847795, | |
| "grad_norm": 0.5912277102470398, | |
| "learning_rate": 8.980625259775277e-07, | |
| "loss": 1.5008, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.7590327169274538, | |
| "grad_norm": 0.5769196152687073, | |
| "learning_rate": 8.679214590039064e-07, | |
| "loss": 1.29, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.7596017069701281, | |
| "grad_norm": 0.6438156366348267, | |
| "learning_rate": 8.382926605694064e-07, | |
| "loss": 1.4775, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.7601706970128023, | |
| "grad_norm": 0.5996107459068298, | |
| "learning_rate": 8.091762837819094e-07, | |
| "loss": 1.5507, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.7607396870554766, | |
| "grad_norm": 0.6058631539344788, | |
| "learning_rate": 7.80572479101327e-07, | |
| "loss": 1.4726, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.7613086770981508, | |
| "grad_norm": 0.5997673273086548, | |
| "learning_rate": 7.524813943388331e-07, | |
| "loss": 1.5108, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.761877667140825, | |
| "grad_norm": 0.5843853950500488, | |
| "learning_rate": 7.249031746561108e-07, | |
| "loss": 1.4569, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.7624466571834992, | |
| "grad_norm": 0.5930002927780151, | |
| "learning_rate": 6.978379625645959e-07, | |
| "loss": 1.4858, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7630156472261735, | |
| "grad_norm": 0.5973467826843262, | |
| "learning_rate": 6.712858979247116e-07, | |
| "loss": 1.5819, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.7635846372688478, | |
| "grad_norm": 0.601449191570282, | |
| "learning_rate": 6.452471179452135e-07, | |
| "loss": 1.6227, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.764153627311522, | |
| "grad_norm": 0.5814241170883179, | |
| "learning_rate": 6.197217571824232e-07, | |
| "loss": 1.3806, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.7647226173541963, | |
| "grad_norm": 0.5642287731170654, | |
| "learning_rate": 5.947099475395402e-07, | |
| "loss": 1.1583, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.7652916073968705, | |
| "grad_norm": 0.5667275190353394, | |
| "learning_rate": 5.702118182659866e-07, | |
| "loss": 1.5422, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.7658605974395448, | |
| "grad_norm": 0.5716063976287842, | |
| "learning_rate": 5.462274959567193e-07, | |
| "loss": 1.4454, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.766429587482219, | |
| "grad_norm": 0.5906545519828796, | |
| "learning_rate": 5.227571045515633e-07, | |
| "loss": 1.4336, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.7669985775248933, | |
| "grad_norm": 0.5766403079032898, | |
| "learning_rate": 4.998007653346126e-07, | |
| "loss": 1.3452, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.7675675675675676, | |
| "grad_norm": 0.5666573643684387, | |
| "learning_rate": 4.773585969335636e-07, | |
| "loss": 1.4239, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.7681365576102418, | |
| "grad_norm": 0.586741030216217, | |
| "learning_rate": 4.554307153191273e-07, | |
| "loss": 1.4837, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7687055476529161, | |
| "grad_norm": 0.5627419948577881, | |
| "learning_rate": 4.340172338043846e-07, | |
| "loss": 1.1588, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.7692745376955903, | |
| "grad_norm": 0.6001150608062744, | |
| "learning_rate": 4.131182630442876e-07, | |
| "loss": 1.6122, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.7698435277382646, | |
| "grad_norm": 0.6114206314086914, | |
| "learning_rate": 3.9273391103499257e-07, | |
| "loss": 1.493, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.7704125177809389, | |
| "grad_norm": 0.5789377689361572, | |
| "learning_rate": 3.728642831133833e-07, | |
| "loss": 1.4742, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.7709815078236131, | |
| "grad_norm": 0.6265093684196472, | |
| "learning_rate": 3.5350948195645993e-07, | |
| "loss": 1.4284, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.7715504978662874, | |
| "grad_norm": 0.6009383201599121, | |
| "learning_rate": 3.3466960758082867e-07, | |
| "loss": 1.3373, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.7721194879089616, | |
| "grad_norm": 0.5908554196357727, | |
| "learning_rate": 3.163447573422351e-07, | |
| "loss": 1.4147, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.7726884779516359, | |
| "grad_norm": 0.614896297454834, | |
| "learning_rate": 2.985350259349762e-07, | |
| "loss": 1.3802, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.77325746799431, | |
| "grad_norm": 0.6063140630722046, | |
| "learning_rate": 2.812405053914891e-07, | |
| "loss": 1.66, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.7738264580369844, | |
| "grad_norm": 0.5757827758789062, | |
| "learning_rate": 2.644612850818073e-07, | |
| "loss": 1.4361, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7743954480796587, | |
| "grad_norm": 0.5920888185501099, | |
| "learning_rate": 2.481974517131502e-07, | |
| "loss": 1.3681, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.7749644381223328, | |
| "grad_norm": 0.5669330358505249, | |
| "learning_rate": 2.324490893294673e-07, | |
| "loss": 1.5391, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.7755334281650071, | |
| "grad_norm": 0.5745005011558533, | |
| "learning_rate": 2.172162793109611e-07, | |
| "loss": 1.3118, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.7761024182076813, | |
| "grad_norm": 0.5584404468536377, | |
| "learning_rate": 2.0249910037374308e-07, | |
| "loss": 1.4001, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.7766714082503556, | |
| "grad_norm": 0.6618481874465942, | |
| "learning_rate": 1.8829762856933387e-07, | |
| "loss": 1.4373, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.7772403982930298, | |
| "grad_norm": 0.5788954496383667, | |
| "learning_rate": 1.7461193728436353e-07, | |
| "loss": 1.5453, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.7778093883357041, | |
| "grad_norm": 0.6013731956481934, | |
| "learning_rate": 1.614420972401165e-07, | |
| "loss": 1.5133, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.7783783783783784, | |
| "grad_norm": 0.622704029083252, | |
| "learning_rate": 1.4878817649220944e-07, | |
| "loss": 1.4024, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.7789473684210526, | |
| "grad_norm": 0.5754665732383728, | |
| "learning_rate": 1.36650240430225e-07, | |
| "loss": 1.2369, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.7795163584637269, | |
| "grad_norm": 0.5812003016471863, | |
| "learning_rate": 1.250283517774009e-07, | |
| "loss": 1.5473, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7800853485064011, | |
| "grad_norm": 0.6443690657615662, | |
| "learning_rate": 1.1392257059023026e-07, | |
| "loss": 1.5182, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.7806543385490754, | |
| "grad_norm": 0.5886285901069641, | |
| "learning_rate": 1.0333295425825063e-07, | |
| "loss": 1.6711, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.7812233285917497, | |
| "grad_norm": 0.5866546034812927, | |
| "learning_rate": 9.325955750367766e-08, | |
| "loss": 1.326, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.7817923186344239, | |
| "grad_norm": 0.5975569486618042, | |
| "learning_rate": 8.370243238113862e-08, | |
| "loss": 1.522, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.7823613086770982, | |
| "grad_norm": 0.5648385286331177, | |
| "learning_rate": 7.466162827742817e-08, | |
| "loss": 1.4345, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.7829302987197724, | |
| "grad_norm": 0.6075783371925354, | |
| "learning_rate": 6.61371919112197e-08, | |
| "loss": 1.4661, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.7834992887624467, | |
| "grad_norm": 0.6267833113670349, | |
| "learning_rate": 5.812916733284324e-08, | |
| "loss": 1.5151, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.7840682788051209, | |
| "grad_norm": 0.5820707082748413, | |
| "learning_rate": 5.063759592404127e-08, | |
| "loss": 1.4866, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.7846372688477952, | |
| "grad_norm": 0.5778554677963257, | |
| "learning_rate": 4.366251639777996e-08, | |
| "loss": 1.3435, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.7852062588904695, | |
| "grad_norm": 0.5690316557884216, | |
| "learning_rate": 3.720396479803823e-08, | |
| "loss": 1.4896, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7857752489331437, | |
| "grad_norm": 0.5569249391555786, | |
| "learning_rate": 3.126197449959678e-08, | |
| "loss": 1.358, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.786344238975818, | |
| "grad_norm": 0.6170508861541748, | |
| "learning_rate": 2.5836576207916018e-08, | |
| "loss": 1.4965, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.7869132290184921, | |
| "grad_norm": 0.5334784388542175, | |
| "learning_rate": 2.092779795892508e-08, | |
| "loss": 1.2593, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.7874822190611664, | |
| "grad_norm": 0.6333361864089966, | |
| "learning_rate": 1.6535665118910802e-08, | |
| "loss": 1.3485, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.7880512091038406, | |
| "grad_norm": 0.6247609853744507, | |
| "learning_rate": 1.2660200384384536e-08, | |
| "loss": 1.4111, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.7886201991465149, | |
| "grad_norm": 0.5764187574386597, | |
| "learning_rate": 9.301423781926666e-09, | |
| "loss": 1.201, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.7891891891891892, | |
| "grad_norm": 0.5923343896865845, | |
| "learning_rate": 6.459352668164442e-09, | |
| "loss": 1.5227, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.7897581792318634, | |
| "grad_norm": 0.578344464302063, | |
| "learning_rate": 4.134001729583226e-09, | |
| "loss": 1.3055, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.7903271692745377, | |
| "grad_norm": 0.595220148563385, | |
| "learning_rate": 2.3253829825153894e-09, | |
| "loss": 1.5065, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.7908961593172119, | |
| "grad_norm": 0.5846388339996338, | |
| "learning_rate": 1.033505773062604e-09, | |
| "loss": 1.5331, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7914651493598862, | |
| "grad_norm": 0.5945976376533508, | |
| "learning_rate": 2.5837677706253003e-10, | |
| "loss": 1.5147, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.7920341394025604, | |
| "grad_norm": 0.6109771728515625, | |
| "learning_rate": 0.0, | |
| "loss": 1.508, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.7920341394025604, | |
| "eval_loss": 1.461509108543396, | |
| "eval_runtime": 16.4544, | |
| "eval_samples_per_second": 44.973, | |
| "eval_steps_per_second": 22.486, | |
| "step": 1392 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1392, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 348, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.474815762936627e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |