| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9719154307352477, |
| "eval_steps": 770, |
| "global_step": 770, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0012622278321236984, |
| "grad_norm": 1.1810976266860962, |
| "learning_rate": 0.0, |
| "loss": 2.1786725521087646, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0025244556642473968, |
| "grad_norm": 1.1999785900115967, |
| "learning_rate": 4e-05, |
| "loss": 1.9390826225280762, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.003786683496371095, |
| "grad_norm": 1.2012475728988647, |
| "learning_rate": 8e-05, |
| "loss": 1.841808795928955, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0050489113284947935, |
| "grad_norm": 1.4274017810821533, |
| "learning_rate": 0.00012, |
| "loss": 2.174586772918701, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.006311139160618492, |
| "grad_norm": 0.5815935730934143, |
| "learning_rate": 0.00016, |
| "loss": 1.7276136875152588, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00757336699274219, |
| "grad_norm": 0.48476865887641907, |
| "learning_rate": 0.0002, |
| "loss": 1.6276743412017822, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.008835594824865888, |
| "grad_norm": 0.5590611696243286, |
| "learning_rate": 0.0001999991567695732, |
| "loss": 1.6253315210342407, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.010097822656989587, |
| "grad_norm": 0.5516509413719177, |
| "learning_rate": 0.00019999662709251355, |
| "loss": 1.457699179649353, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.011360050489113285, |
| "grad_norm": 1.3951493501663208, |
| "learning_rate": 0.00019999241101148306, |
| "loss": 1.448043942451477, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.012622278321236984, |
| "grad_norm": 0.7879750728607178, |
| "learning_rate": 0.0001999865085975843, |
| "loss": 1.127958059310913, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013884506153360681, |
| "grad_norm": 0.6136755347251892, |
| "learning_rate": 0.00019997891995035912, |
| "loss": 1.29304039478302, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01514673398548438, |
| "grad_norm": 0.8061326146125793, |
| "learning_rate": 0.0001999696451977872, |
| "loss": 0.9419246912002563, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.016408961817608078, |
| "grad_norm": 0.6488391757011414, |
| "learning_rate": 0.00019995868449628346, |
| "loss": 0.8523351550102234, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.017671189649731776, |
| "grad_norm": 0.9592429399490356, |
| "learning_rate": 0.00019994603803069594, |
| "loss": 0.7415441870689392, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.018933417481855473, |
| "grad_norm": 0.6320379972457886, |
| "learning_rate": 0.0001999317060143023, |
| "loss": 0.9742417335510254, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.020195645313979174, |
| "grad_norm": 0.6976192593574524, |
| "learning_rate": 0.0001999156886888064, |
| "loss": 1.0749256610870361, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02145787314610287, |
| "grad_norm": 0.6568692922592163, |
| "learning_rate": 0.00019989798632433415, |
| "loss": 0.7685850262641907, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.02272010097822657, |
| "grad_norm": 0.48727890849113464, |
| "learning_rate": 0.00019987859921942903, |
| "loss": 0.5362906455993652, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.023982328810350267, |
| "grad_norm": 0.42397183179855347, |
| "learning_rate": 0.0001998575277010469, |
| "loss": 0.6970788836479187, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.025244556642473968, |
| "grad_norm": 0.4272933602333069, |
| "learning_rate": 0.00019983477212455074, |
| "loss": 0.8377600312232971, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.026506784474597665, |
| "grad_norm": 0.3498779535293579, |
| "learning_rate": 0.00019981033287370443, |
| "loss": 0.7417164444923401, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.027769012306721363, |
| "grad_norm": 0.45754557847976685, |
| "learning_rate": 0.00019978421036066633, |
| "loss": 0.7524069547653198, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02903124013884506, |
| "grad_norm": 0.406505823135376, |
| "learning_rate": 0.00019975640502598244, |
| "loss": 0.8919811248779297, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.03029346797096876, |
| "grad_norm": 0.3776075839996338, |
| "learning_rate": 0.00019972691733857883, |
| "loss": 0.5425232648849487, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.031555695803092455, |
| "grad_norm": 0.4487985670566559, |
| "learning_rate": 0.00019969574779575376, |
| "loss": 0.5764633417129517, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.032817923635216156, |
| "grad_norm": 0.4203525483608246, |
| "learning_rate": 0.00019966289692316944, |
| "loss": 0.7679987549781799, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03408015146733986, |
| "grad_norm": 0.36741408705711365, |
| "learning_rate": 0.00019962836527484296, |
| "loss": 0.6128969192504883, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03534237929946355, |
| "grad_norm": 0.3909834325313568, |
| "learning_rate": 0.00019959215343313703, |
| "loss": 0.6979946494102478, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03660460713158725, |
| "grad_norm": 0.3810923099517822, |
| "learning_rate": 0.00019955426200875018, |
| "loss": 0.8191502690315247, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.037866834963710946, |
| "grad_norm": 0.4916118085384369, |
| "learning_rate": 0.00019951469164070646, |
| "loss": 0.9299726486206055, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03912906279583465, |
| "grad_norm": 0.37555935978889465, |
| "learning_rate": 0.00019947344299634464, |
| "loss": 1.0361579656600952, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.04039129062795835, |
| "grad_norm": 0.42949214577674866, |
| "learning_rate": 0.00019943051677130696, |
| "loss": 0.8678889274597168, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04165351846008204, |
| "grad_norm": 0.41855067014694214, |
| "learning_rate": 0.0001993859136895274, |
| "loss": 0.8316136002540588, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.04291574629220574, |
| "grad_norm": 0.4109402894973755, |
| "learning_rate": 0.00019933963450321945, |
| "loss": 0.6912973523139954, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.044177974124329444, |
| "grad_norm": 0.4073610007762909, |
| "learning_rate": 0.0001992916799928635, |
| "loss": 0.9194254875183105, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04544020195645314, |
| "grad_norm": 0.4720235764980316, |
| "learning_rate": 0.0001992420509671936, |
| "loss": 0.7957297563552856, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04670242978857684, |
| "grad_norm": 0.3987046182155609, |
| "learning_rate": 0.0001991907482631838, |
| "loss": 0.6258067488670349, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04796465762070053, |
| "grad_norm": 0.4448748528957367, |
| "learning_rate": 0.00019913777274603418, |
| "loss": 1.003873348236084, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.049226885452824234, |
| "grad_norm": 0.4538639783859253, |
| "learning_rate": 0.00019908312530915603, |
| "loss": 0.8705529570579529, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.050489113284947935, |
| "grad_norm": 3.1903927326202393, |
| "learning_rate": 0.00019902680687415705, |
| "loss": 0.5736751556396484, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05175134111707163, |
| "grad_norm": 0.34906044602394104, |
| "learning_rate": 0.00019896881839082556, |
| "loss": 0.6542955636978149, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.05301356894919533, |
| "grad_norm": 3.0380051136016846, |
| "learning_rate": 0.0001989091608371146, |
| "loss": 0.9085805416107178, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05427579678131903, |
| "grad_norm": 0.3339233696460724, |
| "learning_rate": 0.00019884783521912554, |
| "loss": 0.4547462463378906, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.055538024613442726, |
| "grad_norm": 0.38581445813179016, |
| "learning_rate": 0.00019878484257109083, |
| "loss": 0.5983158349990845, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.056800252445566426, |
| "grad_norm": 0.3721480071544647, |
| "learning_rate": 0.0001987201839553569, |
| "loss": 0.8342102766036987, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05806248027769012, |
| "grad_norm": 0.4079038202762604, |
| "learning_rate": 0.00019865386046236596, |
| "loss": 0.854637861251831, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05932470810981382, |
| "grad_norm": 0.33452996611595154, |
| "learning_rate": 0.00019858587321063776, |
| "loss": 0.48024851083755493, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.06058693594193752, |
| "grad_norm": 0.35006284713745117, |
| "learning_rate": 0.00019851622334675066, |
| "loss": 0.7163654565811157, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.06184916377406122, |
| "grad_norm": 0.41123610734939575, |
| "learning_rate": 0.00019844491204532236, |
| "loss": 0.4998229742050171, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.06311139160618491, |
| "grad_norm": 0.3749666213989258, |
| "learning_rate": 0.0001983719405089901, |
| "loss": 0.48700374364852905, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06437361943830862, |
| "grad_norm": 0.41837647557258606, |
| "learning_rate": 0.0001982973099683902, |
| "loss": 1.0134358406066895, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.06563584727043231, |
| "grad_norm": 0.3964208960533142, |
| "learning_rate": 0.00019822102168213753, |
| "loss": 0.8818788528442383, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.066898075102556, |
| "grad_norm": 0.4097653925418854, |
| "learning_rate": 0.0001981430769368042, |
| "loss": 0.6342326998710632, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06816030293467971, |
| "grad_norm": 0.3813578188419342, |
| "learning_rate": 0.00019806347704689778, |
| "loss": 0.6181271076202393, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06942253076680341, |
| "grad_norm": 0.36281293630599976, |
| "learning_rate": 0.00019798222335483932, |
| "loss": 0.9839555025100708, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0706847585989271, |
| "grad_norm": 0.4149906039237976, |
| "learning_rate": 0.00019789931723094046, |
| "loss": 0.6778839826583862, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.07194698643105081, |
| "grad_norm": 0.3341962993144989, |
| "learning_rate": 0.00019781476007338058, |
| "loss": 0.47752535343170166, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0732092142631745, |
| "grad_norm": 0.3859621286392212, |
| "learning_rate": 0.000197728553308183, |
| "loss": 0.8040428161621094, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0744714420952982, |
| "grad_norm": 0.4537695348262787, |
| "learning_rate": 0.0001976406983891911, |
| "loss": 0.5346378684043884, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.07573366992742189, |
| "grad_norm": 0.39911121129989624, |
| "learning_rate": 0.00019755119679804367, |
| "loss": 0.8945479989051819, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0769958977595456, |
| "grad_norm": 0.3326367437839508, |
| "learning_rate": 0.00019746005004415005, |
| "loss": 0.40628719329833984, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0782581255916693, |
| "grad_norm": 0.3570570945739746, |
| "learning_rate": 0.0001973672596646645, |
| "loss": 0.4461412727832794, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07952035342379299, |
| "grad_norm": 0.46154263615608215, |
| "learning_rate": 0.00019727282722446047, |
| "loss": 0.8460710048675537, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0807825812559167, |
| "grad_norm": 0.3912942111492157, |
| "learning_rate": 0.00019717675431610415, |
| "loss": 0.855891764163971, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.08204480908804039, |
| "grad_norm": 0.39667049050331116, |
| "learning_rate": 0.00019707904255982745, |
| "loss": 0.7594934105873108, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.08330703692016408, |
| "grad_norm": 0.37858495116233826, |
| "learning_rate": 0.00019697969360350098, |
| "loss": 0.8552739024162292, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.08456926475228779, |
| "grad_norm": 0.3944226801395416, |
| "learning_rate": 0.0001968787091226059, |
| "loss": 0.6596317291259766, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.08583149258441149, |
| "grad_norm": 0.4035973846912384, |
| "learning_rate": 0.00019677609082020597, |
| "loss": 0.7658134698867798, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08709372041653518, |
| "grad_norm": 0.3967765271663666, |
| "learning_rate": 0.00019667184042691875, |
| "loss": 0.768731951713562, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.08835594824865889, |
| "grad_norm": 0.40382981300354004, |
| "learning_rate": 0.00019656595970088628, |
| "loss": 0.689699649810791, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08961817608078258, |
| "grad_norm": 0.3337244391441345, |
| "learning_rate": 0.00019645845042774553, |
| "loss": 0.33471691608428955, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.09088040391290628, |
| "grad_norm": 0.32900235056877136, |
| "learning_rate": 0.00019634931442059832, |
| "loss": 0.8053317070007324, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.09214263174502998, |
| "grad_norm": 0.33187833428382874, |
| "learning_rate": 0.00019623855351998072, |
| "loss": 0.4668503999710083, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.09340485957715368, |
| "grad_norm": 0.4185413420200348, |
| "learning_rate": 0.0001961261695938319, |
| "loss": 0.7394185066223145, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.09466708740927737, |
| "grad_norm": 0.3454440236091614, |
| "learning_rate": 0.00019601216453746283, |
| "loss": 0.5356079339981079, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.09592931524140107, |
| "grad_norm": 0.36690330505371094, |
| "learning_rate": 0.00019589654027352414, |
| "loss": 0.496408611536026, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09719154307352477, |
| "grad_norm": 1.212344765663147, |
| "learning_rate": 0.00019577929875197377, |
| "loss": 1.0225098133087158, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.09845377090564847, |
| "grad_norm": 0.43937745690345764, |
| "learning_rate": 0.0001956604419500441, |
| "loss": 0.7864935398101807, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09971599873777216, |
| "grad_norm": 0.37690651416778564, |
| "learning_rate": 0.00019553997187220855, |
| "loss": 0.4752700924873352, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.10097822656989587, |
| "grad_norm": 0.34280529618263245, |
| "learning_rate": 0.00019541789055014784, |
| "loss": 0.5001055002212524, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10224045440201956, |
| "grad_norm": 0.37480127811431885, |
| "learning_rate": 0.00019529420004271567, |
| "loss": 0.6418332457542419, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.10350268223414326, |
| "grad_norm": 0.3891831338405609, |
| "learning_rate": 0.000195168902435904, |
| "loss": 0.8710986375808716, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.10476491006626697, |
| "grad_norm": 0.3586503565311432, |
| "learning_rate": 0.00019504199984280799, |
| "loss": 0.6337010860443115, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.10602713789839066, |
| "grad_norm": 0.36571335792541504, |
| "learning_rate": 0.00019491349440359015, |
| "loss": 0.7422975301742554, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.10728936573051435, |
| "grad_norm": 0.39639922976493835, |
| "learning_rate": 0.00019478338828544435, |
| "loss": 0.8967505097389221, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.10855159356263806, |
| "grad_norm": 0.409046471118927, |
| "learning_rate": 0.00019465168368255946, |
| "loss": 0.6384124159812927, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.10981382139476176, |
| "grad_norm": 0.40344712138175964, |
| "learning_rate": 0.00019451838281608197, |
| "loss": 0.8778766393661499, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.11107604922688545, |
| "grad_norm": 0.32860085368156433, |
| "learning_rate": 0.00019438348793407881, |
| "loss": 0.4792889654636383, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.11233827705900915, |
| "grad_norm": 0.39201056957244873, |
| "learning_rate": 0.0001942470013114994, |
| "loss": 0.7574765086174011, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.11360050489113285, |
| "grad_norm": 0.3348289728164673, |
| "learning_rate": 0.0001941089252501372, |
| "loss": 0.9156350493431091, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.11486273272325655, |
| "grad_norm": 0.40806034207344055, |
| "learning_rate": 0.00019396926207859084, |
| "loss": 0.5706713795661926, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.11612496055538024, |
| "grad_norm": 0.4064014256000519, |
| "learning_rate": 0.00019382801415222516, |
| "loss": 0.697914719581604, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.11738718838750395, |
| "grad_norm": 0.3701585829257965, |
| "learning_rate": 0.00019368518385313107, |
| "loss": 0.5228875279426575, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.11864941621962764, |
| "grad_norm": 0.4085630476474762, |
| "learning_rate": 0.0001935407735900857, |
| "loss": 0.5461081266403198, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.11991164405175134, |
| "grad_norm": 0.42529523372650146, |
| "learning_rate": 0.00019339478579851155, |
| "loss": 0.7004275918006897, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.12117387188387505, |
| "grad_norm": 0.3296562731266022, |
| "learning_rate": 0.00019324722294043558, |
| "loss": 0.728748619556427, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.12243609971599874, |
| "grad_norm": 0.35158950090408325, |
| "learning_rate": 0.0001930980875044477, |
| "loss": 0.4642578959465027, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.12369832754812243, |
| "grad_norm": 0.3580923080444336, |
| "learning_rate": 0.00019294738200565856, |
| "loss": 0.6952727437019348, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.12496055538024614, |
| "grad_norm": 0.3877851963043213, |
| "learning_rate": 0.0001927951089856575, |
| "loss": 0.9369809031486511, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.12622278321236982, |
| "grad_norm": 0.35963308811187744, |
| "learning_rate": 0.0001926412710124693, |
| "loss": 0.8294747471809387, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12748501104449353, |
| "grad_norm": 0.3461640179157257, |
| "learning_rate": 0.0001924858706805112, |
| "loss": 0.5015355348587036, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.12874723887661724, |
| "grad_norm": 0.41662901639938354, |
| "learning_rate": 0.00019232891061054895, |
| "loss": 0.613286018371582, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.13000946670874092, |
| "grad_norm": 0.39659371972084045, |
| "learning_rate": 0.0001921703934496527, |
| "loss": 0.7263169884681702, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.13127169454086463, |
| "grad_norm": 0.3626038134098053, |
| "learning_rate": 0.00019201032187115234, |
| "loss": 0.5920513272285461, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.13253392237298833, |
| "grad_norm": 0.25446978211402893, |
| "learning_rate": 0.00019184869857459232, |
| "loss": 0.20390769839286804, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.133796150205112, |
| "grad_norm": 0.3908882439136505, |
| "learning_rate": 0.00019168552628568631, |
| "loss": 0.911649763584137, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.13505837803723572, |
| "grad_norm": 0.5168955326080322, |
| "learning_rate": 0.00019152080775627103, |
| "loss": 0.783044159412384, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.13632060586935943, |
| "grad_norm": 0.32102423906326294, |
| "learning_rate": 0.0001913545457642601, |
| "loss": 0.284521222114563, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1375828337014831, |
| "grad_norm": 0.41527506709098816, |
| "learning_rate": 0.00019118674311359684, |
| "loss": 0.690119206905365, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.13884506153360682, |
| "grad_norm": 0.3743795156478882, |
| "learning_rate": 0.0001910174026342073, |
| "loss": 0.8299716711044312, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.14010728936573053, |
| "grad_norm": 0.4144361615180969, |
| "learning_rate": 0.00019084652718195238, |
| "loss": 0.7170496582984924, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1413695171978542, |
| "grad_norm": 0.3862667679786682, |
| "learning_rate": 0.00019067411963857967, |
| "loss": 0.6340428590774536, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1426317450299779, |
| "grad_norm": 0.41245025396347046, |
| "learning_rate": 0.0001905001829116749, |
| "loss": 0.644637405872345, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.14389397286210162, |
| "grad_norm": 0.34236887097358704, |
| "learning_rate": 0.0001903247199346129, |
| "loss": 0.5065594911575317, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1451562006942253, |
| "grad_norm": 0.406076043844223, |
| "learning_rate": 0.00019014773366650807, |
| "loss": 0.8917930126190186, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.146418428526349, |
| "grad_norm": 0.3787905275821686, |
| "learning_rate": 0.00018996922709216455, |
| "loss": 0.8648253083229065, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.14768065635847272, |
| "grad_norm": 0.3749518096446991, |
| "learning_rate": 0.00018978920322202582, |
| "loss": 0.6751912832260132, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1489428841905964, |
| "grad_norm": 0.32289671897888184, |
| "learning_rate": 0.000189607665092124, |
| "loss": 0.5505026578903198, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1502051120227201, |
| "grad_norm": 0.3582629859447479, |
| "learning_rate": 0.00018942461576402857, |
| "loss": 0.6920587420463562, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.15146733985484379, |
| "grad_norm": 0.3632330596446991, |
| "learning_rate": 0.00018924005832479478, |
| "loss": 0.6031773090362549, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1527295676869675, |
| "grad_norm": 0.40739816427230835, |
| "learning_rate": 0.00018905399588691163, |
| "loss": 0.8041491508483887, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1539917955190912, |
| "grad_norm": 0.35906773805618286, |
| "learning_rate": 0.0001888664315882493, |
| "loss": 0.851598858833313, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.15525402335121488, |
| "grad_norm": 0.29666247963905334, |
| "learning_rate": 0.0001886773685920062, |
| "loss": 0.46212196350097656, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.1565162511833386, |
| "grad_norm": 0.3250925540924072, |
| "learning_rate": 0.00018848681008665582, |
| "loss": 0.4569106101989746, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1577784790154623, |
| "grad_norm": 0.36993423104286194, |
| "learning_rate": 0.00018829475928589271, |
| "loss": 0.6663421988487244, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.15904070684758598, |
| "grad_norm": 0.3611743152141571, |
| "learning_rate": 0.00018810121942857845, |
| "loss": 0.7817614674568176, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.16030293467970969, |
| "grad_norm": 0.370026558637619, |
| "learning_rate": 0.00018790619377868703, |
| "loss": 0.47573864459991455, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.1615651625118334, |
| "grad_norm": 0.32366666197776794, |
| "learning_rate": 0.0001877096856252496, |
| "loss": 0.5783149003982544, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.16282739034395707, |
| "grad_norm": 0.3249809741973877, |
| "learning_rate": 0.00018751169828229927, |
| "loss": 0.46492838859558105, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.16408961817608078, |
| "grad_norm": 0.41037416458129883, |
| "learning_rate": 0.0001873122350888151, |
| "loss": 0.796636164188385, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1653518460082045, |
| "grad_norm": 0.313863605260849, |
| "learning_rate": 0.00018711129940866575, |
| "loss": 0.38488903641700745, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.16661407384032817, |
| "grad_norm": 0.36502766609191895, |
| "learning_rate": 0.00018690889463055283, |
| "loss": 0.7027624249458313, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.16787630167245188, |
| "grad_norm": 0.348656564950943, |
| "learning_rate": 0.00018670502416795367, |
| "loss": 0.8470883369445801, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.16913852950457559, |
| "grad_norm": 0.35909080505371094, |
| "learning_rate": 0.0001864996914590638, |
| "loss": 0.661641001701355, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.17040075733669927, |
| "grad_norm": 0.38659459352493286, |
| "learning_rate": 0.00018629289996673897, |
| "loss": 0.694800853729248, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.17166298516882297, |
| "grad_norm": 0.366533100605011, |
| "learning_rate": 0.00018608465317843678, |
| "loss": 0.9004327654838562, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.17292521300094668, |
| "grad_norm": 0.42530369758605957, |
| "learning_rate": 0.00018587495460615778, |
| "loss": 0.9930410385131836, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.17418744083307036, |
| "grad_norm": 0.38337844610214233, |
| "learning_rate": 0.00018566380778638628, |
| "loss": 0.621214747428894, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.17544966866519407, |
| "grad_norm": 0.3821134567260742, |
| "learning_rate": 0.00018545121628003077, |
| "loss": 0.8524945974349976, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.17671189649731778, |
| "grad_norm": 0.6962800621986389, |
| "learning_rate": 0.0001852371836723638, |
| "loss": 0.490077942609787, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.17797412432944146, |
| "grad_norm": 0.40078434348106384, |
| "learning_rate": 0.00018502171357296144, |
| "loss": 0.7751069664955139, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.17923635216156517, |
| "grad_norm": 0.3736267685890198, |
| "learning_rate": 0.0001848048096156426, |
| "loss": 0.5479488968849182, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.18049857999368887, |
| "grad_norm": 0.3780677914619446, |
| "learning_rate": 0.00018458647545840763, |
| "loss": 0.6310573220252991, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.18176080782581255, |
| "grad_norm": 0.3293318748474121, |
| "learning_rate": 0.00018436671478337666, |
| "loss": 0.4275631010532379, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.18302303565793626, |
| "grad_norm": 0.3664384186267853, |
| "learning_rate": 0.00018414553129672732, |
| "loss": 0.4785746932029724, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.18428526349005997, |
| "grad_norm": 0.3737381100654602, |
| "learning_rate": 0.00018392292872863267, |
| "loss": 0.5807976722717285, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.18554749132218365, |
| "grad_norm": 0.40464866161346436, |
| "learning_rate": 0.00018369891083319778, |
| "loss": 0.673311710357666, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.18680971915430736, |
| "grad_norm": 0.4158247411251068, |
| "learning_rate": 0.00018347348138839683, |
| "loss": 0.5220749974250793, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.18807194698643104, |
| "grad_norm": 0.332676500082016, |
| "learning_rate": 0.0001832466441960091, |
| "loss": 0.42914730310440063, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.18933417481855475, |
| "grad_norm": 0.3765426278114319, |
| "learning_rate": 0.00018301840308155507, |
| "loss": 0.5210474729537964, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19059640265067845, |
| "grad_norm": 0.3598466217517853, |
| "learning_rate": 0.00018278876189423179, |
| "loss": 1.0533007383346558, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.19185863048280213, |
| "grad_norm": 0.5936484932899475, |
| "learning_rate": 0.00018255772450684798, |
| "loss": 0.8764799237251282, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.19312085831492584, |
| "grad_norm": 0.37642624974250793, |
| "learning_rate": 0.00018232529481575872, |
| "loss": 0.46875783801078796, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.19438308614704955, |
| "grad_norm": 0.36098363995552063, |
| "learning_rate": 0.00018209147674079983, |
| "loss": 0.6464822292327881, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.19564531397917323, |
| "grad_norm": 0.39462804794311523, |
| "learning_rate": 0.00018185627422522148, |
| "loss": 0.7827063798904419, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.19690754181129694, |
| "grad_norm": 0.36141112446784973, |
| "learning_rate": 0.0001816196912356222, |
| "loss": 0.9432686567306519, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.19816976964342065, |
| "grad_norm": 0.3857667148113251, |
| "learning_rate": 0.00018138173176188133, |
| "loss": 0.8610580563545227, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.19943199747554433, |
| "grad_norm": 0.35036033391952515, |
| "learning_rate": 0.00018114239981709232, |
| "loss": 0.7541987299919128, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.20069422530766803, |
| "grad_norm": 0.3643214702606201, |
| "learning_rate": 0.00018090169943749476, |
| "loss": 0.5373222827911377, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.20195645313979174, |
| "grad_norm": 0.3778736889362335, |
| "learning_rate": 0.00018065963468240625, |
| "loss": 0.5798829197883606, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.20321868097191542, |
| "grad_norm": 0.3862821161746979, |
| "learning_rate": 0.00018041620963415417, |
| "loss": 0.8069719672203064, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.20448090880403913, |
| "grad_norm": 0.36028918623924255, |
| "learning_rate": 0.00018017142839800668, |
| "loss": 0.7396454811096191, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.20574313663616284, |
| "grad_norm": 0.3179962635040283, |
| "learning_rate": 0.00017992529510210348, |
| "loss": 0.4463472366333008, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.20700536446828652, |
| "grad_norm": 0.3768749237060547, |
| "learning_rate": 0.00017967781389738625, |
| "loss": 0.6056400537490845, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.20826759230041023, |
| "grad_norm": 0.3443696200847626, |
| "learning_rate": 0.0001794289889575286, |
| "loss": 0.6053676009178162, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.20952982013253393, |
| "grad_norm": 0.40036582946777344, |
| "learning_rate": 0.00017917882447886582, |
| "loss": 0.669062077999115, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.21079204796465761, |
| "grad_norm": 0.373081773519516, |
| "learning_rate": 0.00017892732468032386, |
| "loss": 0.6552575826644897, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.21205427579678132, |
| "grad_norm": 0.3748333752155304, |
| "learning_rate": 0.00017867449380334834, |
| "loss": 0.7766703963279724, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.21331650362890503, |
| "grad_norm": 0.3774300813674927, |
| "learning_rate": 0.00017842033611183307, |
| "loss": 0.425309419631958, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.2145787314610287, |
| "grad_norm": 0.3346552848815918, |
| "learning_rate": 0.00017816485589204801, |
| "loss": 0.39386531710624695, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.21584095929315242, |
| "grad_norm": 0.37330710887908936, |
| "learning_rate": 0.00017790805745256704, |
| "loss": 0.8232768774032593, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.21710318712527613, |
| "grad_norm": 0.39691922068595886, |
| "learning_rate": 0.00017764994512419534, |
| "loss": 0.6968734264373779, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2183654149573998, |
| "grad_norm": 0.39556068181991577, |
| "learning_rate": 0.0001773905232598963, |
| "loss": 0.6288269758224487, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.21962764278952351, |
| "grad_norm": 0.3653506338596344, |
| "learning_rate": 0.00017712979623471807, |
| "loss": 0.6284940838813782, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2208898706216472, |
| "grad_norm": 0.390316367149353, |
| "learning_rate": 0.00017686776844571988, |
| "loss": 0.7067583799362183, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2221520984537709, |
| "grad_norm": 0.3740655481815338, |
| "learning_rate": 0.0001766044443118978, |
| "loss": 0.5908397436141968, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2234143262858946, |
| "grad_norm": 0.3652481138706207, |
| "learning_rate": 0.00017633982827411032, |
| "loss": 0.5462816953659058, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2246765541180183, |
| "grad_norm": 0.32050153613090515, |
| "learning_rate": 0.00017607392479500325, |
| "loss": 0.46369433403015137, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.225938781950142, |
| "grad_norm": 0.3392358720302582, |
| "learning_rate": 0.00017580673835893473, |
| "loss": 0.6735156774520874, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.2272010097822657, |
| "grad_norm": 0.3717758059501648, |
| "learning_rate": 0.00017553827347189938, |
| "loss": 0.9343303442001343, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2284632376143894, |
| "grad_norm": 0.3827629089355469, |
| "learning_rate": 0.00017526853466145244, |
| "loss": 0.7392931580543518, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.2297254654465131, |
| "grad_norm": 0.39305350184440613, |
| "learning_rate": 0.0001749975264766334, |
| "loss": 0.9212709665298462, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2309876932786368, |
| "grad_norm": 0.4486978352069855, |
| "learning_rate": 0.0001747252534878891, |
| "loss": 0.5881640315055847, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.23224992111076048, |
| "grad_norm": 0.31108546257019043, |
| "learning_rate": 0.000174451720286997, |
| "loss": 0.3923819959163666, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.2335121489428842, |
| "grad_norm": 0.3748640716075897, |
| "learning_rate": 0.00017417693148698743, |
| "loss": 0.7098450064659119, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2347743767750079, |
| "grad_norm": 0.3929251730442047, |
| "learning_rate": 0.00017390089172206592, |
| "loss": 0.6599665880203247, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.23603660460713158, |
| "grad_norm": 0.3102874159812927, |
| "learning_rate": 0.00017362360564753505, |
| "loss": 0.48892730474472046, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.2372988324392553, |
| "grad_norm": 0.3638162314891815, |
| "learning_rate": 0.00017334507793971592, |
| "loss": 0.6274378895759583, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.238561060271379, |
| "grad_norm": 0.280404657125473, |
| "learning_rate": 0.00017306531329586933, |
| "loss": 0.2670789361000061, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.23982328810350267, |
| "grad_norm": 0.3414492905139923, |
| "learning_rate": 0.00017278431643411642, |
| "loss": 0.854606568813324, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.24108551593562638, |
| "grad_norm": 0.339760959148407, |
| "learning_rate": 0.00017250209209335927, |
| "loss": 0.4224780797958374, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2423477437677501, |
| "grad_norm": 0.3548067808151245, |
| "learning_rate": 0.00017221864503320092, |
| "loss": 0.6572182178497314, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.24360997159987377, |
| "grad_norm": 0.3619638681411743, |
| "learning_rate": 0.0001719339800338651, |
| "loss": 0.4573401212692261, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.24487219943199748, |
| "grad_norm": 0.36929795145988464, |
| "learning_rate": 0.0001716481018961156, |
| "loss": 0.6632043123245239, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.24613442726412119, |
| "grad_norm": 0.37808045744895935, |
| "learning_rate": 0.00017136101544117525, |
| "loss": 0.7357593178749084, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.24739665509624487, |
| "grad_norm": 0.38574209809303284, |
| "learning_rate": 0.00017107272551064473, |
| "loss": 0.4269335865974426, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.24865888292836857, |
| "grad_norm": 0.3391668200492859, |
| "learning_rate": 0.0001707832369664209, |
| "loss": 0.8197081685066223, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.24992111076049228, |
| "grad_norm": 0.40485379099845886, |
| "learning_rate": 0.00017049255469061474, |
| "loss": 0.7450565099716187, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.251183338592616, |
| "grad_norm": 0.37861743569374084, |
| "learning_rate": 0.00017020068358546898, |
| "loss": 0.5399523973464966, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.25244556642473964, |
| "grad_norm": 0.39403632283210754, |
| "learning_rate": 0.0001699076285732756, |
| "loss": 0.9128871560096741, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25370779425686335, |
| "grad_norm": 0.40291762351989746, |
| "learning_rate": 0.0001696133945962927, |
| "loss": 0.8255231976509094, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.25497002208898706, |
| "grad_norm": 0.6885679364204407, |
| "learning_rate": 0.000169317986616661, |
| "loss": 0.40416646003723145, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.25623224992111077, |
| "grad_norm": 0.37489989399909973, |
| "learning_rate": 0.00016902140961632054, |
| "loss": 0.688234269618988, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.2574944777532345, |
| "grad_norm": 0.38479313254356384, |
| "learning_rate": 0.00016872366859692627, |
| "loss": 0.5331247448921204, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2587567055853582, |
| "grad_norm": 0.40287116169929504, |
| "learning_rate": 0.00016842476857976396, |
| "loss": 0.7545835971832275, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.26001893341748183, |
| "grad_norm": 0.3530018627643585, |
| "learning_rate": 0.0001681247146056654, |
| "loss": 0.5984229445457458, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.26128116124960554, |
| "grad_norm": 0.34704816341400146, |
| "learning_rate": 0.00016782351173492342, |
| "loss": 0.867906391620636, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.26254338908172925, |
| "grad_norm": 0.3187376856803894, |
| "learning_rate": 0.00016752116504720644, |
| "loss": 0.3967270255088806, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.26380561691385296, |
| "grad_norm": 0.4047222435474396, |
| "learning_rate": 0.00016721767964147306, |
| "loss": 0.7225915193557739, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.26506784474597667, |
| "grad_norm": 0.3720124661922455, |
| "learning_rate": 0.00016691306063588583, |
| "loss": 0.414902001619339, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2663300725781004, |
| "grad_norm": 0.27026864886283875, |
| "learning_rate": 0.00016660731316772505, |
| "loss": 0.2642422616481781, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.267592300410224, |
| "grad_norm": 0.28109508752822876, |
| "learning_rate": 0.00016630044239330204, |
| "loss": 0.3239024877548218, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.26885452824234773, |
| "grad_norm": 0.4051285982131958, |
| "learning_rate": 0.0001659924534878723, |
| "loss": 0.5133159160614014, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.27011675607447144, |
| "grad_norm": 0.389447420835495, |
| "learning_rate": 0.00016568335164554812, |
| "loss": 0.5882396101951599, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.27137898390659515, |
| "grad_norm": 0.4064750075340271, |
| "learning_rate": 0.00016537314207921115, |
| "loss": 0.8135666847229004, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.27264121173871886, |
| "grad_norm": 0.4201750159263611, |
| "learning_rate": 0.0001650618300204242, |
| "loss": 0.5702388286590576, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2739034395708425, |
| "grad_norm": 0.39069369435310364, |
| "learning_rate": 0.00016474942071934337, |
| "loss": 0.5717343688011169, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.2751656674029662, |
| "grad_norm": 0.407742977142334, |
| "learning_rate": 0.00016443591944462915, |
| "loss": 0.7300087213516235, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2764278952350899, |
| "grad_norm": 0.3515043258666992, |
| "learning_rate": 0.00016412133148335784, |
| "loss": 0.3343101143836975, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.27769012306721363, |
| "grad_norm": 0.391044557094574, |
| "learning_rate": 0.00016380566214093225, |
| "loss": 0.7425781488418579, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.27895235089933734, |
| "grad_norm": 0.4042036831378937, |
| "learning_rate": 0.0001634889167409923, |
| "loss": 0.7461481690406799, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.28021457873146105, |
| "grad_norm": 0.3601584732532501, |
| "learning_rate": 0.0001631711006253251, |
| "loss": 0.37352609634399414, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2814768065635847, |
| "grad_norm": 0.37277212738990784, |
| "learning_rate": 0.00016285221915377508, |
| "loss": 0.39840951561927795, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.2827390343957084, |
| "grad_norm": 0.41219770908355713, |
| "learning_rate": 0.0001625322777041534, |
| "loss": 0.631761908531189, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2840012622278321, |
| "grad_norm": 0.3973751962184906, |
| "learning_rate": 0.0001622112816721474, |
| "loss": 0.905396580696106, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2852634900599558, |
| "grad_norm": 0.4199240505695343, |
| "learning_rate": 0.00016188923647122947, |
| "loss": 0.5509951710700989, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.28652571789207953, |
| "grad_norm": 0.3599737882614136, |
| "learning_rate": 0.0001615661475325658, |
| "loss": 0.6364030838012695, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.28778794572420324, |
| "grad_norm": 0.36739909648895264, |
| "learning_rate": 0.000161242020304925, |
| "loss": 0.6433310508728027, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2890501735563269, |
| "grad_norm": 0.3900837004184723, |
| "learning_rate": 0.00016091686025458576, |
| "loss": 0.965069055557251, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2903124013884506, |
| "grad_norm": 0.35347774624824524, |
| "learning_rate": 0.0001605906728652451, |
| "loss": 0.5886582136154175, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2915746292205743, |
| "grad_norm": 0.4109002649784088, |
| "learning_rate": 0.00016026346363792567, |
| "loss": 0.5591490268707275, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.292836857052698, |
| "grad_norm": 0.3631947636604309, |
| "learning_rate": 0.0001599352380908829, |
| "loss": 0.544223427772522, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.2940990848848217, |
| "grad_norm": 0.3431711196899414, |
| "learning_rate": 0.00015960600175951223, |
| "loss": 0.4162474274635315, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.29536131271694543, |
| "grad_norm": 0.36346155405044556, |
| "learning_rate": 0.0001592757601962555, |
| "loss": 0.8591347932815552, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2966235405490691, |
| "grad_norm": 0.33583030104637146, |
| "learning_rate": 0.00015894451897050738, |
| "loss": 0.4463670551776886, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.2978857683811928, |
| "grad_norm": 0.3296612799167633, |
| "learning_rate": 0.00015861228366852148, |
| "loss": 0.46573173999786377, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.2991479962133165, |
| "grad_norm": 0.3123343288898468, |
| "learning_rate": 0.0001582790598933161, |
| "loss": 0.3503931164741516, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.3004102240454402, |
| "grad_norm": 0.374508261680603, |
| "learning_rate": 0.0001579448532645798, |
| "loss": 0.5895912051200867, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3016724518775639, |
| "grad_norm": 0.3595065176486969, |
| "learning_rate": 0.00015760966941857647, |
| "loss": 0.565118670463562, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.30293467970968757, |
| "grad_norm": 0.3403629660606384, |
| "learning_rate": 0.00015727351400805052, |
| "loss": 0.3920265734195709, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3041969075418113, |
| "grad_norm": 0.3979881703853607, |
| "learning_rate": 0.00015693639270213136, |
| "loss": 0.8573540449142456, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.305459135373935, |
| "grad_norm": 0.39144444465637207, |
| "learning_rate": 0.0001565983111862378, |
| "loss": 0.6504969000816345, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3067213632060587, |
| "grad_norm": 0.37401193380355835, |
| "learning_rate": 0.00015625927516198232, |
| "loss": 0.5543976426124573, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.3079835910381824, |
| "grad_norm": 0.37249916791915894, |
| "learning_rate": 0.0001559192903470747, |
| "loss": 0.781203031539917, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.3092458188703061, |
| "grad_norm": 0.36005863547325134, |
| "learning_rate": 0.00015557836247522575, |
| "loss": 0.4812963306903839, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.31050804670242976, |
| "grad_norm": 0.3561168611049652, |
| "learning_rate": 0.0001552364972960506, |
| "loss": 0.5244578719139099, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.31177027453455347, |
| "grad_norm": 0.3064718544483185, |
| "learning_rate": 0.00015489370057497165, |
| "loss": 0.35441693663597107, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.3130325023666772, |
| "grad_norm": 0.38345471024513245, |
| "learning_rate": 0.0001545499780931214, |
| "loss": 0.6824744343757629, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3142947301988009, |
| "grad_norm": 0.36782291531562805, |
| "learning_rate": 0.00015420533564724495, |
| "loss": 0.41213345527648926, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.3155569580309246, |
| "grad_norm": 0.39493328332901, |
| "learning_rate": 0.00015385977904960226, |
| "loss": 0.5020935535430908, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3168191858630483, |
| "grad_norm": 0.3497244715690613, |
| "learning_rate": 0.00015351331412787004, |
| "loss": 0.5641796588897705, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.31808141369517196, |
| "grad_norm": 0.3519827127456665, |
| "learning_rate": 0.0001531659467250436, |
| "loss": 0.8068366646766663, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.31934364152729566, |
| "grad_norm": 0.3616220951080322, |
| "learning_rate": 0.0001528176826993382, |
| "loss": 0.8782303929328918, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.32060586935941937, |
| "grad_norm": 0.4184557795524597, |
| "learning_rate": 0.00015246852792409033, |
| "loss": 0.7177759408950806, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.3218680971915431, |
| "grad_norm": 0.4233710765838623, |
| "learning_rate": 0.0001521184882876585, |
| "loss": 0.7468725442886353, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.3231303250236668, |
| "grad_norm": 0.358642578125, |
| "learning_rate": 0.00015176756969332425, |
| "loss": 0.4827675223350525, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3243925528557905, |
| "grad_norm": 0.33649536967277527, |
| "learning_rate": 0.00015141577805919226, |
| "loss": 0.3861742317676544, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.32565478068791415, |
| "grad_norm": 0.3700178861618042, |
| "learning_rate": 0.0001510631193180907, |
| "loss": 0.7173401713371277, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.32691700852003786, |
| "grad_norm": 0.3805610239505768, |
| "learning_rate": 0.00015070959941747124, |
| "loss": 0.8101674318313599, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.32817923635216156, |
| "grad_norm": 0.38329991698265076, |
| "learning_rate": 0.00015035522431930856, |
| "loss": 0.8402124643325806, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.32944146418428527, |
| "grad_norm": 0.361529678106308, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 0.6627713441848755, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.330703692016409, |
| "grad_norm": 0.3611642122268677, |
| "learning_rate": 0.00014964393245026466, |
| "loss": 0.3878118693828583, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3319659198485327, |
| "grad_norm": 0.41715049743652344, |
| "learning_rate": 0.00014928702767504233, |
| "loss": 0.5380449295043945, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.33322814768065634, |
| "grad_norm": 0.39908990263938904, |
| "learning_rate": 0.00014892929169339235, |
| "loss": 0.5558310151100159, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.33449037551278005, |
| "grad_norm": 0.39582890272140503, |
| "learning_rate": 0.00014857073053839206, |
| "loss": 0.7881603837013245, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.33575260334490376, |
| "grad_norm": 0.3694429397583008, |
| "learning_rate": 0.0001482113502570349, |
| "loss": 0.6454510688781738, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.33701483117702746, |
| "grad_norm": 0.25048568844795227, |
| "learning_rate": 0.00014785115691012864, |
| "loss": 0.23232965171337128, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.33827705900915117, |
| "grad_norm": 0.34138715267181396, |
| "learning_rate": 0.00014749015657219313, |
| "loss": 0.4494091868400574, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.3395392868412748, |
| "grad_norm": 0.34587278962135315, |
| "learning_rate": 0.00014712835533135774, |
| "loss": 0.6932641863822937, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.34080151467339853, |
| "grad_norm": 0.39235740900039673, |
| "learning_rate": 0.00014676575928925867, |
| "loss": 0.6115721464157104, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.34206374250552224, |
| "grad_norm": 0.372470498085022, |
| "learning_rate": 0.00014640237456093634, |
| "loss": 0.5936945676803589, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.34332597033764595, |
| "grad_norm": 0.3751293122768402, |
| "learning_rate": 0.0001460382072747319, |
| "loss": 0.6361874341964722, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.34458819816976965, |
| "grad_norm": 0.3495366871356964, |
| "learning_rate": 0.00014567326357218407, |
| "loss": 0.27429258823394775, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.34585042600189336, |
| "grad_norm": 0.40388405323028564, |
| "learning_rate": 0.00014530754960792553, |
| "loss": 0.46181124448776245, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.347112653834017, |
| "grad_norm": 0.319353312253952, |
| "learning_rate": 0.0001449410715495791, |
| "loss": 0.3895929455757141, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3483748816661407, |
| "grad_norm": 0.3918631970882416, |
| "learning_rate": 0.00014457383557765386, |
| "loss": 0.7136199474334717, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.34963710949826443, |
| "grad_norm": 0.36512160301208496, |
| "learning_rate": 0.00014420584788544057, |
| "loss": 0.6242626905441284, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.35089933733038814, |
| "grad_norm": 0.4133952558040619, |
| "learning_rate": 0.00014383711467890774, |
| "loss": 0.5601866245269775, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.35216156516251185, |
| "grad_norm": 0.4711982011795044, |
| "learning_rate": 0.00014346764217659653, |
| "loss": 0.3125555217266083, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.35342379299463555, |
| "grad_norm": 0.3581778109073639, |
| "learning_rate": 0.00014309743660951595, |
| "loss": 0.715130090713501, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3546860208267592, |
| "grad_norm": 0.34894779324531555, |
| "learning_rate": 0.0001427265042210381, |
| "loss": 0.5023713111877441, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.3559482486588829, |
| "grad_norm": 0.3577764332294464, |
| "learning_rate": 0.00014235485126679243, |
| "loss": 0.6359988451004028, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.3572104764910066, |
| "grad_norm": 0.44540712237358093, |
| "learning_rate": 0.00014198248401456055, |
| "loss": 0.8171525597572327, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.35847270432313033, |
| "grad_norm": 0.3892884850502014, |
| "learning_rate": 0.0001416094087441704, |
| "loss": 0.5745326280593872, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.35973493215525404, |
| "grad_norm": 0.36921554803848267, |
| "learning_rate": 0.00014123563174739037, |
| "loss": 0.4776252210140228, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.36099715998737775, |
| "grad_norm": 0.38392379879951477, |
| "learning_rate": 0.00014086115932782314, |
| "loss": 0.5178923606872559, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3622593878195014, |
| "grad_norm": 0.2495623081922531, |
| "learning_rate": 0.00014048599780079957, |
| "loss": 0.25248217582702637, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.3635216156516251, |
| "grad_norm": 0.4058895409107208, |
| "learning_rate": 0.00014011015349327187, |
| "loss": 0.6448837518692017, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.3647838434837488, |
| "grad_norm": 0.38654524087905884, |
| "learning_rate": 0.00013973363274370721, |
| "loss": 0.5187302827835083, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.3660460713158725, |
| "grad_norm": 0.3716411292552948, |
| "learning_rate": 0.0001393564419019806, |
| "loss": 0.7247863411903381, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.36730829914799623, |
| "grad_norm": 0.36923542618751526, |
| "learning_rate": 0.00013897858732926793, |
| "loss": 0.44380512833595276, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.36857052698011994, |
| "grad_norm": 0.38871094584465027, |
| "learning_rate": 0.00013860007539793871, |
| "loss": 0.8842666149139404, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3698327548122436, |
| "grad_norm": 0.35937783122062683, |
| "learning_rate": 0.00013822091249144838, |
| "loss": 0.489496111869812, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.3710949826443673, |
| "grad_norm": 0.3654249310493469, |
| "learning_rate": 0.00013784110500423104, |
| "loss": 0.5621508955955505, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.372357210476491, |
| "grad_norm": 0.4184640049934387, |
| "learning_rate": 0.00013746065934159123, |
| "loss": 0.4694799780845642, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.3736194383086147, |
| "grad_norm": 0.40087419748306274, |
| "learning_rate": 0.00013707958191959608, |
| "loss": 0.7347521781921387, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.3748816661407384, |
| "grad_norm": 0.43245846033096313, |
| "learning_rate": 0.00013669787916496722, |
| "loss": 0.6806380152702332, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.3761438939728621, |
| "grad_norm": 0.36302655935287476, |
| "learning_rate": 0.00013631555751497215, |
| "loss": 0.8191426992416382, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3774061218049858, |
| "grad_norm": 0.3232358396053314, |
| "learning_rate": 0.00013593262341731578, |
| "loss": 0.3671002984046936, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.3786683496371095, |
| "grad_norm": 0.3223403990268707, |
| "learning_rate": 0.0001355490833300318, |
| "loss": 0.3676319718360901, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3799305774692332, |
| "grad_norm": 0.3848235309123993, |
| "learning_rate": 0.00013516494372137368, |
| "loss": 0.7041884660720825, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.3811928053013569, |
| "grad_norm": 0.39564049243927, |
| "learning_rate": 0.0001347802110697055, |
| "loss": 0.7267032861709595, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.3824550331334806, |
| "grad_norm": 0.3752077519893646, |
| "learning_rate": 0.00013439489186339282, |
| "loss": 0.44746118783950806, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.38371726096560427, |
| "grad_norm": 0.3596220016479492, |
| "learning_rate": 0.00013400899260069323, |
| "loss": 0.42425066232681274, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.384979488797728, |
| "grad_norm": 0.36152541637420654, |
| "learning_rate": 0.00013362251978964675, |
| "loss": 0.457078754901886, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.3862417166298517, |
| "grad_norm": 0.3770156502723694, |
| "learning_rate": 0.00013323547994796597, |
| "loss": 0.5810063481330872, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.3875039444619754, |
| "grad_norm": 0.42228955030441284, |
| "learning_rate": 0.0001328478796029264, |
| "loss": 0.8851193189620972, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.3887661722940991, |
| "grad_norm": 0.4153822660446167, |
| "learning_rate": 0.00013245972529125606, |
| "loss": 0.6357755661010742, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3900284001262228, |
| "grad_norm": 0.3957383930683136, |
| "learning_rate": 0.00013207102355902552, |
| "loss": 0.7041004300117493, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.39129062795834646, |
| "grad_norm": 0.37788495421409607, |
| "learning_rate": 0.0001316817809615373, |
| "loss": 0.5084975361824036, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.39255285579047017, |
| "grad_norm": 0.3773125410079956, |
| "learning_rate": 0.00013129200406321545, |
| "loss": 0.7748256325721741, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.3938150836225939, |
| "grad_norm": 0.36805328726768494, |
| "learning_rate": 0.00013090169943749476, |
| "loss": 0.5911955833435059, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3950773114547176, |
| "grad_norm": 0.4318149983882904, |
| "learning_rate": 0.00013051087366670994, |
| "loss": 0.6285633444786072, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3963395392868413, |
| "grad_norm": 0.27865713834762573, |
| "learning_rate": 0.00013011953334198466, |
| "loss": 0.2808951139450073, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.397601767118965, |
| "grad_norm": 0.38748934864997864, |
| "learning_rate": 0.00012972768506312027, |
| "loss": 0.7810741662979126, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.39886399495108865, |
| "grad_norm": 0.39623865485191345, |
| "learning_rate": 0.00012933533543848461, |
| "loss": 0.8346691727638245, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.40012622278321236, |
| "grad_norm": 0.3087095022201538, |
| "learning_rate": 0.0001289424910849005, |
| "loss": 0.35411983728408813, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.40138845061533607, |
| "grad_norm": 0.37265872955322266, |
| "learning_rate": 0.00012854915862753422, |
| "loss": 0.7961377501487732, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.4026506784474598, |
| "grad_norm": 0.3931768536567688, |
| "learning_rate": 0.00012815534469978363, |
| "loss": 0.5816214084625244, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.4039129062795835, |
| "grad_norm": 0.35481584072113037, |
| "learning_rate": 0.00012776105594316647, |
| "loss": 0.7527205944061279, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.40517513411170714, |
| "grad_norm": 0.3482368290424347, |
| "learning_rate": 0.0001273662990072083, |
| "loss": 0.4816396236419678, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.40643736194383084, |
| "grad_norm": 0.35917821526527405, |
| "learning_rate": 0.00012697108054933025, |
| "loss": 0.358943372964859, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.40769958977595455, |
| "grad_norm": 0.35279327630996704, |
| "learning_rate": 0.000126575407234737, |
| "loss": 0.6909571290016174, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.40896181760807826, |
| "grad_norm": 0.3735545575618744, |
| "learning_rate": 0.00012617928573630406, |
| "loss": 0.7668647170066833, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.41022404544020197, |
| "grad_norm": 0.3791963458061218, |
| "learning_rate": 0.00012578272273446536, |
| "loss": 0.4582277238368988, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.4114862732723257, |
| "grad_norm": 0.3846660852432251, |
| "learning_rate": 0.0001253857249171008, |
| "loss": 0.5816541910171509, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.41274850110444933, |
| "grad_norm": 0.2960149049758911, |
| "learning_rate": 0.0001249882989794231, |
| "loss": 0.33520427346229553, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.41401072893657304, |
| "grad_norm": 0.5094306468963623, |
| "learning_rate": 0.00012459045162386512, |
| "loss": 0.901237964630127, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.41527295676869674, |
| "grad_norm": 0.4056321680545807, |
| "learning_rate": 0.00012419218955996676, |
| "loss": 0.37850597500801086, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.41653518460082045, |
| "grad_norm": 0.4399261772632599, |
| "learning_rate": 0.00012379351950426187, |
| "loss": 0.7433345913887024, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.41779741243294416, |
| "grad_norm": 0.38947823643684387, |
| "learning_rate": 0.0001233944481801649, |
| "loss": 0.7301508784294128, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.41905964026506787, |
| "grad_norm": 0.4117131531238556, |
| "learning_rate": 0.00012299498231785737, |
| "loss": 0.5769900679588318, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.4203218680971915, |
| "grad_norm": 0.3559359312057495, |
| "learning_rate": 0.00012259512865417477, |
| "loss": 0.5584972500801086, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.42158409592931523, |
| "grad_norm": 0.4073047637939453, |
| "learning_rate": 0.00012219489393249262, |
| "loss": 0.4495258927345276, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.42284632376143894, |
| "grad_norm": 0.36505264043807983, |
| "learning_rate": 0.00012179428490261278, |
| "loss": 0.749606192111969, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.42410855159356264, |
| "grad_norm": 0.3678975999355316, |
| "learning_rate": 0.00012139330832064974, |
| "loss": 0.32790112495422363, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.42537077942568635, |
| "grad_norm": 0.37156620621681213, |
| "learning_rate": 0.00012099197094891659, |
| "loss": 0.43149426579475403, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.42663300725781006, |
| "grad_norm": 0.3237273395061493, |
| "learning_rate": 0.00012059027955581099, |
| "loss": 0.3703850209712982, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.4278952350899337, |
| "grad_norm": 0.3485283851623535, |
| "learning_rate": 0.00012018824091570103, |
| "loss": 0.569449782371521, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.4291574629220574, |
| "grad_norm": 0.378540962934494, |
| "learning_rate": 0.00011978586180881099, |
| "loss": 0.48175811767578125, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.43041969075418113, |
| "grad_norm": 0.3947147727012634, |
| "learning_rate": 0.00011938314902110701, |
| "loss": 0.4960615634918213, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.43168191858630484, |
| "grad_norm": 0.34757497906684875, |
| "learning_rate": 0.0001189801093441826, |
| "loss": 0.34023621678352356, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.43294414641842854, |
| "grad_norm": 0.3692375719547272, |
| "learning_rate": 0.00011857674957514411, |
| "loss": 0.760047197341919, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.43420637425055225, |
| "grad_norm": 0.38019847869873047, |
| "learning_rate": 0.00011817307651649616, |
| "loss": 0.8378443717956543, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.4354686020826759, |
| "grad_norm": 0.3751029074192047, |
| "learning_rate": 0.00011776909697602689, |
| "loss": 0.4766428470611572, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.4367308299147996, |
| "grad_norm": 0.5471876263618469, |
| "learning_rate": 0.00011736481776669306, |
| "loss": 0.41353490948677063, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.4379930577469233, |
| "grad_norm": 0.3773936629295349, |
| "learning_rate": 0.00011696024570650528, |
| "loss": 0.5652437210083008, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.43925528557904703, |
| "grad_norm": 0.3828847110271454, |
| "learning_rate": 0.000116555387618413, |
| "loss": 0.6103649139404297, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.44051751341117074, |
| "grad_norm": 0.35921478271484375, |
| "learning_rate": 0.00011615025033018936, |
| "loss": 0.609113872051239, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.4417797412432944, |
| "grad_norm": 0.3687792420387268, |
| "learning_rate": 0.00011574484067431617, |
| "loss": 0.8462064266204834, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4430419690754181, |
| "grad_norm": 0.3686203956604004, |
| "learning_rate": 0.00011533916548786857, |
| "loss": 0.656709611415863, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.4443041969075418, |
| "grad_norm": 0.39589008688926697, |
| "learning_rate": 0.0001149332316123997, |
| "loss": 0.7393782734870911, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.4455664247396655, |
| "grad_norm": 0.38354629278182983, |
| "learning_rate": 0.0001145270458938255, |
| "loss": 0.6119332909584045, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.4468286525717892, |
| "grad_norm": 0.3615580201148987, |
| "learning_rate": 0.00011412061518230914, |
| "loss": 0.5982248783111572, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.44809088040391293, |
| "grad_norm": 0.35184618830680847, |
| "learning_rate": 0.00011371394633214547, |
| "loss": 0.7312008142471313, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.4493531082360366, |
| "grad_norm": 0.37319618463516235, |
| "learning_rate": 0.00011330704620164538, |
| "loss": 0.4518621265888214, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.4506153360681603, |
| "grad_norm": 0.38271263241767883, |
| "learning_rate": 0.00011289992165302035, |
| "loss": 0.684691309928894, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.451877563900284, |
| "grad_norm": 0.3614532947540283, |
| "learning_rate": 0.00011249257955226648, |
| "loss": 0.7593181729316711, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.4531397917324077, |
| "grad_norm": 0.42146942019462585, |
| "learning_rate": 0.00011208502676904886, |
| "loss": 0.6286287307739258, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.4544020195645314, |
| "grad_norm": 0.36411377787590027, |
| "learning_rate": 0.00011167727017658562, |
| "loss": 0.7084791660308838, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4556642473966551, |
| "grad_norm": 0.3926357328891754, |
| "learning_rate": 0.00011126931665153212, |
| "loss": 0.7415444254875183, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.4569264752287788, |
| "grad_norm": 0.3722608685493469, |
| "learning_rate": 0.0001108611730738648, |
| "loss": 0.5457031726837158, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.4581887030609025, |
| "grad_norm": 0.34348252415657043, |
| "learning_rate": 0.00011045284632676536, |
| "loss": 0.3467724919319153, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.4594509308930262, |
| "grad_norm": 0.38620299100875854, |
| "learning_rate": 0.00011004434329650452, |
| "loss": 0.6784603595733643, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.4607131587251499, |
| "grad_norm": 0.412806898355484, |
| "learning_rate": 0.000109635670872326, |
| "loss": 0.541936993598938, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.4619753865572736, |
| "grad_norm": 0.37946563959121704, |
| "learning_rate": 0.00010922683594633021, |
| "loss": 0.7005019187927246, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.4632376143893973, |
| "grad_norm": 0.36721378564834595, |
| "learning_rate": 0.00010881784541335817, |
| "loss": 0.5035321712493896, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.46449984222152096, |
| "grad_norm": 0.41076555848121643, |
| "learning_rate": 0.00010840870617087514, |
| "loss": 0.7746437191963196, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4657620700536447, |
| "grad_norm": 0.3742596209049225, |
| "learning_rate": 0.00010799942511885418, |
| "loss": 0.5171118974685669, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.4670242978857684, |
| "grad_norm": 0.3880580961704254, |
| "learning_rate": 0.00010759000915966011, |
| "loss": 0.7049781680107117, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4682865257178921, |
| "grad_norm": 0.3612365424633026, |
| "learning_rate": 0.00010718046519793276, |
| "loss": 0.43177270889282227, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.4695487535500158, |
| "grad_norm": 0.4223220944404602, |
| "learning_rate": 0.00010677080014047076, |
| "loss": 0.6074368357658386, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.47081098138213945, |
| "grad_norm": 0.3780396282672882, |
| "learning_rate": 0.00010636102089611491, |
| "loss": 0.5008561015129089, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.47207320921426316, |
| "grad_norm": 0.3705812096595764, |
| "learning_rate": 0.00010595113437563176, |
| "loss": 0.6822476983070374, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.47333543704638686, |
| "grad_norm": 0.4130505919456482, |
| "learning_rate": 0.000105541147491597, |
| "loss": 0.5583031177520752, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4745976648785106, |
| "grad_norm": 0.3589628040790558, |
| "learning_rate": 0.00010513106715827896, |
| "loss": 0.801206111907959, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4758598927106343, |
| "grad_norm": 0.3859142065048218, |
| "learning_rate": 0.00010472090029152196, |
| "loss": 0.5001563429832458, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.477122120542758, |
| "grad_norm": 0.5252732038497925, |
| "learning_rate": 0.00010431065380862959, |
| "loss": 0.6630918383598328, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.47838434837488164, |
| "grad_norm": 0.37909185886383057, |
| "learning_rate": 0.00010390033462824817, |
| "loss": 0.7034825682640076, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.47964657620700535, |
| "grad_norm": 0.3590451776981354, |
| "learning_rate": 0.00010348994967025012, |
| "loss": 0.36768239736557007, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.48090880403912906, |
| "grad_norm": 0.3347563147544861, |
| "learning_rate": 0.00010307950585561706, |
| "loss": 0.35689371824264526, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.48217103187125276, |
| "grad_norm": 0.3807820975780487, |
| "learning_rate": 0.00010266901010632324, |
| "loss": 0.4797685742378235, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.48343325970337647, |
| "grad_norm": 0.35765600204467773, |
| "learning_rate": 0.00010225846934521881, |
| "loss": 0.5064284205436707, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.4846954875355002, |
| "grad_norm": 0.39294371008872986, |
| "learning_rate": 0.00010184789049591299, |
| "loss": 0.6024259924888611, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.48595771536762383, |
| "grad_norm": 0.3386979401111603, |
| "learning_rate": 0.00010143728048265735, |
| "loss": 0.4336264133453369, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.48721994319974754, |
| "grad_norm": 0.38877370953559875, |
| "learning_rate": 0.00010102664623022899, |
| "loss": 0.5891298055648804, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.48848217103187125, |
| "grad_norm": 0.3828097879886627, |
| "learning_rate": 0.00010061599466381389, |
| "loss": 0.608544111251831, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.48974439886399496, |
| "grad_norm": 0.3743601441383362, |
| "learning_rate": 0.0001002053327088899, |
| "loss": 0.6880306601524353, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.49100662669611866, |
| "grad_norm": 0.39663559198379517, |
| "learning_rate": 9.979466729111013e-05, |
| "loss": 0.587350070476532, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.49226885452824237, |
| "grad_norm": 0.4369630813598633, |
| "learning_rate": 9.938400533618615e-05, |
| "loss": 0.6706233024597168, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.493531082360366, |
| "grad_norm": 0.41926079988479614, |
| "learning_rate": 9.897335376977102e-05, |
| "loss": 0.6896798610687256, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.49479331019248973, |
| "grad_norm": 0.4132974147796631, |
| "learning_rate": 9.856271951734268e-05, |
| "loss": 0.49843940138816833, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.49605553802461344, |
| "grad_norm": 0.2707560956478119, |
| "learning_rate": 9.815210950408704e-05, |
| "loss": 0.2632002830505371, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.49731776585673715, |
| "grad_norm": 0.38526275753974915, |
| "learning_rate": 9.774153065478121e-05, |
| "loss": 0.40896376967430115, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.49857999368886086, |
| "grad_norm": 0.38434556126594543, |
| "learning_rate": 9.733098989367677e-05, |
| "loss": 0.5658249855041504, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.49984222152098456, |
| "grad_norm": 0.37741097807884216, |
| "learning_rate": 9.692049414438299e-05, |
| "loss": 0.6638325452804565, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.5011044493531083, |
| "grad_norm": 0.38284313678741455, |
| "learning_rate": 9.651005032974994e-05, |
| "loss": 0.822309672832489, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.502366677185232, |
| "grad_norm": 0.39180007576942444, |
| "learning_rate": 9.609966537175185e-05, |
| "loss": 0.6988601684570312, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.5036289050173557, |
| "grad_norm": 0.37315770983695984, |
| "learning_rate": 9.568934619137046e-05, |
| "loss": 0.3722432851791382, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.5048911328494793, |
| "grad_norm": 0.3731346130371094, |
| "learning_rate": 9.52790997084781e-05, |
| "loss": 0.6665936708450317, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.506153360681603, |
| "grad_norm": 0.39265018701553345, |
| "learning_rate": 9.486893284172102e-05, |
| "loss": 0.4295370578765869, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.5074155885137267, |
| "grad_norm": 0.22621490061283112, |
| "learning_rate": 9.4458852508403e-05, |
| "loss": 0.1555391401052475, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.5086778163458504, |
| "grad_norm": 0.39791470766067505, |
| "learning_rate": 9.404886562436825e-05, |
| "loss": 0.7941228151321411, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.5099400441779741, |
| "grad_norm": 0.39022767543792725, |
| "learning_rate": 9.36389791038851e-05, |
| "loss": 0.6743201613426208, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5112022720100978, |
| "grad_norm": 0.3959182798862457, |
| "learning_rate": 9.322919985952926e-05, |
| "loss": 0.6928982138633728, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5124644998422215, |
| "grad_norm": 0.35128676891326904, |
| "learning_rate": 9.281953480206725e-05, |
| "loss": 0.4283405840396881, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5137267276743452, |
| "grad_norm": 0.38393881916999817, |
| "learning_rate": 9.240999084033991e-05, |
| "loss": 0.48866939544677734, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.514988955506469, |
| "grad_norm": 0.3746855556964874, |
| "learning_rate": 9.200057488114585e-05, |
| "loss": 0.7293848395347595, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.5162511833385927, |
| "grad_norm": 0.3574482500553131, |
| "learning_rate": 9.15912938291249e-05, |
| "loss": 0.7160978317260742, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.5175134111707164, |
| "grad_norm": 0.31795260310173035, |
| "learning_rate": 9.118215458664185e-05, |
| "loss": 0.3059941828250885, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.51877563900284, |
| "grad_norm": 0.37041789293289185, |
| "learning_rate": 9.077316405366981e-05, |
| "loss": 0.40029266476631165, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.5200378668349637, |
| "grad_norm": 0.3135358989238739, |
| "learning_rate": 9.036432912767403e-05, |
| "loss": 0.34788432717323303, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.5213000946670874, |
| "grad_norm": 0.3632740080356598, |
| "learning_rate": 8.99556567034955e-05, |
| "loss": 0.47788649797439575, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.5225623224992111, |
| "grad_norm": 0.39943233132362366, |
| "learning_rate": 8.954715367323468e-05, |
| "loss": 0.7340242862701416, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.5238245503313348, |
| "grad_norm": 0.35586607456207275, |
| "learning_rate": 8.91388269261352e-05, |
| "loss": 0.416128933429718, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.5250867781634585, |
| "grad_norm": 0.38117703795433044, |
| "learning_rate": 8.87306833484679e-05, |
| "loss": 0.5627406239509583, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.5263490059955822, |
| "grad_norm": 0.4389495253562927, |
| "learning_rate": 8.832272982341439e-05, |
| "loss": 0.41440343856811523, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.5276112338277059, |
| "grad_norm": 0.4085499942302704, |
| "learning_rate": 8.791497323095116e-05, |
| "loss": 0.48129522800445557, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.5288734616598296, |
| "grad_norm": 0.4046858549118042, |
| "learning_rate": 8.750742044773354e-05, |
| "loss": 0.6476734280586243, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.5301356894919533, |
| "grad_norm": 0.4076245427131653, |
| "learning_rate": 8.710007834697969e-05, |
| "loss": 0.6386293768882751, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.531397917324077, |
| "grad_norm": 0.4085608124732971, |
| "learning_rate": 8.669295379835467e-05, |
| "loss": 0.6650468707084656, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.5326601451562007, |
| "grad_norm": 0.4489421844482422, |
| "learning_rate": 8.628605366785458e-05, |
| "loss": 0.5000302195549011, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.5339223729883243, |
| "grad_norm": 0.3692164123058319, |
| "learning_rate": 8.587938481769089e-05, |
| "loss": 0.6816071271896362, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.535184600820448, |
| "grad_norm": 0.40202704071998596, |
| "learning_rate": 8.547295410617453e-05, |
| "loss": 0.7187950611114502, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.5364468286525718, |
| "grad_norm": 0.3954196870326996, |
| "learning_rate": 8.506676838760032e-05, |
| "loss": 0.47280117869377136, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.5377090564846955, |
| "grad_norm": 0.4074536859989166, |
| "learning_rate": 8.466083451213144e-05, |
| "loss": 0.5304967761039734, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.5389712843168192, |
| "grad_norm": 0.4292575418949127, |
| "learning_rate": 8.425515932568382e-05, |
| "loss": 0.5013709664344788, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.5402335121489429, |
| "grad_norm": 0.3722835183143616, |
| "learning_rate": 8.384974966981063e-05, |
| "loss": 0.5023803114891052, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.5414957399810666, |
| "grad_norm": 0.39425259828567505, |
| "learning_rate": 8.344461238158699e-05, |
| "loss": 0.5070059299468994, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.5427579678131903, |
| "grad_norm": 0.3532828688621521, |
| "learning_rate": 8.303975429349473e-05, |
| "loss": 0.4102450907230377, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.544020195645314, |
| "grad_norm": 0.41622671484947205, |
| "learning_rate": 8.263518223330697e-05, |
| "loss": 0.7629631757736206, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.5452824234774377, |
| "grad_norm": 0.410709947347641, |
| "learning_rate": 8.223090302397313e-05, |
| "loss": 0.7080658078193665, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.5465446513095614, |
| "grad_norm": 0.3647861182689667, |
| "learning_rate": 8.182692348350385e-05, |
| "loss": 0.48096179962158203, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.547806879141685, |
| "grad_norm": 0.39459702372550964, |
| "learning_rate": 8.142325042485592e-05, |
| "loss": 0.8301153779029846, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.5490691069738087, |
| "grad_norm": 0.3667653799057007, |
| "learning_rate": 8.101989065581743e-05, |
| "loss": 0.44432565569877625, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.5503313348059324, |
| "grad_norm": 0.4047844707965851, |
| "learning_rate": 8.0616850978893e-05, |
| "loss": 0.5940053462982178, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5515935626380561, |
| "grad_norm": 0.4128320515155792, |
| "learning_rate": 8.021413819118903e-05, |
| "loss": 0.512177050113678, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.5528557904701799, |
| "grad_norm": 0.37576359510421753, |
| "learning_rate": 7.9811759084299e-05, |
| "loss": 0.5231778025627136, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.5541180183023036, |
| "grad_norm": 0.3246806263923645, |
| "learning_rate": 7.940972044418902e-05, |
| "loss": 0.31796854734420776, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.5553802461344273, |
| "grad_norm": 0.35433802008628845, |
| "learning_rate": 7.900802905108342e-05, |
| "loss": 0.42495012283325195, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.556642473966551, |
| "grad_norm": 0.4064764380455017, |
| "learning_rate": 7.860669167935028e-05, |
| "loss": 0.6670479774475098, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.5579047017986747, |
| "grad_norm": 0.3848694860935211, |
| "learning_rate": 7.820571509738723e-05, |
| "loss": 0.9129263162612915, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.5591669296307984, |
| "grad_norm": 0.33378908038139343, |
| "learning_rate": 7.780510606750742e-05, |
| "loss": 0.3959806561470032, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.5604291574629221, |
| "grad_norm": 0.4084720313549042, |
| "learning_rate": 7.740487134582525e-05, |
| "loss": 0.5052785873413086, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5616913852950458, |
| "grad_norm": 0.4099523425102234, |
| "learning_rate": 7.700501768214267e-05, |
| "loss": 0.6453187465667725, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.5629536131271694, |
| "grad_norm": 0.3560808002948761, |
| "learning_rate": 7.660555181983518e-05, |
| "loss": 0.4158024787902832, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5642158409592931, |
| "grad_norm": 0.39216476678848267, |
| "learning_rate": 7.620648049573815e-05, |
| "loss": 0.5767735242843628, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.5654780687914168, |
| "grad_norm": 0.3903045356273651, |
| "learning_rate": 7.580781044003324e-05, |
| "loss": 0.44133317470550537, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5667402966235405, |
| "grad_norm": 0.37804114818573, |
| "learning_rate": 7.540954837613488e-05, |
| "loss": 0.3772793710231781, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.5680025244556642, |
| "grad_norm": 0.40392929315567017, |
| "learning_rate": 7.50117010205769e-05, |
| "loss": 0.6205388307571411, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.569264752287788, |
| "grad_norm": 0.414870023727417, |
| "learning_rate": 7.461427508289922e-05, |
| "loss": 0.58516925573349, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.5705269801199117, |
| "grad_norm": 0.3570805490016937, |
| "learning_rate": 7.421727726553463e-05, |
| "loss": 0.4138091802597046, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5717892079520354, |
| "grad_norm": 0.3515688478946686, |
| "learning_rate": 7.382071426369597e-05, |
| "loss": 0.3913613557815552, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.5730514357841591, |
| "grad_norm": 0.3770284056663513, |
| "learning_rate": 7.342459276526302e-05, |
| "loss": 0.6880075335502625, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5743136636162828, |
| "grad_norm": 0.3983762264251709, |
| "learning_rate": 7.302891945066974e-05, |
| "loss": 0.6962027549743652, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5755758914484065, |
| "grad_norm": 0.3529524505138397, |
| "learning_rate": 7.263370099279172e-05, |
| "loss": 0.4161332845687866, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5768381192805301, |
| "grad_norm": 0.3377407193183899, |
| "learning_rate": 7.223894405683354e-05, |
| "loss": 0.39849692583084106, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5781003471126538, |
| "grad_norm": 0.4013289511203766, |
| "learning_rate": 7.18446553002164e-05, |
| "loss": 0.5468084812164307, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5793625749447775, |
| "grad_norm": 0.39508214592933655, |
| "learning_rate": 7.14508413724658e-05, |
| "loss": 0.8175787329673767, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5806248027769012, |
| "grad_norm": 0.4191129803657532, |
| "learning_rate": 7.10575089150995e-05, |
| "loss": 0.5919452905654907, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5818870306090249, |
| "grad_norm": 0.40128064155578613, |
| "learning_rate": 7.066466456151541e-05, |
| "loss": 0.8323053121566772, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5831492584411486, |
| "grad_norm": 0.3903089761734009, |
| "learning_rate": 7.027231493687974e-05, |
| "loss": 0.4888315796852112, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5844114862732723, |
| "grad_norm": 0.3628254532814026, |
| "learning_rate": 6.988046665801536e-05, |
| "loss": 0.33037495613098145, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.585673714105396, |
| "grad_norm": 0.3754008710384369, |
| "learning_rate": 6.948912633329007e-05, |
| "loss": 0.5007816553115845, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5869359419375197, |
| "grad_norm": 0.376667320728302, |
| "learning_rate": 6.909830056250527e-05, |
| "loss": 0.757786750793457, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5881981697696435, |
| "grad_norm": 0.29717469215393066, |
| "learning_rate": 6.870799593678459e-05, |
| "loss": 0.2943430244922638, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5894603976017672, |
| "grad_norm": 0.38486912846565247, |
| "learning_rate": 6.831821903846273e-05, |
| "loss": 0.44896000623703003, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5907226254338909, |
| "grad_norm": 0.34192511439323425, |
| "learning_rate": 6.792897644097451e-05, |
| "loss": 0.29370012879371643, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5919848532660145, |
| "grad_norm": 0.4050130248069763, |
| "learning_rate": 6.754027470874396e-05, |
| "loss": 0.6608400344848633, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5932470810981382, |
| "grad_norm": 0.3004320561885834, |
| "learning_rate": 6.715212039707364e-05, |
| "loss": 0.23013579845428467, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5945093089302619, |
| "grad_norm": 0.36933329701423645, |
| "learning_rate": 6.676452005203406e-05, |
| "loss": 0.6952561140060425, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5957715367623856, |
| "grad_norm": 0.42043766379356384, |
| "learning_rate": 6.63774802103533e-05, |
| "loss": 0.7303497195243835, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5970337645945093, |
| "grad_norm": 0.3762672543525696, |
| "learning_rate": 6.599100739930677e-05, |
| "loss": 0.7378503084182739, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.598295992426633, |
| "grad_norm": 0.36484387516975403, |
| "learning_rate": 6.560510813660719e-05, |
| "loss": 0.4264744818210602, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5995582202587567, |
| "grad_norm": 0.4137173295021057, |
| "learning_rate": 6.521978893029452e-05, |
| "loss": 0.6754275560379028, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6008204480908804, |
| "grad_norm": 0.4293482303619385, |
| "learning_rate": 6.483505627862632e-05, |
| "loss": 0.7817292809486389, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6020826759230041, |
| "grad_norm": 0.4162338376045227, |
| "learning_rate": 6.44509166699682e-05, |
| "loss": 0.6910249590873718, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.6033449037551278, |
| "grad_norm": 0.4081710875034332, |
| "learning_rate": 6.406737658268425e-05, |
| "loss": 0.68759685754776, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.6046071315872515, |
| "grad_norm": 0.37592121958732605, |
| "learning_rate": 6.368444248502789e-05, |
| "loss": 0.6178593635559082, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.6058693594193751, |
| "grad_norm": 0.43066924810409546, |
| "learning_rate": 6.33021208350328e-05, |
| "loss": 0.5456580519676208, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6071315872514988, |
| "grad_norm": 0.3334132730960846, |
| "learning_rate": 6.292041808040393e-05, |
| "loss": 0.36408746242523193, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.6083938150836226, |
| "grad_norm": 0.42052480578422546, |
| "learning_rate": 6.25393406584088e-05, |
| "loss": 0.6775397062301636, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.6096560429157463, |
| "grad_norm": 0.3473283648490906, |
| "learning_rate": 6.215889499576898e-05, |
| "loss": 0.4786512851715088, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.61091827074787, |
| "grad_norm": 0.35813814401626587, |
| "learning_rate": 6.177908750855164e-05, |
| "loss": 0.35457998514175415, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.6121804985799937, |
| "grad_norm": 0.33015450835227966, |
| "learning_rate": 6.139992460206132e-05, |
| "loss": 0.314817875623703, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.6134427264121174, |
| "grad_norm": 0.3904082179069519, |
| "learning_rate": 6.102141267073207e-05, |
| "loss": 0.5199745893478394, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.6147049542442411, |
| "grad_norm": 0.3974827229976654, |
| "learning_rate": 6.064355809801943e-05, |
| "loss": 0.6768912672996521, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.6159671820763648, |
| "grad_norm": 0.3908008635044098, |
| "learning_rate": 6.02663672562928e-05, |
| "loss": 0.5883216261863708, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.6172294099084885, |
| "grad_norm": 0.3862961232662201, |
| "learning_rate": 5.988984650672813e-05, |
| "loss": 0.7970855236053467, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.6184916377406122, |
| "grad_norm": 0.3746252655982971, |
| "learning_rate": 5.951400219920046e-05, |
| "loss": 0.4062190651893616, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6197538655727359, |
| "grad_norm": 0.36359089612960815, |
| "learning_rate": 5.913884067217685e-05, |
| "loss": 0.4925137758255005, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.6210160934048595, |
| "grad_norm": 0.3990168273448944, |
| "learning_rate": 5.876436825260967e-05, |
| "loss": 0.7016726732254028, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.6222783212369832, |
| "grad_norm": 0.3235120475292206, |
| "learning_rate": 5.8390591255829644e-05, |
| "loss": 0.31492355465888977, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.6235405490691069, |
| "grad_norm": 0.41507890820503235, |
| "learning_rate": 5.8017515985439465e-05, |
| "loss": 0.647290825843811, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.6248027769012306, |
| "grad_norm": 0.27676281332969666, |
| "learning_rate": 5.764514873320761e-05, |
| "loss": 0.2870396375656128, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6260650047333544, |
| "grad_norm": 0.3965661823749542, |
| "learning_rate": 5.727349577896194e-05, |
| "loss": 0.4853188693523407, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.6273272325654781, |
| "grad_norm": 0.4400973916053772, |
| "learning_rate": 5.6902563390484023e-05, |
| "loss": 0.6750615239143372, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.6285894603976018, |
| "grad_norm": 0.3927224576473236, |
| "learning_rate": 5.6532357823403517e-05, |
| "loss": 0.4222678542137146, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.6298516882297255, |
| "grad_norm": 0.3898910880088806, |
| "learning_rate": 5.616288532109225e-05, |
| "loss": 0.6995186805725098, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.6311139160618492, |
| "grad_norm": 0.38628652691841125, |
| "learning_rate": 5.579415211455941e-05, |
| "loss": 0.44969233870506287, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6323761438939729, |
| "grad_norm": 0.42243316769599915, |
| "learning_rate": 5.542616442234618e-05, |
| "loss": 0.6847352981567383, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.6336383717260966, |
| "grad_norm": 0.394643098115921, |
| "learning_rate": 5.505892845042089e-05, |
| "loss": 0.5232677459716797, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.6349005995582203, |
| "grad_norm": 0.3849993050098419, |
| "learning_rate": 5.469245039207451e-05, |
| "loss": 0.45429885387420654, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.6361628273903439, |
| "grad_norm": 0.39264214038848877, |
| "learning_rate": 5.4326736427815946e-05, |
| "loss": 0.7198891639709473, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.6374250552224676, |
| "grad_norm": 0.3624120056629181, |
| "learning_rate": 5.39617927252681e-05, |
| "loss": 0.6535207033157349, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.6386872830545913, |
| "grad_norm": 0.41762086749076843, |
| "learning_rate": 5.359762543906368e-05, |
| "loss": 0.5117899775505066, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.639949510886715, |
| "grad_norm": 0.3560762405395508, |
| "learning_rate": 5.3234240710741337e-05, |
| "loss": 0.3488892912864685, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.6412117387188387, |
| "grad_norm": 0.3697710633277893, |
| "learning_rate": 5.28716446686423e-05, |
| "loss": 0.5296636819839478, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.6424739665509624, |
| "grad_norm": 0.3891625702381134, |
| "learning_rate": 5.250984342780689e-05, |
| "loss": 0.4500022530555725, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.6437361943830862, |
| "grad_norm": 0.4205571115016937, |
| "learning_rate": 5.214884308987136e-05, |
| "loss": 0.4895755648612976, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6449984222152099, |
| "grad_norm": 0.41864123940467834, |
| "learning_rate": 5.178864974296511e-05, |
| "loss": 0.7258821725845337, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.6462606500473336, |
| "grad_norm": 0.3590496778488159, |
| "learning_rate": 5.142926946160799e-05, |
| "loss": 0.3575442135334015, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.6475228778794573, |
| "grad_norm": 0.41997307538986206, |
| "learning_rate": 5.107070830660765e-05, |
| "loss": 0.6464291214942932, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.648785105711581, |
| "grad_norm": 0.40842562913894653, |
| "learning_rate": 5.071297232495769e-05, |
| "loss": 0.693924069404602, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.6500473335437046, |
| "grad_norm": 0.4067709445953369, |
| "learning_rate": 5.035606754973539e-05, |
| "loss": 0.7233395576477051, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.6513095613758283, |
| "grad_norm": 0.4231897294521332, |
| "learning_rate": 5.000000000000002e-05, |
| "loss": 0.5112624764442444, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.652571789207952, |
| "grad_norm": 0.33488285541534424, |
| "learning_rate": 4.964477568069146e-05, |
| "loss": 0.335151731967926, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.6538340170400757, |
| "grad_norm": 0.39816269278526306, |
| "learning_rate": 4.9290400582528815e-05, |
| "loss": 0.47427669167518616, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.6550962448721994, |
| "grad_norm": 0.3252885341644287, |
| "learning_rate": 4.893688068190932e-05, |
| "loss": 0.26451653242111206, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.6563584727043231, |
| "grad_norm": 0.3190288543701172, |
| "learning_rate": 4.8584221940807774e-05, |
| "loss": 0.29336637258529663, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6576207005364468, |
| "grad_norm": 0.3690161108970642, |
| "learning_rate": 4.823243030667576e-05, |
| "loss": 0.4153848886489868, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.6588829283685705, |
| "grad_norm": 0.38851308822631836, |
| "learning_rate": 4.7881511712341484e-05, |
| "loss": 0.8248839974403381, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.6601451562006942, |
| "grad_norm": 0.3935796618461609, |
| "learning_rate": 4.753147207590971e-05, |
| "loss": 0.8026013970375061, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.661407384032818, |
| "grad_norm": 0.39873406291007996, |
| "learning_rate": 4.7182317300661796e-05, |
| "loss": 0.7289063930511475, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6626696118649417, |
| "grad_norm": 0.3880118429660797, |
| "learning_rate": 4.683405327495638e-05, |
| "loss": 0.5413039922714233, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6639318396970654, |
| "grad_norm": 0.41318458318710327, |
| "learning_rate": 4.648668587212997e-05, |
| "loss": 0.6406034827232361, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.665194067529189, |
| "grad_norm": 0.3890816271305084, |
| "learning_rate": 4.6140220950397764e-05, |
| "loss": 0.7736164927482605, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.6664562953613127, |
| "grad_norm": 0.3265458047389984, |
| "learning_rate": 4.5794664352755055e-05, |
| "loss": 0.3139330744743347, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.6677185231934364, |
| "grad_norm": 0.3433822691440582, |
| "learning_rate": 4.545002190687865e-05, |
| "loss": 0.35356977581977844, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.6689807510255601, |
| "grad_norm": 0.3755057156085968, |
| "learning_rate": 4.510629942502839e-05, |
| "loss": 0.8373801708221436, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6702429788576838, |
| "grad_norm": 0.31386467814445496, |
| "learning_rate": 4.476350270394942e-05, |
| "loss": 0.2859068214893341, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.6715052066898075, |
| "grad_norm": 0.3479110598564148, |
| "learning_rate": 4.4421637524774285e-05, |
| "loss": 0.4022149443626404, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.6727674345219312, |
| "grad_norm": 0.3931775689125061, |
| "learning_rate": 4.4080709652925336e-05, |
| "loss": 0.4654971957206726, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.6740296623540549, |
| "grad_norm": 0.41888129711151123, |
| "learning_rate": 4.374072483801769e-05, |
| "loss": 0.6287370920181274, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6752918901861786, |
| "grad_norm": 0.3527485430240631, |
| "learning_rate": 4.340168881376222e-05, |
| "loss": 0.424509197473526, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.6765541180183023, |
| "grad_norm": 0.3850213289260864, |
| "learning_rate": 4.306360729786867e-05, |
| "loss": 0.6349387764930725, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.677816345850426, |
| "grad_norm": 0.39798423647880554, |
| "learning_rate": 4.272648599194948e-05, |
| "loss": 0.4587141275405884, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.6790785736825496, |
| "grad_norm": 0.4049997925758362, |
| "learning_rate": 4.239033058142356e-05, |
| "loss": 0.6317430138587952, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.6803408015146734, |
| "grad_norm": 0.3872447609901428, |
| "learning_rate": 4.2055146735420245e-05, |
| "loss": 0.511966347694397, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6816030293467971, |
| "grad_norm": 0.34591948986053467, |
| "learning_rate": 4.172094010668391e-05, |
| "loss": 0.34035632014274597, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6828652571789208, |
| "grad_norm": 0.35914257168769836, |
| "learning_rate": 4.1387716331478565e-05, |
| "loss": 0.4750257134437561, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.6841274850110445, |
| "grad_norm": 0.37576189637184143, |
| "learning_rate": 4.1055481029492645e-05, |
| "loss": 0.44672656059265137, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6853897128431682, |
| "grad_norm": 0.38701605796813965, |
| "learning_rate": 4.072423980374452e-05, |
| "loss": 0.45069319009780884, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.6866519406752919, |
| "grad_norm": 0.3991917669773102, |
| "learning_rate": 4.039399824048777e-05, |
| "loss": 0.4803800582885742, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6879141685074156, |
| "grad_norm": 0.3985093832015991, |
| "learning_rate": 4.00647619091171e-05, |
| "loss": 0.707385778427124, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.6891763963395393, |
| "grad_norm": 0.34546467661857605, |
| "learning_rate": 3.973653636207437e-05, |
| "loss": 0.40447893738746643, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.690438624171663, |
| "grad_norm": 0.3801027834415436, |
| "learning_rate": 3.9409327134754895e-05, |
| "loss": 0.4316953420639038, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6917008520037867, |
| "grad_norm": 0.39960116147994995, |
| "learning_rate": 3.908313974541422e-05, |
| "loss": 0.6661956906318665, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6929630798359104, |
| "grad_norm": 0.4249173402786255, |
| "learning_rate": 3.875797969507502e-05, |
| "loss": 0.6954900026321411, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.694225307668034, |
| "grad_norm": 0.4491938650608063, |
| "learning_rate": 3.843385246743417e-05, |
| "loss": 0.694817066192627, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6954875355001577, |
| "grad_norm": 0.4053807556629181, |
| "learning_rate": 3.811076352877054e-05, |
| "loss": 0.677171528339386, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.6967497633322814, |
| "grad_norm": 0.3556557893753052, |
| "learning_rate": 3.778871832785262e-05, |
| "loss": 0.31312018632888794, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6980119911644052, |
| "grad_norm": 0.37487420439720154, |
| "learning_rate": 3.74677222958466e-05, |
| "loss": 0.43329551815986633, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.6992742189965289, |
| "grad_norm": 0.4070112407207489, |
| "learning_rate": 3.714778084622492e-05, |
| "loss": 0.6022857427597046, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.7005364468286526, |
| "grad_norm": 0.3633062243461609, |
| "learning_rate": 3.682889937467493e-05, |
| "loss": 0.407479465007782, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.7017986746607763, |
| "grad_norm": 0.38449397683143616, |
| "learning_rate": 3.651108325900773e-05, |
| "loss": 0.5523849725723267, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.7030609024929, |
| "grad_norm": 0.3744942247867584, |
| "learning_rate": 3.619433785906775e-05, |
| "loss": 0.48631197214126587, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.7043231303250237, |
| "grad_norm": 0.40868815779685974, |
| "learning_rate": 3.587866851664219e-05, |
| "loss": 0.6774845719337463, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.7055853581571474, |
| "grad_norm": 0.35936489701271057, |
| "learning_rate": 3.556408055537087e-05, |
| "loss": 0.34799298644065857, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.7068475859892711, |
| "grad_norm": 0.3731677234172821, |
| "learning_rate": 3.5250579280656636e-05, |
| "loss": 0.3729614317417145, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7081098138213947, |
| "grad_norm": 0.4450969398021698, |
| "learning_rate": 3.493816997957582e-05, |
| "loss": 0.39747729897499084, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.7093720416535184, |
| "grad_norm": 0.3150026500225067, |
| "learning_rate": 3.462685792078888e-05, |
| "loss": 0.30238404870033264, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.7106342694856421, |
| "grad_norm": 0.4264235496520996, |
| "learning_rate": 3.4316648354451895e-05, |
| "loss": 0.7084164023399353, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.7118964973177658, |
| "grad_norm": 0.35976630449295044, |
| "learning_rate": 3.400754651212776e-05, |
| "loss": 0.35280704498291016, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.7131587251498895, |
| "grad_norm": 0.3740016520023346, |
| "learning_rate": 3.3699557606698015e-05, |
| "loss": 0.487404465675354, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.7144209529820132, |
| "grad_norm": 0.4432770013809204, |
| "learning_rate": 3.339268683227499e-05, |
| "loss": 0.6776658296585083, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.715683180814137, |
| "grad_norm": 0.3524283766746521, |
| "learning_rate": 3.308693936411421e-05, |
| "loss": 0.3227110207080841, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.7169454086462607, |
| "grad_norm": 0.39707088470458984, |
| "learning_rate": 3.278232035852693e-05, |
| "loss": 0.6849966645240784, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.7182076364783844, |
| "grad_norm": 0.4202400743961334, |
| "learning_rate": 3.247883495279358e-05, |
| "loss": 0.6456137299537659, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.7194698643105081, |
| "grad_norm": 0.4002569317817688, |
| "learning_rate": 3.2176488265076596e-05, |
| "loss": 0.7039542198181152, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7207320921426318, |
| "grad_norm": 0.40294668078422546, |
| "learning_rate": 3.187528539433458e-05, |
| "loss": 0.46439212560653687, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.7219943199747555, |
| "grad_norm": 0.40857481956481934, |
| "learning_rate": 3.157523142023604e-05, |
| "loss": 0.5847267508506775, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.7232565478068791, |
| "grad_norm": 0.43344590067863464, |
| "learning_rate": 3.1276331403073735e-05, |
| "loss": 0.5486865043640137, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.7245187756390028, |
| "grad_norm": 0.4011099934577942, |
| "learning_rate": 3.097859038367947e-05, |
| "loss": 0.6386106014251709, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.7257810034711265, |
| "grad_norm": 0.39212876558303833, |
| "learning_rate": 3.068201338333903e-05, |
| "loss": 0.6849637031555176, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.7270432313032502, |
| "grad_norm": 0.3913683593273163, |
| "learning_rate": 3.0386605403707346e-05, |
| "loss": 0.9085783958435059, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.7283054591353739, |
| "grad_norm": 0.4202577769756317, |
| "learning_rate": 3.0092371426724398e-05, |
| "loss": 0.692664623260498, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.7295676869674976, |
| "grad_norm": 0.33715662360191345, |
| "learning_rate": 2.979931641453104e-05, |
| "loss": 0.3271544575691223, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.7308299147996213, |
| "grad_norm": 0.34124237298965454, |
| "learning_rate": 2.9507445309385294e-05, |
| "loss": 0.34397092461586, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.732092142631745, |
| "grad_norm": 0.40698572993278503, |
| "learning_rate": 2.9216763033579097e-05, |
| "loss": 0.4819522500038147, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7333543704638688, |
| "grad_norm": 0.37911415100097656, |
| "learning_rate": 2.8927274489355293e-05, |
| "loss": 0.4310797154903412, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.7346165982959925, |
| "grad_norm": 0.36646318435668945, |
| "learning_rate": 2.8638984558824777e-05, |
| "loss": 0.5274304747581482, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.7358788261281162, |
| "grad_norm": 0.3488803803920746, |
| "learning_rate": 2.835189810388441e-05, |
| "loss": 0.7499272227287292, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.7371410539602399, |
| "grad_norm": 0.40415751934051514, |
| "learning_rate": 2.8066019966134904e-05, |
| "loss": 0.8633046746253967, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.7384032817923635, |
| "grad_norm": 0.325978547334671, |
| "learning_rate": 2.7781354966799078e-05, |
| "loss": 0.3552260994911194, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.7396655096244872, |
| "grad_norm": 0.37058016657829285, |
| "learning_rate": 2.7497907906640742e-05, |
| "loss": 0.913851261138916, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.7409277374566109, |
| "grad_norm": 0.36124756932258606, |
| "learning_rate": 2.721568356588362e-05, |
| "loss": 0.5102133750915527, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.7421899652887346, |
| "grad_norm": 0.41945722699165344, |
| "learning_rate": 2.6934686704130696e-05, |
| "loss": 0.5533009767532349, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.7434521931208583, |
| "grad_norm": 0.40652337670326233, |
| "learning_rate": 2.665492206028407e-05, |
| "loss": 0.6261847019195557, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.744714420952982, |
| "grad_norm": 0.36238163709640503, |
| "learning_rate": 2.6376394352464972e-05, |
| "loss": 0.5246446132659912, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7459766487851057, |
| "grad_norm": 0.3909083306789398, |
| "learning_rate": 2.6099108277934103e-05, |
| "loss": 0.5678606033325195, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.7472388766172294, |
| "grad_norm": 0.3918708562850952, |
| "learning_rate": 2.5823068513012595e-05, |
| "loss": 0.4282546639442444, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.7485011044493531, |
| "grad_norm": 0.3766772150993347, |
| "learning_rate": 2.5548279713002997e-05, |
| "loss": 0.43503549695014954, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.7497633322814768, |
| "grad_norm": 0.43319037556648254, |
| "learning_rate": 2.527474651211089e-05, |
| "loss": 0.6522255539894104, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.7510255601136006, |
| "grad_norm": 0.4107663035392761, |
| "learning_rate": 2.500247352336664e-05, |
| "loss": 0.3986871540546417, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.7522877879457242, |
| "grad_norm": 0.4372679591178894, |
| "learning_rate": 2.4731465338547556e-05, |
| "loss": 0.681415855884552, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.7535500157778479, |
| "grad_norm": 0.3968641459941864, |
| "learning_rate": 2.4461726528100615e-05, |
| "loss": 0.44046419858932495, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.7548122436099716, |
| "grad_norm": 0.33103057742118835, |
| "learning_rate": 2.41932616410653e-05, |
| "loss": 0.37138405442237854, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.7560744714420953, |
| "grad_norm": 0.36118385195732117, |
| "learning_rate": 2.392607520499677e-05, |
| "loss": 0.31369921565055847, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.757336699274219, |
| "grad_norm": 0.35563066601753235, |
| "learning_rate": 2.36601717258897e-05, |
| "loss": 0.3743899464607239, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7585989271063427, |
| "grad_norm": 0.4097678065299988, |
| "learning_rate": 2.339555568810221e-05, |
| "loss": 0.418079674243927, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.7598611549384664, |
| "grad_norm": 0.38674771785736084, |
| "learning_rate": 2.3132231554280136e-05, |
| "loss": 0.8224179744720459, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.7611233827705901, |
| "grad_norm": 0.3854767084121704, |
| "learning_rate": 2.2870203765281926e-05, |
| "loss": 0.542049765586853, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.7623856106027138, |
| "grad_norm": 0.35851332545280457, |
| "learning_rate": 2.260947674010372e-05, |
| "loss": 0.5342020988464355, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.7636478384348375, |
| "grad_norm": 0.37478891015052795, |
| "learning_rate": 2.235005487580466e-05, |
| "loss": 0.8123199939727783, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.7649100662669612, |
| "grad_norm": 0.451459676027298, |
| "learning_rate": 2.2091942547432955e-05, |
| "loss": 0.5622618198394775, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.7661722940990849, |
| "grad_norm": 0.42055562138557434, |
| "learning_rate": 2.1835144107952022e-05, |
| "loss": 0.6805808544158936, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.7674345219312085, |
| "grad_norm": 0.38752734661102295, |
| "learning_rate": 2.1579663888166956e-05, |
| "loss": 0.6346580982208252, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.7686967497633322, |
| "grad_norm": 0.39068523049354553, |
| "learning_rate": 2.132550619665168e-05, |
| "loss": 0.5962034463882446, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.769958977595456, |
| "grad_norm": 0.3247472643852234, |
| "learning_rate": 2.107267531967618e-05, |
| "loss": 0.25553497672080994, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7712212054275797, |
| "grad_norm": 0.4266479015350342, |
| "learning_rate": 2.0821175521134207e-05, |
| "loss": 0.5519466996192932, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.7724834332597034, |
| "grad_norm": 0.4060700237751007, |
| "learning_rate": 2.05710110424714e-05, |
| "loss": 0.6059053540229797, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.7737456610918271, |
| "grad_norm": 0.4174729585647583, |
| "learning_rate": 2.0322186102613795e-05, |
| "loss": 0.42115089297294617, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.7750078889239508, |
| "grad_norm": 0.375446617603302, |
| "learning_rate": 2.0074704897896558e-05, |
| "loss": 0.368305504322052, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7762701167560745, |
| "grad_norm": 0.37311506271362305, |
| "learning_rate": 1.982857160199334e-05, |
| "loss": 0.3238658010959625, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.7775323445881982, |
| "grad_norm": 0.41771042346954346, |
| "learning_rate": 1.9583790365845822e-05, |
| "loss": 0.6185348033905029, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.7787945724203219, |
| "grad_norm": 0.39036667346954346, |
| "learning_rate": 1.9340365317593746e-05, |
| "loss": 0.7339574098587036, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.7800568002524456, |
| "grad_norm": 0.40570926666259766, |
| "learning_rate": 1.9098300562505266e-05, |
| "loss": 0.46005457639694214, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.7813190280845692, |
| "grad_norm": 0.36136454343795776, |
| "learning_rate": 1.8857600182907675e-05, |
| "loss": 0.3527463972568512, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.7825812559166929, |
| "grad_norm": 0.38751932978630066, |
| "learning_rate": 1.8618268238118675e-05, |
| "loss": 0.7095609307289124, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7838434837488166, |
| "grad_norm": 0.4258861541748047, |
| "learning_rate": 1.8380308764377842e-05, |
| "loss": 0.6087920665740967, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.7851057115809403, |
| "grad_norm": 0.3894071578979492, |
| "learning_rate": 1.8143725774778508e-05, |
| "loss": 0.5984947085380554, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.786367939413064, |
| "grad_norm": 0.39034441113471985, |
| "learning_rate": 1.7908523259200192e-05, |
| "loss": 0.5467015504837036, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.7876301672451878, |
| "grad_norm": 0.40297675132751465, |
| "learning_rate": 1.767470518424129e-05, |
| "loss": 0.6903741359710693, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.7888923950773115, |
| "grad_norm": 0.3851509392261505, |
| "learning_rate": 1.7442275493152037e-05, |
| "loss": 0.486089825630188, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7901546229094352, |
| "grad_norm": 0.37658852338790894, |
| "learning_rate": 1.7211238105768214e-05, |
| "loss": 0.4333967864513397, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7914168507415589, |
| "grad_norm": 0.45156872272491455, |
| "learning_rate": 1.6981596918444953e-05, |
| "loss": 0.7170761823654175, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.7926790785736826, |
| "grad_norm": 0.41625985503196716, |
| "learning_rate": 1.6753355803990912e-05, |
| "loss": 0.45374661684036255, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.7939413064058063, |
| "grad_norm": 0.41271454095840454, |
| "learning_rate": 1.652651861160318e-05, |
| "loss": 0.49166661500930786, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.79520353423793, |
| "grad_norm": 0.30450883507728577, |
| "learning_rate": 1.630108916680223e-05, |
| "loss": 0.26509180665016174, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7964657620700536, |
| "grad_norm": 0.41994258761405945, |
| "learning_rate": 1.607707127136734e-05, |
| "loss": 0.5564639568328857, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.7977279899021773, |
| "grad_norm": 0.42379099130630493, |
| "learning_rate": 1.5854468703272663e-05, |
| "loss": 0.6809132695198059, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.798990217734301, |
| "grad_norm": 0.3801705837249756, |
| "learning_rate": 1.5633285216623385e-05, |
| "loss": 0.4586731493473053, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.8002524455664247, |
| "grad_norm": 0.3840394914150238, |
| "learning_rate": 1.541352454159237e-05, |
| "loss": 0.38096368312835693, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.8015146733985484, |
| "grad_norm": 0.3911992311477661, |
| "learning_rate": 1.5195190384357404e-05, |
| "loss": 0.6233262419700623, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.8027769012306721, |
| "grad_norm": 0.4130832254886627, |
| "learning_rate": 1.4978286427038601e-05, |
| "loss": 0.6100831031799316, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.8040391290627958, |
| "grad_norm": 0.530238687992096, |
| "learning_rate": 1.4762816327636241e-05, |
| "loss": 0.6475313901901245, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.8053013568949196, |
| "grad_norm": 0.43065938353538513, |
| "learning_rate": 1.4548783719969239e-05, |
| "loss": 0.6517763137817383, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.8065635847270433, |
| "grad_norm": 0.39852434396743774, |
| "learning_rate": 1.4336192213613742e-05, |
| "loss": 0.762035608291626, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.807825812559167, |
| "grad_norm": 0.4060841202735901, |
| "learning_rate": 1.4125045393842219e-05, |
| "loss": 0.5141922831535339, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8090880403912907, |
| "grad_norm": 0.42946869134902954, |
| "learning_rate": 1.3915346821563235e-05, |
| "loss": 0.4715317189693451, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.8103502682234143, |
| "grad_norm": 0.4243875741958618, |
| "learning_rate": 1.3707100033261034e-05, |
| "loss": 0.5333652496337891, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.811612496055538, |
| "grad_norm": 0.40289306640625, |
| "learning_rate": 1.3500308540936201e-05, |
| "loss": 0.8304973840713501, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.8128747238876617, |
| "grad_norm": 0.43981650471687317, |
| "learning_rate": 1.3294975832046353e-05, |
| "loss": 0.7121323347091675, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.8141369517197854, |
| "grad_norm": 0.3223661780357361, |
| "learning_rate": 1.3091105369447165e-05, |
| "loss": 0.2905374765396118, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.8153991795519091, |
| "grad_norm": 0.4346272051334381, |
| "learning_rate": 1.2888700591334223e-05, |
| "loss": 0.537320613861084, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.8166614073840328, |
| "grad_norm": 0.35340362787246704, |
| "learning_rate": 1.2687764911184907e-05, |
| "loss": 0.34484896063804626, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.8179236352161565, |
| "grad_norm": 0.40185239911079407, |
| "learning_rate": 1.2488301717700735e-05, |
| "loss": 0.4863336682319641, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.8191858630482802, |
| "grad_norm": 0.33702552318573, |
| "learning_rate": 1.2290314374750422e-05, |
| "loss": 0.3356221318244934, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.8204480908804039, |
| "grad_norm": 0.38969579339027405, |
| "learning_rate": 1.2093806221313008e-05, |
| "loss": 0.6058964729309082, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8217103187125276, |
| "grad_norm": 0.4453175961971283, |
| "learning_rate": 1.1898780571421552e-05, |
| "loss": 0.44390422105789185, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.8229725465446514, |
| "grad_norm": 0.39128580689430237, |
| "learning_rate": 1.1705240714107302e-05, |
| "loss": 0.6540953516960144, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.8242347743767751, |
| "grad_norm": 0.3710046708583832, |
| "learning_rate": 1.1513189913344214e-05, |
| "loss": 0.5617390871047974, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.8254970022088987, |
| "grad_norm": 0.4133809208869934, |
| "learning_rate": 1.1322631407993811e-05, |
| "loss": 0.6450774669647217, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.8267592300410224, |
| "grad_norm": 0.3774697184562683, |
| "learning_rate": 1.1133568411750727e-05, |
| "loss": 0.3926354646682739, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.8280214578731461, |
| "grad_norm": 0.39373353123664856, |
| "learning_rate": 1.0946004113088381e-05, |
| "loss": 0.7614798545837402, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.8292836857052698, |
| "grad_norm": 0.3788921535015106, |
| "learning_rate": 1.0759941675205221e-05, |
| "loss": 0.6513789892196655, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.8305459135373935, |
| "grad_norm": 0.47546783089637756, |
| "learning_rate": 1.0575384235971465e-05, |
| "loss": 0.43815821409225464, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.8318081413695172, |
| "grad_norm": 0.4033801257610321, |
| "learning_rate": 1.0392334907876022e-05, |
| "loss": 0.7993838787078857, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.8330703692016409, |
| "grad_norm": 0.3804508447647095, |
| "learning_rate": 1.0210796777974197e-05, |
| "loss": 0.5399584174156189, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8343325970337646, |
| "grad_norm": 0.40873584151268005, |
| "learning_rate": 1.0030772907835483e-05, |
| "loss": 0.4069630801677704, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.8355948248658883, |
| "grad_norm": 0.31726691126823425, |
| "learning_rate": 9.852266333491954e-06, |
| "loss": 0.31673499941825867, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.836857052698012, |
| "grad_norm": 0.42769894003868103, |
| "learning_rate": 9.675280065387116e-06, |
| "loss": 0.5651416778564453, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.8381192805301357, |
| "grad_norm": 0.34212225675582886, |
| "learning_rate": 9.499817088325102e-06, |
| "loss": 0.3379066288471222, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.8393815083622594, |
| "grad_norm": 0.3834571838378906, |
| "learning_rate": 9.325880361420336e-06, |
| "loss": 0.532379686832428, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.840643736194383, |
| "grad_norm": 0.4152385890483856, |
| "learning_rate": 9.153472818047625e-06, |
| "loss": 0.5268415212631226, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.8419059640265067, |
| "grad_norm": 0.43394723534584045, |
| "learning_rate": 8.982597365792711e-06, |
| "loss": 0.5578685402870178, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.8431681918586305, |
| "grad_norm": 0.3674545884132385, |
| "learning_rate": 8.813256886403164e-06, |
| "loss": 0.4507666826248169, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.8444304196907542, |
| "grad_norm": 0.4950237572193146, |
| "learning_rate": 8.645454235739903e-06, |
| "loss": 0.5587325096130371, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.8456926475228779, |
| "grad_norm": 0.42047086358070374, |
| "learning_rate": 8.479192243728962e-06, |
| "loss": 0.46830785274505615, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8469548753550016, |
| "grad_norm": 0.33029595017433167, |
| "learning_rate": 8.314473714313719e-06, |
| "loss": 0.3492874503135681, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.8482171031871253, |
| "grad_norm": 0.3771483600139618, |
| "learning_rate": 8.151301425407699e-06, |
| "loss": 0.416072815656662, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.849479331019249, |
| "grad_norm": 0.3575372099876404, |
| "learning_rate": 7.9896781288477e-06, |
| "loss": 0.4314277470111847, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.8507415588513727, |
| "grad_norm": 0.42138731479644775, |
| "learning_rate": 7.829606550347313e-06, |
| "loss": 0.6481724381446838, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.8520037866834964, |
| "grad_norm": 0.39553171396255493, |
| "learning_rate": 7.671089389451058e-06, |
| "loss": 0.3940804600715637, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.8532660145156201, |
| "grad_norm": 0.3964840769767761, |
| "learning_rate": 7.514129319488839e-06, |
| "loss": 0.7153723835945129, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.8545282423477437, |
| "grad_norm": 0.4527961015701294, |
| "learning_rate": 7.358728987530728e-06, |
| "loss": 0.7575295567512512, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.8557904701798674, |
| "grad_norm": 0.47758570313453674, |
| "learning_rate": 7.204891014342552e-06, |
| "loss": 0.732297420501709, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.8570526980119911, |
| "grad_norm": 0.3915818929672241, |
| "learning_rate": 7.052617994341448e-06, |
| "loss": 0.5047644376754761, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.8583149258441148, |
| "grad_norm": 0.42662402987480164, |
| "learning_rate": 6.901912495552332e-06, |
| "loss": 0.7435489892959595, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8595771536762385, |
| "grad_norm": 0.44890522956848145, |
| "learning_rate": 6.75277705956443e-06, |
| "loss": 0.5125769376754761, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.8608393815083623, |
| "grad_norm": 0.3554657995700836, |
| "learning_rate": 6.605214201488486e-06, |
| "loss": 0.3450443744659424, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.862101609340486, |
| "grad_norm": 0.32458341121673584, |
| "learning_rate": 6.459226409914332e-06, |
| "loss": 0.31173160672187805, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.8633638371726097, |
| "grad_norm": 0.3945808708667755, |
| "learning_rate": 6.314816146868952e-06, |
| "loss": 0.4987742304801941, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.8646260650047334, |
| "grad_norm": 0.41859179735183716, |
| "learning_rate": 6.171985847774864e-06, |
| "loss": 0.5809845924377441, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.8658882928368571, |
| "grad_norm": 0.4125705361366272, |
| "learning_rate": 6.030737921409169e-06, |
| "loss": 0.6869086623191833, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.8671505206689808, |
| "grad_norm": 0.5110360980033875, |
| "learning_rate": 5.891074749862857e-06, |
| "loss": 0.5902141332626343, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.8684127485011045, |
| "grad_norm": 0.3964199125766754, |
| "learning_rate": 5.75299868850061e-06, |
| "loss": 0.778140127658844, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.8696749763332281, |
| "grad_norm": 0.3277434706687927, |
| "learning_rate": 5.616512065921187e-06, |
| "loss": 0.2611342966556549, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.8709372041653518, |
| "grad_norm": 0.3749728500843048, |
| "learning_rate": 5.481617183918053e-06, |
| "loss": 0.42815372347831726, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.8721994319974755, |
| "grad_norm": 0.36340272426605225, |
| "learning_rate": 5.348316317440549e-06, |
| "loss": 0.4718218445777893, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.8734616598295992, |
| "grad_norm": 0.3954283893108368, |
| "learning_rate": 5.21661171455563e-06, |
| "loss": 0.49787670373916626, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.8747238876617229, |
| "grad_norm": 0.39619600772857666, |
| "learning_rate": 5.086505596409885e-06, |
| "loss": 0.568760335445404, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.8759861154938466, |
| "grad_norm": 0.33868858218193054, |
| "learning_rate": 4.958000157192022e-06, |
| "loss": 0.37448927760124207, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.8772483433259703, |
| "grad_norm": 0.43138137459754944, |
| "learning_rate": 4.831097564095999e-06, |
| "loss": 0.6743485331535339, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.8785105711580941, |
| "grad_norm": 0.41570451855659485, |
| "learning_rate": 4.705799957284351e-06, |
| "loss": 0.6966921091079712, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.8797727989902178, |
| "grad_norm": 0.3950325548648834, |
| "learning_rate": 4.582109449852168e-06, |
| "loss": 0.8221022486686707, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.8810350268223415, |
| "grad_norm": 0.31951889395713806, |
| "learning_rate": 4.4600281277914715e-06, |
| "loss": 0.33876973390579224, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.8822972546544652, |
| "grad_norm": 0.408273309469223, |
| "learning_rate": 4.339558049955927e-06, |
| "loss": 0.5404328107833862, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.8835594824865888, |
| "grad_norm": 0.3891682028770447, |
| "learning_rate": 4.220701248026248e-06, |
| "loss": 0.48202747106552124, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8848217103187125, |
| "grad_norm": 0.40945693850517273, |
| "learning_rate": 4.103459726475889e-06, |
| "loss": 0.8016560077667236, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.8860839381508362, |
| "grad_norm": 0.43001535534858704, |
| "learning_rate": 3.987835462537193e-06, |
| "loss": 0.6459006071090698, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.8873461659829599, |
| "grad_norm": 0.41465309262275696, |
| "learning_rate": 3.873830406168111e-06, |
| "loss": 0.5275793671607971, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.8886083938150836, |
| "grad_norm": 0.3870158791542053, |
| "learning_rate": 3.761446480019315e-06, |
| "loss": 0.8116216063499451, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.8898706216472073, |
| "grad_norm": 0.3732059895992279, |
| "learning_rate": 3.6506855794016913e-06, |
| "loss": 0.3549728989601135, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.891132849479331, |
| "grad_norm": 0.38289642333984375, |
| "learning_rate": 3.541549572254488e-06, |
| "loss": 0.3792566955089569, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.8923950773114547, |
| "grad_norm": 0.3992280066013336, |
| "learning_rate": 3.43404029911375e-06, |
| "loss": 0.7304099798202515, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.8936573051435784, |
| "grad_norm": 0.3860641121864319, |
| "learning_rate": 3.3281595730812575e-06, |
| "loss": 0.6320814490318298, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.8949195329757021, |
| "grad_norm": 0.40705665946006775, |
| "learning_rate": 3.223909179794027e-06, |
| "loss": 0.7557500600814819, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.8961817608078259, |
| "grad_norm": 0.3863953649997711, |
| "learning_rate": 3.121290877394134e-06, |
| "loss": 0.5255841016769409, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8974439886399496, |
| "grad_norm": 0.3851090967655182, |
| "learning_rate": 3.0203063964990617e-06, |
| "loss": 0.5183653235435486, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.8987062164720732, |
| "grad_norm": 0.39725980162620544, |
| "learning_rate": 2.9209574401725557e-06, |
| "loss": 0.5958725214004517, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8999684443041969, |
| "grad_norm": 0.47921210527420044, |
| "learning_rate": 2.82324568389587e-06, |
| "loss": 0.7262052297592163, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.9012306721363206, |
| "grad_norm": 0.405513733625412, |
| "learning_rate": 2.7271727755395214e-06, |
| "loss": 0.6049070954322815, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.9024928999684443, |
| "grad_norm": 0.3995083272457123, |
| "learning_rate": 2.6327403353355264e-06, |
| "loss": 0.808394193649292, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.903755127800568, |
| "grad_norm": 0.43631553649902344, |
| "learning_rate": 2.539949955849985e-06, |
| "loss": 0.48620936274528503, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.9050173556326917, |
| "grad_norm": 0.479377806186676, |
| "learning_rate": 2.4488032019563402e-06, |
| "loss": 0.6404117941856384, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.9062795834648154, |
| "grad_norm": 0.408569872379303, |
| "learning_rate": 2.359301610808917e-06, |
| "loss": 0.7001040577888489, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.9075418112969391, |
| "grad_norm": 0.4069215655326843, |
| "learning_rate": 2.271446691817014e-06, |
| "loss": 0.6278159618377686, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.9088040391290628, |
| "grad_norm": 0.4575406014919281, |
| "learning_rate": 2.1852399266194314e-06, |
| "loss": 0.6095160245895386, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9100662669611865, |
| "grad_norm": 0.43460536003112793, |
| "learning_rate": 2.100682769059548e-06, |
| "loss": 0.4627190828323364, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.9113284947933102, |
| "grad_norm": 0.4876587986946106, |
| "learning_rate": 2.017776645160707e-06, |
| "loss": 0.4769670367240906, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.9125907226254338, |
| "grad_norm": 0.4268261194229126, |
| "learning_rate": 1.9365229531022264e-06, |
| "loss": 0.49713101983070374, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.9138529504575575, |
| "grad_norm": 0.4099612832069397, |
| "learning_rate": 1.8569230631958256e-06, |
| "loss": 0.45675134658813477, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.9151151782896813, |
| "grad_norm": 0.39911365509033203, |
| "learning_rate": 1.7789783178624897e-06, |
| "loss": 0.4840657711029053, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.916377406121805, |
| "grad_norm": 0.39041027426719666, |
| "learning_rate": 1.7026900316098215e-06, |
| "loss": 0.5516049861907959, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.9176396339539287, |
| "grad_norm": 0.401254802942276, |
| "learning_rate": 1.6280594910099256e-06, |
| "loss": 0.7506740093231201, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.9189018617860524, |
| "grad_norm": 0.38945209980010986, |
| "learning_rate": 1.5550879546776364e-06, |
| "loss": 0.45651984214782715, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.9201640896181761, |
| "grad_norm": 0.3908751904964447, |
| "learning_rate": 1.4837766532493468e-06, |
| "loss": 0.4634789824485779, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.9214263174502998, |
| "grad_norm": 0.42969706654548645, |
| "learning_rate": 1.414126789362269e-06, |
| "loss": 0.8332436084747314, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9226885452824235, |
| "grad_norm": 0.3828902542591095, |
| "learning_rate": 1.3461395376340502e-06, |
| "loss": 0.36839234828948975, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.9239507731145472, |
| "grad_norm": 0.4279589354991913, |
| "learning_rate": 1.2798160446431006e-06, |
| "loss": 0.7247366309165955, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.9252130009466709, |
| "grad_norm": 0.4109678566455841, |
| "learning_rate": 1.2151574289091749e-06, |
| "loss": 0.44771307706832886, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.9264752287787946, |
| "grad_norm": 0.3857699930667877, |
| "learning_rate": 1.1521647808744873e-06, |
| "loss": 0.7814648151397705, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.9277374566109182, |
| "grad_norm": 0.40495210886001587, |
| "learning_rate": 1.0908391628854041e-06, |
| "loss": 0.4813134968280792, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.9289996844430419, |
| "grad_norm": 0.40271830558776855, |
| "learning_rate": 1.0311816091744698e-06, |
| "loss": 0.4100000858306885, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.9302619122751656, |
| "grad_norm": 0.37395796179771423, |
| "learning_rate": 9.731931258429638e-07, |
| "loss": 0.4800105690956116, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.9315241401072893, |
| "grad_norm": 0.3781779408454895, |
| "learning_rate": 9.168746908439718e-07, |
| "loss": 0.48567116260528564, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.932786367939413, |
| "grad_norm": 0.383577436208725, |
| "learning_rate": 8.622272539658415e-07, |
| "loss": 0.4960499107837677, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.9340485957715368, |
| "grad_norm": 0.40534883737564087, |
| "learning_rate": 8.092517368162078e-07, |
| "loss": 0.4538559913635254, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9353108236036605, |
| "grad_norm": 0.3785009980201721, |
| "learning_rate": 7.579490328064265e-07, |
| "loss": 0.4022294580936432, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.9365730514357842, |
| "grad_norm": 0.3643127381801605, |
| "learning_rate": 7.083200071365203e-07, |
| "loss": 0.429392009973526, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.9378352792679079, |
| "grad_norm": 0.4218924343585968, |
| "learning_rate": 6.603654967805683e-07, |
| "loss": 0.6960986256599426, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.9390975071000316, |
| "grad_norm": 0.387144535779953, |
| "learning_rate": 6.140863104726391e-07, |
| "loss": 0.359319269657135, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.9403597349321553, |
| "grad_norm": 0.386854887008667, |
| "learning_rate": 5.694832286930685e-07, |
| "loss": 0.5978315472602844, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.9416219627642789, |
| "grad_norm": 0.38212618231773376, |
| "learning_rate": 5.265570036553813e-07, |
| "loss": 0.7151321172714233, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.9428841905964026, |
| "grad_norm": 0.38942816853523254, |
| "learning_rate": 4.85308359293557e-07, |
| "loss": 0.34270745515823364, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.9441464184285263, |
| "grad_norm": 0.4136378765106201, |
| "learning_rate": 4.457379912498394e-07, |
| "loss": 0.3653174340724945, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.94540864626065, |
| "grad_norm": 0.42216548323631287, |
| "learning_rate": 4.078465668629905e-07, |
| "loss": 0.663544237613678, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.9466708740927737, |
| "grad_norm": 0.4414190948009491, |
| "learning_rate": 3.716347251570551e-07, |
| "loss": 0.7294875383377075, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9479331019248974, |
| "grad_norm": 0.3959789574146271, |
| "learning_rate": 3.371030768305583e-07, |
| "loss": 0.6958010196685791, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.9491953297570211, |
| "grad_norm": 0.45387375354766846, |
| "learning_rate": 3.042522042462359e-07, |
| "loss": 0.7474179267883301, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.9504575575891449, |
| "grad_norm": 0.37097567319869995, |
| "learning_rate": 2.7308266142119785e-07, |
| "loss": 0.7090280055999756, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.9517197854212686, |
| "grad_norm": 0.4319815933704376, |
| "learning_rate": 2.4359497401758024e-07, |
| "loss": 0.632872462272644, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.9529820132533923, |
| "grad_norm": 0.412222295999527, |
| "learning_rate": 2.1578963933367446e-07, |
| "loss": 0.6069747805595398, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.954244241085516, |
| "grad_norm": 0.4318292737007141, |
| "learning_rate": 1.8966712629558957e-07, |
| "loss": 0.48516613245010376, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.9555064689176397, |
| "grad_norm": 0.4013379216194153, |
| "learning_rate": 1.6522787544926977e-07, |
| "loss": 0.7001821994781494, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.9567686967497633, |
| "grad_norm": 0.3875749111175537, |
| "learning_rate": 1.424722989531113e-07, |
| "loss": 0.5603348016738892, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.958030924581887, |
| "grad_norm": 0.2857275605201721, |
| "learning_rate": 1.2140078057101266e-07, |
| "loss": 0.2514762878417969, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.9592931524140107, |
| "grad_norm": 0.38641858100891113, |
| "learning_rate": 1.020136756658574e-07, |
| "loss": 0.6449640393257141, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9605553802461344, |
| "grad_norm": 0.4277747571468353, |
| "learning_rate": 8.43113111936189e-08, |
| "loss": 0.7620565891265869, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.9618176080782581, |
| "grad_norm": 0.3486212193965912, |
| "learning_rate": 6.829398569770939e-08, |
| "loss": 0.43015536665916443, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.9630798359103818, |
| "grad_norm": 0.36243584752082825, |
| "learning_rate": 5.3961969304072715e-08, |
| "loss": 0.393317312002182, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.9643420637425055, |
| "grad_norm": 0.38432276248931885, |
| "learning_rate": 4.131550371655468e-08, |
| "loss": 0.752675473690033, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.9656042915746292, |
| "grad_norm": 0.413333535194397, |
| "learning_rate": 3.0354802212839705e-08, |
| "loss": 0.7670407891273499, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.9668665194067529, |
| "grad_norm": 0.3813234269618988, |
| "learning_rate": 2.108004964086474e-08, |
| "loss": 0.4830048382282257, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.9681287472388767, |
| "grad_norm": 0.2374144047498703, |
| "learning_rate": 1.3491402415710675e-08, |
| "loss": 0.1855914294719696, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.9693909750710004, |
| "grad_norm": 0.48682042956352234, |
| "learning_rate": 7.58898851693779e-09, |
| "loss": 0.5933582186698914, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.9706532029031241, |
| "grad_norm": 0.4472711980342865, |
| "learning_rate": 3.3729074864541355e-09, |
| "loss": 0.55843585729599, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.9719154307352477, |
| "grad_norm": 0.4075043201446533, |
| "learning_rate": 8.432304268057856e-10, |
| "loss": 0.7006219625473022, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9719154307352477, |
| "eval_loss": 0.5271598100662231, |
| "eval_runtime": 224.8405, |
| "eval_samples_per_second": 2.126, |
| "eval_steps_per_second": 0.534, |
| "step": 770 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 770, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2224210803964467e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|