| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2618, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003819709702062643, |
| "grad_norm": 19.90881904294145, |
| "learning_rate": 2.531645569620253e-08, |
| "loss": 1.299, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007639419404125286, |
| "grad_norm": 15.414554054540094, |
| "learning_rate": 5.063291139240506e-08, |
| "loss": 1.3119, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001145912910618793, |
| "grad_norm": 16.05082822290993, |
| "learning_rate": 7.59493670886076e-08, |
| "loss": 1.3474, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0015278838808250573, |
| "grad_norm": 16.802557597929273, |
| "learning_rate": 1.0126582278481012e-07, |
| "loss": 1.2602, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0019098548510313217, |
| "grad_norm": 17.30879067784603, |
| "learning_rate": 1.2658227848101266e-07, |
| "loss": 1.3413, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002291825821237586, |
| "grad_norm": 20.77717323976014, |
| "learning_rate": 1.518987341772152e-07, |
| "loss": 1.2759, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00267379679144385, |
| "grad_norm": 17.559417298661096, |
| "learning_rate": 1.7721518987341772e-07, |
| "loss": 1.298, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0030557677616501145, |
| "grad_norm": 16.28982557516589, |
| "learning_rate": 2.0253164556962025e-07, |
| "loss": 1.349, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.003437738731856379, |
| "grad_norm": 17.474868678501934, |
| "learning_rate": 2.2784810126582277e-07, |
| "loss": 1.3612, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0038197097020626434, |
| "grad_norm": 16.69254255107241, |
| "learning_rate": 2.5316455696202533e-07, |
| "loss": 1.2958, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004201680672268907, |
| "grad_norm": 14.759522064675997, |
| "learning_rate": 2.7848101265822783e-07, |
| "loss": 1.2975, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004583651642475172, |
| "grad_norm": 17.79667586296148, |
| "learning_rate": 3.037974683544304e-07, |
| "loss": 1.355, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004965622612681436, |
| "grad_norm": 16.62840589636226, |
| "learning_rate": 3.291139240506329e-07, |
| "loss": 1.3086, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0053475935828877, |
| "grad_norm": 15.772125524972015, |
| "learning_rate": 3.5443037974683544e-07, |
| "loss": 1.3046, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.005729564553093965, |
| "grad_norm": 16.065515009444724, |
| "learning_rate": 3.79746835443038e-07, |
| "loss": 1.3281, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.006111535523300229, |
| "grad_norm": 14.081835334511972, |
| "learning_rate": 4.050632911392405e-07, |
| "loss": 1.2608, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006493506493506494, |
| "grad_norm": 16.12517742296732, |
| "learning_rate": 4.3037974683544305e-07, |
| "loss": 1.2687, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006875477463712758, |
| "grad_norm": 15.024902136863233, |
| "learning_rate": 4.5569620253164555e-07, |
| "loss": 1.2874, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.007257448433919022, |
| "grad_norm": 14.541324064699918, |
| "learning_rate": 4.81012658227848e-07, |
| "loss": 1.304, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007639419404125287, |
| "grad_norm": 14.444473500731197, |
| "learning_rate": 5.063291139240507e-07, |
| "loss": 1.2624, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008021390374331552, |
| "grad_norm": 13.532356409889397, |
| "learning_rate": 5.31645569620253e-07, |
| "loss": 1.2321, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.008403361344537815, |
| "grad_norm": 14.040344008725162, |
| "learning_rate": 5.569620253164557e-07, |
| "loss": 1.2684, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00878533231474408, |
| "grad_norm": 15.065514072853418, |
| "learning_rate": 5.822784810126582e-07, |
| "loss": 1.2832, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.009167303284950344, |
| "grad_norm": 14.095590912422155, |
| "learning_rate": 6.075949367088608e-07, |
| "loss": 1.1906, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.009549274255156608, |
| "grad_norm": 13.52193472131181, |
| "learning_rate": 6.329113924050633e-07, |
| "loss": 1.2071, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.009931245225362872, |
| "grad_norm": 15.26263801289482, |
| "learning_rate": 6.582278481012658e-07, |
| "loss": 1.1598, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.010313216195569137, |
| "grad_norm": 12.160578875535336, |
| "learning_rate": 6.835443037974683e-07, |
| "loss": 1.1637, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0106951871657754, |
| "grad_norm": 10.342684824764603, |
| "learning_rate": 7.088607594936709e-07, |
| "loss": 1.1077, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.011077158135981665, |
| "grad_norm": 15.632943983667545, |
| "learning_rate": 7.341772151898734e-07, |
| "loss": 1.1507, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01145912910618793, |
| "grad_norm": 10.079604573711133, |
| "learning_rate": 7.59493670886076e-07, |
| "loss": 1.0396, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011841100076394193, |
| "grad_norm": 13.611386236363026, |
| "learning_rate": 7.848101265822784e-07, |
| "loss": 1.0173, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.012223071046600458, |
| "grad_norm": 10.957950016902126, |
| "learning_rate": 8.10126582278481e-07, |
| "loss": 0.9992, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.012605042016806723, |
| "grad_norm": 30.773927277956396, |
| "learning_rate": 8.354430379746835e-07, |
| "loss": 1.024, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.012987012987012988, |
| "grad_norm": 9.213361959330522, |
| "learning_rate": 8.607594936708861e-07, |
| "loss": 0.96, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.013368983957219251, |
| "grad_norm": 9.531077209283136, |
| "learning_rate": 8.860759493670885e-07, |
| "loss": 1.0749, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.013750954927425516, |
| "grad_norm": 9.863104906307168, |
| "learning_rate": 9.113924050632911e-07, |
| "loss": 1.0182, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01413292589763178, |
| "grad_norm": 9.525224203343395, |
| "learning_rate": 9.367088607594936e-07, |
| "loss": 0.9304, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.014514896867838044, |
| "grad_norm": 9.908163004382338, |
| "learning_rate": 9.62025316455696e-07, |
| "loss": 0.9193, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.014896867838044309, |
| "grad_norm": 5.240327212589582, |
| "learning_rate": 9.873417721518988e-07, |
| "loss": 0.8683, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.015278838808250574, |
| "grad_norm": 7.817860585016493, |
| "learning_rate": 1.0126582278481013e-06, |
| "loss": 0.8277, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01566080977845684, |
| "grad_norm": 7.212486723359385, |
| "learning_rate": 1.0379746835443038e-06, |
| "loss": 0.85, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.016042780748663103, |
| "grad_norm": 12.541315607408132, |
| "learning_rate": 1.063291139240506e-06, |
| "loss": 0.9004, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.016424751718869365, |
| "grad_norm": 6.587171853997535, |
| "learning_rate": 1.0886075949367088e-06, |
| "loss": 0.8266, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01680672268907563, |
| "grad_norm": 37.57512879193886, |
| "learning_rate": 1.1139240506329113e-06, |
| "loss": 0.8639, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.017188693659281894, |
| "grad_norm": 4.644959514031565, |
| "learning_rate": 1.139240506329114e-06, |
| "loss": 0.8066, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01757066462948816, |
| "grad_norm": 4.6286805207695325, |
| "learning_rate": 1.1645569620253163e-06, |
| "loss": 0.8066, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.017952635599694424, |
| "grad_norm": 7.835888478790431, |
| "learning_rate": 1.1898734177215188e-06, |
| "loss": 0.8543, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01833460656990069, |
| "grad_norm": 5.422444896928901, |
| "learning_rate": 1.2151898734177215e-06, |
| "loss": 0.8331, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01871657754010695, |
| "grad_norm": 5.460802978188565, |
| "learning_rate": 1.240506329113924e-06, |
| "loss": 0.8651, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.019098548510313215, |
| "grad_norm": 6.5678961432295475, |
| "learning_rate": 1.2658227848101265e-06, |
| "loss": 0.9134, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01948051948051948, |
| "grad_norm": 4.9487683359009536, |
| "learning_rate": 1.291139240506329e-06, |
| "loss": 0.806, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.019862490450725745, |
| "grad_norm": 4.4837558296364355, |
| "learning_rate": 1.3164556962025315e-06, |
| "loss": 0.8113, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02024446142093201, |
| "grad_norm": 5.344312043183558, |
| "learning_rate": 1.3417721518987342e-06, |
| "loss": 0.8086, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.020626432391138275, |
| "grad_norm": 5.246700443604725, |
| "learning_rate": 1.3670886075949365e-06, |
| "loss": 0.7082, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02100840336134454, |
| "grad_norm": 5.941401610969792, |
| "learning_rate": 1.3924050632911392e-06, |
| "loss": 0.7367, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0213903743315508, |
| "grad_norm": 5.542256214458787, |
| "learning_rate": 1.4177215189873418e-06, |
| "loss": 0.725, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.021772345301757066, |
| "grad_norm": 4.930892825499901, |
| "learning_rate": 1.4430379746835443e-06, |
| "loss": 0.7891, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02215431627196333, |
| "grad_norm": 9.097002648121126, |
| "learning_rate": 1.4683544303797468e-06, |
| "loss": 0.8028, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.022536287242169595, |
| "grad_norm": 11.795657973005843, |
| "learning_rate": 1.4936708860759493e-06, |
| "loss": 0.8097, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02291825821237586, |
| "grad_norm": 3.8313472673735274, |
| "learning_rate": 1.518987341772152e-06, |
| "loss": 0.6951, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.023300229182582125, |
| "grad_norm": 4.034761647826669, |
| "learning_rate": 1.5443037974683545e-06, |
| "loss": 0.7105, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.023682200152788387, |
| "grad_norm": 6.038362262794577, |
| "learning_rate": 1.5696202531645568e-06, |
| "loss": 0.7587, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02406417112299465, |
| "grad_norm": 3.1079730758909925, |
| "learning_rate": 1.5949367088607595e-06, |
| "loss": 0.6828, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.024446142093200916, |
| "grad_norm": 3.7055669255608863, |
| "learning_rate": 1.620253164556962e-06, |
| "loss": 0.702, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.02482811306340718, |
| "grad_norm": 11.676516661636496, |
| "learning_rate": 1.6455696202531647e-06, |
| "loss": 0.7052, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.025210084033613446, |
| "grad_norm": 4.025514648722259, |
| "learning_rate": 1.670886075949367e-06, |
| "loss": 0.661, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.02559205500381971, |
| "grad_norm": 4.656351795473404, |
| "learning_rate": 1.6962025316455695e-06, |
| "loss": 0.7484, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.025974025974025976, |
| "grad_norm": 4.1084827411397, |
| "learning_rate": 1.7215189873417722e-06, |
| "loss": 0.6608, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.026355996944232237, |
| "grad_norm": 4.129035383112947, |
| "learning_rate": 1.7468354430379747e-06, |
| "loss": 0.6493, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.026737967914438502, |
| "grad_norm": 3.504125310927204, |
| "learning_rate": 1.772151898734177e-06, |
| "loss": 0.6193, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.027119938884644767, |
| "grad_norm": 3.6925426579271314, |
| "learning_rate": 1.7974683544303797e-06, |
| "loss": 0.6431, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02750190985485103, |
| "grad_norm": 6.683623338936127, |
| "learning_rate": 1.8227848101265822e-06, |
| "loss": 0.739, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.027883880825057297, |
| "grad_norm": 3.5029780150790186, |
| "learning_rate": 1.848101265822785e-06, |
| "loss": 0.6587, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02826585179526356, |
| "grad_norm": 5.370596801515218, |
| "learning_rate": 1.8734177215189872e-06, |
| "loss": 0.7036, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.028647822765469823, |
| "grad_norm": 4.983202118065457, |
| "learning_rate": 1.8987341772151897e-06, |
| "loss": 0.6879, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.029029793735676088, |
| "grad_norm": 11.685843678353526, |
| "learning_rate": 1.924050632911392e-06, |
| "loss": 0.6488, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 4.392795361950916, |
| "learning_rate": 1.949367088607595e-06, |
| "loss": 0.7056, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.029793735676088617, |
| "grad_norm": 3.9315009070892764, |
| "learning_rate": 1.9746835443037976e-06, |
| "loss": 0.6494, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.030175706646294882, |
| "grad_norm": 5.141383767356884, |
| "learning_rate": 2e-06, |
| "loss": 0.6544, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.030557677616501147, |
| "grad_norm": 4.018343651117365, |
| "learning_rate": 1.9999992345015908e-06, |
| "loss": 0.6952, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.030939648586707412, |
| "grad_norm": 5.591851744780808, |
| "learning_rate": 1.999996938007535e-06, |
| "loss": 0.6603, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03132161955691368, |
| "grad_norm": 6.005561034856619, |
| "learning_rate": 1.999993110521348e-06, |
| "loss": 0.6373, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03170359052711994, |
| "grad_norm": 7.102355842806267, |
| "learning_rate": 1.9999877520488907e-06, |
| "loss": 0.6101, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03208556149732621, |
| "grad_norm": 2.867411767320501, |
| "learning_rate": 1.9999808625983663e-06, |
| "loss": 0.593, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.032467532467532464, |
| "grad_norm": 4.83662234283247, |
| "learning_rate": 1.999972442180323e-06, |
| "loss": 0.586, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03284950343773873, |
| "grad_norm": 5.198461145382523, |
| "learning_rate": 1.9999624908076514e-06, |
| "loss": 0.6789, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.033231474407944994, |
| "grad_norm": 2.8686757091820683, |
| "learning_rate": 1.999951008495588e-06, |
| "loss": 0.5732, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.03361344537815126, |
| "grad_norm": 3.1867941404051, |
| "learning_rate": 1.9999379952617116e-06, |
| "loss": 0.6728, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.033995416348357524, |
| "grad_norm": 4.819026326282661, |
| "learning_rate": 1.9999234511259463e-06, |
| "loss": 0.6788, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03437738731856379, |
| "grad_norm": 5.624234579635901, |
| "learning_rate": 1.999907376110558e-06, |
| "loss": 0.6142, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.034759358288770054, |
| "grad_norm": 27.028974037702127, |
| "learning_rate": 1.9998897702401585e-06, |
| "loss": 0.5891, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03514132925897632, |
| "grad_norm": 6.062503354980905, |
| "learning_rate": 1.999870633541701e-06, |
| "loss": 0.6348, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03552330022918258, |
| "grad_norm": 4.277480684633243, |
| "learning_rate": 1.999849966044485e-06, |
| "loss": 0.616, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.03590527119938885, |
| "grad_norm": 7.1085637623688935, |
| "learning_rate": 1.999827767780152e-06, |
| "loss": 0.7117, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03628724216959511, |
| "grad_norm": 4.056766891368591, |
| "learning_rate": 1.999804038782687e-06, |
| "loss": 0.5858, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03666921313980138, |
| "grad_norm": 4.674918715366667, |
| "learning_rate": 1.99977877908842e-06, |
| "loss": 0.6298, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03705118411000764, |
| "grad_norm": 3.300022908261171, |
| "learning_rate": 1.999751988736023e-06, |
| "loss": 0.5515, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0374331550802139, |
| "grad_norm": 8.740377754714698, |
| "learning_rate": 1.9997236677665115e-06, |
| "loss": 0.6847, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.037815126050420166, |
| "grad_norm": 7.098187800477883, |
| "learning_rate": 1.999693816223245e-06, |
| "loss": 0.6553, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03819709702062643, |
| "grad_norm": 4.937245961472043, |
| "learning_rate": 1.9996624341519268e-06, |
| "loss": 0.6398, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.038579067990832695, |
| "grad_norm": 8.475917815171096, |
| "learning_rate": 1.999629521600602e-06, |
| "loss": 0.5671, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03896103896103896, |
| "grad_norm": 3.174133239967434, |
| "learning_rate": 1.9995950786196597e-06, |
| "loss": 0.5323, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.039343009931245225, |
| "grad_norm": 3.0409122423678983, |
| "learning_rate": 1.999559105261833e-06, |
| "loss": 0.6296, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03972498090145149, |
| "grad_norm": 2.886981911151555, |
| "learning_rate": 1.9995216015821954e-06, |
| "loss": 0.5191, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.040106951871657755, |
| "grad_norm": 6.822287483611468, |
| "learning_rate": 1.9994825676381657e-06, |
| "loss": 0.5861, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04048892284186402, |
| "grad_norm": 6.200193148891678, |
| "learning_rate": 1.999442003489505e-06, |
| "loss": 0.5414, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.040870893812070284, |
| "grad_norm": 5.169789028690033, |
| "learning_rate": 1.999399909198316e-06, |
| "loss": 0.6444, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04125286478227655, |
| "grad_norm": 7.759781519596946, |
| "learning_rate": 1.9993562848290463e-06, |
| "loss": 0.5599, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.041634835752482814, |
| "grad_norm": 4.421136398577513, |
| "learning_rate": 1.9993111304484836e-06, |
| "loss": 0.6536, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.04201680672268908, |
| "grad_norm": 3.823541000198925, |
| "learning_rate": 1.9992644461257595e-06, |
| "loss": 0.6581, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04239877769289534, |
| "grad_norm": 5.516049899957531, |
| "learning_rate": 1.999216231932347e-06, |
| "loss": 0.6663, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0427807486631016, |
| "grad_norm": 5.5740814564410375, |
| "learning_rate": 1.9991664879420628e-06, |
| "loss": 0.5795, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.04316271963330787, |
| "grad_norm": 3.437123292810565, |
| "learning_rate": 1.999115214231064e-06, |
| "loss": 0.6331, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.04354469060351413, |
| "grad_norm": 3.7632584467840404, |
| "learning_rate": 1.9990624108778517e-06, |
| "loss": 0.6531, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.043926661573720396, |
| "grad_norm": 4.182331120505753, |
| "learning_rate": 1.999008077963266e-06, |
| "loss": 0.5852, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04430863254392666, |
| "grad_norm": 9.300499481983667, |
| "learning_rate": 1.9989522155704913e-06, |
| "loss": 0.5871, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.044690603514132926, |
| "grad_norm": 3.344891067734144, |
| "learning_rate": 1.9988948237850526e-06, |
| "loss": 0.5699, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.04507257448433919, |
| "grad_norm": 3.326565572079444, |
| "learning_rate": 1.9988359026948167e-06, |
| "loss": 0.5923, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.045454545454545456, |
| "grad_norm": 4.082860223491362, |
| "learning_rate": 1.9987754523899915e-06, |
| "loss": 0.6253, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04583651642475172, |
| "grad_norm": 12.324222314245391, |
| "learning_rate": 1.998713472963126e-06, |
| "loss": 0.6368, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.046218487394957986, |
| "grad_norm": 3.325231836297131, |
| "learning_rate": 1.998649964509111e-06, |
| "loss": 0.585, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04660045836516425, |
| "grad_norm": 2.9732772511513574, |
| "learning_rate": 1.9985849271251774e-06, |
| "loss": 0.5832, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.046982429335370515, |
| "grad_norm": 4.5992463698133035, |
| "learning_rate": 1.9985183609108972e-06, |
| "loss": 0.6219, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04736440030557677, |
| "grad_norm": 3.4205990364099876, |
| "learning_rate": 1.9984502659681836e-06, |
| "loss": 0.5899, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04774637127578304, |
| "grad_norm": 6.047943198436655, |
| "learning_rate": 1.9983806424012887e-06, |
| "loss": 0.5719, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0481283422459893, |
| "grad_norm": 5.793217662490507, |
| "learning_rate": 1.9983094903168067e-06, |
| "loss": 0.6094, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04851031321619557, |
| "grad_norm": 5.8062953058676525, |
| "learning_rate": 1.998236809823671e-06, |
| "loss": 0.6127, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.04889228418640183, |
| "grad_norm": 2.4470363943804783, |
| "learning_rate": 1.9981626010331558e-06, |
| "loss": 0.5495, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0492742551566081, |
| "grad_norm": 4.783628150040865, |
| "learning_rate": 1.9980868640588736e-06, |
| "loss": 0.5533, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04965622612681436, |
| "grad_norm": 3.291120740165707, |
| "learning_rate": 1.9980095990167776e-06, |
| "loss": 0.6196, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05003819709702063, |
| "grad_norm": 3.8172581712282656, |
| "learning_rate": 1.997930806025161e-06, |
| "loss": 0.6545, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.05042016806722689, |
| "grad_norm": 5.998885200857126, |
| "learning_rate": 1.9978504852046553e-06, |
| "loss": 0.6009, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.05080213903743316, |
| "grad_norm": 6.035103071609391, |
| "learning_rate": 1.997768636678231e-06, |
| "loss": 0.6362, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.05118411000763942, |
| "grad_norm": 10.3639480840429, |
| "learning_rate": 1.9976852605711986e-06, |
| "loss": 0.6109, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.05156608097784569, |
| "grad_norm": 4.230015798354289, |
| "learning_rate": 1.9976003570112055e-06, |
| "loss": 0.6676, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.05194805194805195, |
| "grad_norm": 2.870529329674749, |
| "learning_rate": 1.9975139261282406e-06, |
| "loss": 0.5568, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.05233002291825821, |
| "grad_norm": 3.979837111136933, |
| "learning_rate": 1.9974259680546276e-06, |
| "loss": 0.5828, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.052711993888464474, |
| "grad_norm": 15.324396689162327, |
| "learning_rate": 1.997336482925031e-06, |
| "loss": 0.6132, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.05309396485867074, |
| "grad_norm": 14.502950425377572, |
| "learning_rate": 1.997245470876452e-06, |
| "loss": 0.5564, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.053475935828877004, |
| "grad_norm": 2.8972805850967966, |
| "learning_rate": 1.99715293204823e-06, |
| "loss": 0.623, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05385790679908327, |
| "grad_norm": 3.9661984150677165, |
| "learning_rate": 1.997058866582041e-06, |
| "loss": 0.5244, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.054239877769289534, |
| "grad_norm": 4.6071033044010505, |
| "learning_rate": 1.9969632746218997e-06, |
| "loss": 0.5434, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0546218487394958, |
| "grad_norm": 4.86413877573829, |
| "learning_rate": 1.996866156314157e-06, |
| "loss": 0.6728, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.05500381970970206, |
| "grad_norm": 3.424906514115404, |
| "learning_rate": 1.9967675118075e-06, |
| "loss": 0.5608, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05538579067990833, |
| "grad_norm": 4.464717444097961, |
| "learning_rate": 1.996667341252953e-06, |
| "loss": 0.6112, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05576776165011459, |
| "grad_norm": 20.909201379759565, |
| "learning_rate": 1.9965656448038783e-06, |
| "loss": 0.5953, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05614973262032086, |
| "grad_norm": 3.2733693957476073, |
| "learning_rate": 1.9964624226159714e-06, |
| "loss": 0.6261, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05653170359052712, |
| "grad_norm": 4.553438973985725, |
| "learning_rate": 1.9963576748472655e-06, |
| "loss": 0.5586, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05691367456073339, |
| "grad_norm": 3.557327038420317, |
| "learning_rate": 1.99625140165813e-06, |
| "loss": 0.5531, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.057295645530939646, |
| "grad_norm": 2.805073373457496, |
| "learning_rate": 1.996143603211267e-06, |
| "loss": 0.5827, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05767761650114591, |
| "grad_norm": 3.950929404040752, |
| "learning_rate": 1.9960342796717174e-06, |
| "loss": 0.6139, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.058059587471352175, |
| "grad_norm": 6.241356658055268, |
| "learning_rate": 1.9959234312068546e-06, |
| "loss": 0.6696, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.05844155844155844, |
| "grad_norm": 4.220303162328757, |
| "learning_rate": 1.9958110579863866e-06, |
| "loss": 0.6114, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 3.696778967888018, |
| "learning_rate": 1.995697160182357e-06, |
| "loss": 0.5757, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.05920550038197097, |
| "grad_norm": 4.342641450025558, |
| "learning_rate": 1.9955817379691426e-06, |
| "loss": 0.6365, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.059587471352177235, |
| "grad_norm": 3.6988467805046774, |
| "learning_rate": 1.9954647915234554e-06, |
| "loss": 0.5526, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0599694423223835, |
| "grad_norm": 2.7467797298493184, |
| "learning_rate": 1.9953463210243386e-06, |
| "loss": 0.5721, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.060351413292589765, |
| "grad_norm": 3.722374343040301, |
| "learning_rate": 1.9952263266531716e-06, |
| "loss": 0.6569, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.06073338426279603, |
| "grad_norm": 9.533663873007717, |
| "learning_rate": 1.9951048085936645e-06, |
| "loss": 0.6318, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.061115355233002294, |
| "grad_norm": 11.718931031261699, |
| "learning_rate": 1.994981767031861e-06, |
| "loss": 0.6005, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06149732620320856, |
| "grad_norm": 4.375012655934481, |
| "learning_rate": 1.994857202156138e-06, |
| "loss": 0.5409, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.061879297173414824, |
| "grad_norm": 5.120000733640627, |
| "learning_rate": 1.9947311141572035e-06, |
| "loss": 0.5635, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.06226126814362108, |
| "grad_norm": 3.1000408920305285, |
| "learning_rate": 1.994603503228098e-06, |
| "loss": 0.5502, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.06264323911382735, |
| "grad_norm": 8.835872288121367, |
| "learning_rate": 1.994474369564193e-06, |
| "loss": 0.6048, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.06302521008403361, |
| "grad_norm": 8.273839587331405, |
| "learning_rate": 1.9943437133631922e-06, |
| "loss": 0.6539, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06340718105423988, |
| "grad_norm": 3.2608658274114237, |
| "learning_rate": 1.9942115348251295e-06, |
| "loss": 0.5477, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06378915202444614, |
| "grad_norm": 5.0788247296052, |
| "learning_rate": 1.99407783415237e-06, |
| "loss": 0.5973, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.06417112299465241, |
| "grad_norm": 3.9254811565905534, |
| "learning_rate": 1.9939426115496094e-06, |
| "loss": 0.5931, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.06455309396485867, |
| "grad_norm": 3.148494731000133, |
| "learning_rate": 1.9938058672238726e-06, |
| "loss": 0.5681, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06493506493506493, |
| "grad_norm": 3.4191069878832065, |
| "learning_rate": 1.9936676013845146e-06, |
| "loss": 0.5172, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0653170359052712, |
| "grad_norm": 3.9378468200785846, |
| "learning_rate": 1.99352781424322e-06, |
| "loss": 0.6518, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06569900687547746, |
| "grad_norm": 3.3159019691177036, |
| "learning_rate": 1.9933865060140025e-06, |
| "loss": 0.5513, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.06608097784568373, |
| "grad_norm": 3.5786485272769575, |
| "learning_rate": 1.993243676913205e-06, |
| "loss": 0.5336, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06646294881588999, |
| "grad_norm": 4.860049758383945, |
| "learning_rate": 1.9930993271594982e-06, |
| "loss": 0.6294, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06684491978609626, |
| "grad_norm": 12.43362515923018, |
| "learning_rate": 1.9929534569738807e-06, |
| "loss": 0.5987, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06722689075630252, |
| "grad_norm": 3.527533600699027, |
| "learning_rate": 1.9928060665796797e-06, |
| "loss": 0.567, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.06760886172650879, |
| "grad_norm": 8.845312363617492, |
| "learning_rate": 1.9926571562025493e-06, |
| "loss": 0.5734, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06799083269671505, |
| "grad_norm": 3.0944989407876946, |
| "learning_rate": 1.992506726070471e-06, |
| "loss": 0.5748, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06837280366692132, |
| "grad_norm": 4.535983087618347, |
| "learning_rate": 1.9923547764137523e-06, |
| "loss": 0.6393, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06875477463712758, |
| "grad_norm": 4.910034811953476, |
| "learning_rate": 1.9922013074650286e-06, |
| "loss": 0.5729, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06913674560733385, |
| "grad_norm": 4.222067308854296, |
| "learning_rate": 1.9920463194592593e-06, |
| "loss": 0.6011, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.06951871657754011, |
| "grad_norm": 5.264384268024057, |
| "learning_rate": 1.991889812633731e-06, |
| "loss": 0.642, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06990068754774637, |
| "grad_norm": 18.552090762107316, |
| "learning_rate": 1.9917317872280553e-06, |
| "loss": 0.5649, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.07028265851795264, |
| "grad_norm": 5.4812524878829345, |
| "learning_rate": 1.9915722434841686e-06, |
| "loss": 0.5436, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0706646294881589, |
| "grad_norm": 11.41145507933353, |
| "learning_rate": 1.9914111816463314e-06, |
| "loss": 0.5642, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.07104660045836517, |
| "grad_norm": 15.827321504847037, |
| "learning_rate": 1.9912486019611292e-06, |
| "loss": 0.5328, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 4.019121003233206, |
| "learning_rate": 1.9910845046774713e-06, |
| "loss": 0.5228, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0718105423987777, |
| "grad_norm": 3.914912354546862, |
| "learning_rate": 1.9909188900465898e-06, |
| "loss": 0.5933, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.07219251336898395, |
| "grad_norm": 3.7225401404614473, |
| "learning_rate": 1.99075175832204e-06, |
| "loss": 0.6174, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.07257448433919023, |
| "grad_norm": 12.833339342812874, |
| "learning_rate": 1.9905831097597005e-06, |
| "loss": 0.5013, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07295645530939648, |
| "grad_norm": 9.315651786262373, |
| "learning_rate": 1.9904129446177708e-06, |
| "loss": 0.5632, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.07333842627960276, |
| "grad_norm": 4.717241219168216, |
| "learning_rate": 1.9902412631567742e-06, |
| "loss": 0.5613, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.07372039724980901, |
| "grad_norm": 2.717188166571018, |
| "learning_rate": 1.9900680656395542e-06, |
| "loss": 0.5021, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.07410236822001529, |
| "grad_norm": 5.396732534352068, |
| "learning_rate": 1.9898933523312752e-06, |
| "loss": 0.5678, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07448433919022154, |
| "grad_norm": 20.29200499142414, |
| "learning_rate": 1.989717123499423e-06, |
| "loss": 0.6235, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0748663101604278, |
| "grad_norm": 2.994983335593462, |
| "learning_rate": 1.989539379413804e-06, |
| "loss": 0.5642, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07524828113063407, |
| "grad_norm": 8.29189563991693, |
| "learning_rate": 1.989360120346543e-06, |
| "loss": 0.5757, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.07563025210084033, |
| "grad_norm": 3.5744702141039215, |
| "learning_rate": 1.9891793465720854e-06, |
| "loss": 0.6116, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0760122230710466, |
| "grad_norm": 8.170124572497699, |
| "learning_rate": 1.9889970583671948e-06, |
| "loss": 0.6318, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07639419404125286, |
| "grad_norm": 6.5109797482671485, |
| "learning_rate": 1.9888132560109544e-06, |
| "loss": 0.6488, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07677616501145913, |
| "grad_norm": 3.1475254836277102, |
| "learning_rate": 1.988627939784765e-06, |
| "loss": 0.5516, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07715813598166539, |
| "grad_norm": 5.7077183037786, |
| "learning_rate": 1.988441109972345e-06, |
| "loss": 0.5135, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.07754010695187166, |
| "grad_norm": 6.557122804854992, |
| "learning_rate": 1.9882527668597305e-06, |
| "loss": 0.5773, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07792207792207792, |
| "grad_norm": 8.966667288445084, |
| "learning_rate": 1.9880629107352737e-06, |
| "loss": 0.5242, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.07830404889228419, |
| "grad_norm": 3.7233013320834623, |
| "learning_rate": 1.987871541889644e-06, |
| "loss": 0.5339, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07868601986249045, |
| "grad_norm": 6.417418420872459, |
| "learning_rate": 1.9876786606158265e-06, |
| "loss": 0.5242, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.07906799083269672, |
| "grad_norm": 18.824464210105184, |
| "learning_rate": 1.987484267209122e-06, |
| "loss": 0.5333, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.07944996180290298, |
| "grad_norm": 3.4759810800181126, |
| "learning_rate": 1.987288361967146e-06, |
| "loss": 0.5681, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.07983193277310924, |
| "grad_norm": 3.588775053287736, |
| "learning_rate": 1.9870909451898286e-06, |
| "loss": 0.5514, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.08021390374331551, |
| "grad_norm": 4.125026639003856, |
| "learning_rate": 1.986892017179415e-06, |
| "loss": 0.5599, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08059587471352177, |
| "grad_norm": 7.433563585133112, |
| "learning_rate": 1.986691578240462e-06, |
| "loss": 0.596, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.08097784568372804, |
| "grad_norm": 4.075158651509923, |
| "learning_rate": 1.9864896286798422e-06, |
| "loss": 0.5647, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.0813598166539343, |
| "grad_norm": 4.435622115339765, |
| "learning_rate": 1.9862861688067393e-06, |
| "loss": 0.5473, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.08174178762414057, |
| "grad_norm": 6.400752274134116, |
| "learning_rate": 1.98608119893265e-06, |
| "loss": 0.5784, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.08212375859434683, |
| "grad_norm": 4.034841024017193, |
| "learning_rate": 1.985874719371382e-06, |
| "loss": 0.5381, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0825057295645531, |
| "grad_norm": 5.558573234345964, |
| "learning_rate": 1.985666730439055e-06, |
| "loss": 0.5173, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.08288770053475936, |
| "grad_norm": 2.0565756193889047, |
| "learning_rate": 1.9854572324541e-06, |
| "loss": 0.5066, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.08326967150496563, |
| "grad_norm": 3.3119596339453716, |
| "learning_rate": 1.985246225737257e-06, |
| "loss": 0.5819, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.08365164247517189, |
| "grad_norm": 6.043976155196966, |
| "learning_rate": 1.9850337106115766e-06, |
| "loss": 0.574, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.08403361344537816, |
| "grad_norm": 2.3257096761329574, |
| "learning_rate": 1.9848196874024194e-06, |
| "loss": 0.5431, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08441558441558442, |
| "grad_norm": 2.611978681503251, |
| "learning_rate": 1.9846041564374543e-06, |
| "loss": 0.601, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.08479755538579067, |
| "grad_norm": 3.938394541202129, |
| "learning_rate": 1.984387118046657e-06, |
| "loss": 0.5749, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08517952635599695, |
| "grad_norm": 3.8099196644027216, |
| "learning_rate": 1.9841685725623146e-06, |
| "loss": 0.5683, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0855614973262032, |
| "grad_norm": 4.270888241466328, |
| "learning_rate": 1.9839485203190184e-06, |
| "loss": 0.5362, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.08594346829640948, |
| "grad_norm": 4.391700020088666, |
| "learning_rate": 1.983726961653668e-06, |
| "loss": 0.6054, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08632543926661573, |
| "grad_norm": 6.2550070608764585, |
| "learning_rate": 1.9835038969054692e-06, |
| "loss": 0.6173, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.086707410236822, |
| "grad_norm": 2.771427521994671, |
| "learning_rate": 1.983279326415933e-06, |
| "loss": 0.5435, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.08708938120702826, |
| "grad_norm": 5.470910999557675, |
| "learning_rate": 1.983053250528876e-06, |
| "loss": 0.4756, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.08747135217723453, |
| "grad_norm": 6.526521212142111, |
| "learning_rate": 1.9828256695904202e-06, |
| "loss": 0.5879, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.08785332314744079, |
| "grad_norm": 3.6873310706476903, |
| "learning_rate": 1.982596583948991e-06, |
| "loss": 0.5784, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 17.41280102282283, |
| "learning_rate": 1.9823659939553177e-06, |
| "loss": 0.5494, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.08861726508785332, |
| "grad_norm": 4.966211935572487, |
| "learning_rate": 1.9821338999624334e-06, |
| "loss": 0.5255, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.0889992360580596, |
| "grad_norm": 4.187964987938789, |
| "learning_rate": 1.9819003023256724e-06, |
| "loss": 0.5995, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.08938120702826585, |
| "grad_norm": 2.9009546668814865, |
| "learning_rate": 1.9816652014026726e-06, |
| "loss": 0.6022, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.08976317799847211, |
| "grad_norm": 3.639504554196423, |
| "learning_rate": 1.9814285975533726e-06, |
| "loss": 0.4918, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.09014514896867838, |
| "grad_norm": 4.062515207488995, |
| "learning_rate": 1.981190491140012e-06, |
| "loss": 0.591, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.09052711993888464, |
| "grad_norm": 3.4989959301090203, |
| "learning_rate": 1.9809508825271307e-06, |
| "loss": 0.4974, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 2.8663762427604973, |
| "learning_rate": 1.9807097720815695e-06, |
| "loss": 0.5101, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.09129106187929717, |
| "grad_norm": 4.90634071902155, |
| "learning_rate": 1.980467160172467e-06, |
| "loss": 0.5322, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.09167303284950344, |
| "grad_norm": 5.428428181782822, |
| "learning_rate": 1.980223047171262e-06, |
| "loss": 0.5178, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0920550038197097, |
| "grad_norm": 5.96934579408797, |
| "learning_rate": 1.97997743345169e-06, |
| "loss": 0.4557, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.09243697478991597, |
| "grad_norm": 3.1404125093300475, |
| "learning_rate": 1.9797303193897853e-06, |
| "loss": 0.6178, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.09281894576012223, |
| "grad_norm": 4.493186466405451, |
| "learning_rate": 1.979481705363878e-06, |
| "loss": 0.5436, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.0932009167303285, |
| "grad_norm": 3.033561739318456, |
| "learning_rate": 1.9792315917545966e-06, |
| "loss": 0.5967, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.09358288770053476, |
| "grad_norm": 2.9593338705670558, |
| "learning_rate": 1.978979978944863e-06, |
| "loss": 0.4987, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.09396485867074103, |
| "grad_norm": 3.74521861090976, |
| "learning_rate": 1.9787268673198968e-06, |
| "loss": 0.5345, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.09434682964094729, |
| "grad_norm": 4.703434116446447, |
| "learning_rate": 1.97847225726721e-06, |
| "loss": 0.5386, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.09472880061115355, |
| "grad_norm": 4.338722545992488, |
| "learning_rate": 1.97821614917661e-06, |
| "loss": 0.5508, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.09511077158135982, |
| "grad_norm": 3.3068457311915957, |
| "learning_rate": 1.977958543440199e-06, |
| "loss": 0.5321, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.09549274255156608, |
| "grad_norm": 7.6890680175286725, |
| "learning_rate": 1.977699440452368e-06, |
| "loss": 0.5812, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09587471352177235, |
| "grad_norm": 5.039749423839696, |
| "learning_rate": 1.9774388406098046e-06, |
| "loss": 0.5594, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.0962566844919786, |
| "grad_norm": 9.378502926954392, |
| "learning_rate": 1.9771767443114856e-06, |
| "loss": 0.5639, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.09663865546218488, |
| "grad_norm": 5.953497760251802, |
| "learning_rate": 1.9769131519586804e-06, |
| "loss": 0.5354, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.09702062643239114, |
| "grad_norm": 3.3592755873335056, |
| "learning_rate": 1.976648063954947e-06, |
| "loss": 0.5193, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.09740259740259741, |
| "grad_norm": 10.155837325369578, |
| "learning_rate": 1.9763814807061354e-06, |
| "loss": 0.5577, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09778456837280367, |
| "grad_norm": 3.7192213821503093, |
| "learning_rate": 1.9761134026203823e-06, |
| "loss": 0.5794, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.09816653934300994, |
| "grad_norm": 3.2022524038735525, |
| "learning_rate": 1.975843830108116e-06, |
| "loss": 0.5756, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.0985485103132162, |
| "grad_norm": 2.954946222885205, |
| "learning_rate": 1.97557276358205e-06, |
| "loss": 0.5548, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.09893048128342247, |
| "grad_norm": 6.3254586604717975, |
| "learning_rate": 1.9753002034571864e-06, |
| "loss": 0.5414, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.09931245225362872, |
| "grad_norm": 6.168737429198778, |
| "learning_rate": 1.9750261501508146e-06, |
| "loss": 0.6532, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09969442322383498, |
| "grad_norm": 2.3671571449627655, |
| "learning_rate": 1.974750604082509e-06, |
| "loss": 0.5148, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.10007639419404125, |
| "grad_norm": 3.069161204383331, |
| "learning_rate": 1.9744735656741294e-06, |
| "loss": 0.6238, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.10045836516424751, |
| "grad_norm": 3.25395270581985, |
| "learning_rate": 1.9741950353498208e-06, |
| "loss": 0.4785, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.10084033613445378, |
| "grad_norm": 3.6118907652227628, |
| "learning_rate": 1.9739150135360126e-06, |
| "loss": 0.5878, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.10122230710466004, |
| "grad_norm": 2.958467227272678, |
| "learning_rate": 1.973633500661417e-06, |
| "loss": 0.5537, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.10160427807486631, |
| "grad_norm": 3.495534041343745, |
| "learning_rate": 1.9733504971570297e-06, |
| "loss": 0.5092, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.10198624904507257, |
| "grad_norm": 4.3395578802723405, |
| "learning_rate": 1.9730660034561275e-06, |
| "loss": 0.5533, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.10236822001527884, |
| "grad_norm": 5.357259140150383, |
| "learning_rate": 1.97278001999427e-06, |
| "loss": 0.5497, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1027501909854851, |
| "grad_norm": 12.330903021774263, |
| "learning_rate": 1.9724925472092967e-06, |
| "loss": 0.6312, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.10313216195569137, |
| "grad_norm": 5.008650457149163, |
| "learning_rate": 1.9722035855413275e-06, |
| "loss": 0.6209, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10351413292589763, |
| "grad_norm": 8.70082828754419, |
| "learning_rate": 1.971913135432762e-06, |
| "loss": 0.541, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.1038961038961039, |
| "grad_norm": 4.3423176215549075, |
| "learning_rate": 1.971621197328278e-06, |
| "loss": 0.5866, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.10427807486631016, |
| "grad_norm": 4.755627538602661, |
| "learning_rate": 1.971327771674832e-06, |
| "loss": 0.6044, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.10466004583651642, |
| "grad_norm": 3.407967193816134, |
| "learning_rate": 1.9710328589216576e-06, |
| "loss": 0.5433, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.10504201680672269, |
| "grad_norm": 2.4416142840249395, |
| "learning_rate": 1.9707364595202657e-06, |
| "loss": 0.4781, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10542398777692895, |
| "grad_norm": 5.02202128886637, |
| "learning_rate": 1.9704385739244427e-06, |
| "loss": 0.5189, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.10580595874713522, |
| "grad_norm": 3.696189900415905, |
| "learning_rate": 1.9701392025902504e-06, |
| "loss": 0.5763, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.10618792971734148, |
| "grad_norm": 5.409243605057394, |
| "learning_rate": 1.9698383459760253e-06, |
| "loss": 0.5131, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10656990068754775, |
| "grad_norm": 11.832202446687807, |
| "learning_rate": 1.9695360045423778e-06, |
| "loss": 0.4832, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.10695187165775401, |
| "grad_norm": 9.10805760479928, |
| "learning_rate": 1.969232178752192e-06, |
| "loss": 0.4996, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10733384262796028, |
| "grad_norm": 7.3407876873468645, |
| "learning_rate": 1.968926869070624e-06, |
| "loss": 0.5288, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.10771581359816654, |
| "grad_norm": 2.3010355675392824, |
| "learning_rate": 1.9686200759651023e-06, |
| "loss": 0.4662, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.10809778456837281, |
| "grad_norm": 3.3916106572355864, |
| "learning_rate": 1.9683117999053253e-06, |
| "loss": 0.5179, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.10847975553857907, |
| "grad_norm": 5.6361615135843275, |
| "learning_rate": 1.9680020413632638e-06, |
| "loss": 0.5758, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.10886172650878534, |
| "grad_norm": 5.188466938832083, |
| "learning_rate": 1.967690800813156e-06, |
| "loss": 0.5608, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.1092436974789916, |
| "grad_norm": 2.9205838185447686, |
| "learning_rate": 1.9673780787315115e-06, |
| "loss": 0.5626, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.10962566844919786, |
| "grad_norm": 6.5632102507293215, |
| "learning_rate": 1.967063875597106e-06, |
| "loss": 0.5899, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.11000763941940413, |
| "grad_norm": 8.610035941457873, |
| "learning_rate": 1.966748191890983e-06, |
| "loss": 0.5404, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.11038961038961038, |
| "grad_norm": 3.3221648613789347, |
| "learning_rate": 1.9664310280964544e-06, |
| "loss": 0.5609, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.11077158135981666, |
| "grad_norm": 2.9946952305092736, |
| "learning_rate": 1.9661123846990962e-06, |
| "loss": 0.487, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11115355233002291, |
| "grad_norm": 6.61554377599513, |
| "learning_rate": 1.9657922621867504e-06, |
| "loss": 0.5585, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.11153552330022919, |
| "grad_norm": 2.9463233379442144, |
| "learning_rate": 1.965470661049524e-06, |
| "loss": 0.6021, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.11191749427043544, |
| "grad_norm": 7.9703423929212835, |
| "learning_rate": 1.965147581779787e-06, |
| "loss": 0.5475, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.11229946524064172, |
| "grad_norm": 2.8612057281120924, |
| "learning_rate": 1.964823024872173e-06, |
| "loss": 0.5855, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.11268143621084797, |
| "grad_norm": 2.1630491598249604, |
| "learning_rate": 1.9644969908235776e-06, |
| "loss": 0.4495, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.11306340718105425, |
| "grad_norm": 3.5789900025276857, |
| "learning_rate": 1.964169480133158e-06, |
| "loss": 0.6358, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1134453781512605, |
| "grad_norm": 2.8373987767544913, |
| "learning_rate": 1.963840493302331e-06, |
| "loss": 0.5472, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.11382734912146678, |
| "grad_norm": 7.808336378861631, |
| "learning_rate": 1.963510030834775e-06, |
| "loss": 0.6182, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.11420932009167303, |
| "grad_norm": 4.073657677829145, |
| "learning_rate": 1.963178093236428e-06, |
| "loss": 0.606, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.11459129106187929, |
| "grad_norm": 3.8323245034578877, |
| "learning_rate": 1.962844681015484e-06, |
| "loss": 0.4772, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11497326203208556, |
| "grad_norm": 2.3060243380962335, |
| "learning_rate": 1.962509794682397e-06, |
| "loss": 0.4921, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.11535523300229182, |
| "grad_norm": 2.409944953358685, |
| "learning_rate": 1.962173434749876e-06, |
| "loss": 0.4867, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.11573720397249809, |
| "grad_norm": 4.736990510231778, |
| "learning_rate": 1.9618356017328875e-06, |
| "loss": 0.6127, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.11611917494270435, |
| "grad_norm": 13.532790760092732, |
| "learning_rate": 1.961496296148653e-06, |
| "loss": 0.5324, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.11650114591291062, |
| "grad_norm": 7.960055537091021, |
| "learning_rate": 1.961155518516648e-06, |
| "loss": 0.5629, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11688311688311688, |
| "grad_norm": 5.04926063772735, |
| "learning_rate": 1.960813269358602e-06, |
| "loss": 0.518, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.11726508785332315, |
| "grad_norm": 2.965213442633116, |
| "learning_rate": 1.960469549198497e-06, |
| "loss": 0.5874, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 8.225939704122897, |
| "learning_rate": 1.960124358562568e-06, |
| "loss": 0.4595, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.11802902979373568, |
| "grad_norm": 5.646811578255499, |
| "learning_rate": 1.9597776979793007e-06, |
| "loss": 0.6004, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.11841100076394194, |
| "grad_norm": 3.776921618778041, |
| "learning_rate": 1.9594295679794314e-06, |
| "loss": 0.616, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11879297173414821, |
| "grad_norm": 4.691043925903622, |
| "learning_rate": 1.9590799690959456e-06, |
| "loss": 0.5223, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.11917494270435447, |
| "grad_norm": 7.166107995372442, |
| "learning_rate": 1.9587289018640787e-06, |
| "loss": 0.7085, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.11955691367456073, |
| "grad_norm": 15.214836688871152, |
| "learning_rate": 1.9583763668213126e-06, |
| "loss": 0.5985, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.119938884644767, |
| "grad_norm": 5.312919105323761, |
| "learning_rate": 1.9580223645073786e-06, |
| "loss": 0.5971, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.12032085561497326, |
| "grad_norm": 7.7691966008692726, |
| "learning_rate": 1.9576668954642518e-06, |
| "loss": 0.5043, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12070282658517953, |
| "grad_norm": 20.610762040077248, |
| "learning_rate": 1.9573099602361553e-06, |
| "loss": 0.5065, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.12108479755538579, |
| "grad_norm": 29.086197245117923, |
| "learning_rate": 1.9569515593695548e-06, |
| "loss": 0.6144, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.12146676852559206, |
| "grad_norm": 33.12029190501601, |
| "learning_rate": 1.9565916934131618e-06, |
| "loss": 0.5545, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.12184873949579832, |
| "grad_norm": 3.7108467677188957, |
| "learning_rate": 1.956230362917929e-06, |
| "loss": 0.5478, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.12223071046600459, |
| "grad_norm": 2.7764343487978542, |
| "learning_rate": 1.955867568437053e-06, |
| "loss": 0.5677, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12261268143621085, |
| "grad_norm": 9.477033271988223, |
| "learning_rate": 1.955503310525971e-06, |
| "loss": 0.5512, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.12299465240641712, |
| "grad_norm": 4.3968460001669705, |
| "learning_rate": 1.9551375897423604e-06, |
| "loss": 0.5451, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.12337662337662338, |
| "grad_norm": 15.561330195325665, |
| "learning_rate": 1.9547704066461382e-06, |
| "loss": 0.5541, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.12375859434682965, |
| "grad_norm": 4.550270318340944, |
| "learning_rate": 1.9544017617994617e-06, |
| "loss": 0.5589, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1241405653170359, |
| "grad_norm": 2.8993973240496076, |
| "learning_rate": 1.9540316557667236e-06, |
| "loss": 0.5785, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12452253628724216, |
| "grad_norm": 4.873813049680309, |
| "learning_rate": 1.9536600891145557e-06, |
| "loss": 0.621, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.12490450725744844, |
| "grad_norm": 4.381877155485381, |
| "learning_rate": 1.9532870624118256e-06, |
| "loss": 0.5463, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1252864782276547, |
| "grad_norm": 3.1981786582566847, |
| "learning_rate": 1.9529125762296356e-06, |
| "loss": 0.5076, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.12566844919786097, |
| "grad_norm": 4.331387448900339, |
| "learning_rate": 1.952536631141323e-06, |
| "loss": 0.6154, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.12605042016806722, |
| "grad_norm": 3.473212725915271, |
| "learning_rate": 1.9521592277224587e-06, |
| "loss": 0.5971, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.12643239113827348, |
| "grad_norm": 8.63087057459746, |
| "learning_rate": 1.9517803665508457e-06, |
| "loss": 0.5196, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.12681436210847977, |
| "grad_norm": 4.474812207067407, |
| "learning_rate": 1.9514000482065196e-06, |
| "loss": 0.5319, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.12719633307868602, |
| "grad_norm": 3.494968501203762, |
| "learning_rate": 1.951018273271747e-06, |
| "loss": 0.5551, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.12757830404889228, |
| "grad_norm": 2.739301748152415, |
| "learning_rate": 1.950635042331023e-06, |
| "loss": 0.4947, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.12796027501909854, |
| "grad_norm": 2.156954452463087, |
| "learning_rate": 1.950250355971074e-06, |
| "loss": 0.5026, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.12834224598930483, |
| "grad_norm": 2.921994242594192, |
| "learning_rate": 1.9498642147808527e-06, |
| "loss": 0.5793, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.12872421695951108, |
| "grad_norm": 3.7642621680466335, |
| "learning_rate": 1.949476619351541e-06, |
| "loss": 0.5749, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.12910618792971734, |
| "grad_norm": 13.229444503504032, |
| "learning_rate": 1.949087570276545e-06, |
| "loss": 0.5863, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1294881588999236, |
| "grad_norm": 6.610583772086692, |
| "learning_rate": 1.948697068151499e-06, |
| "loss": 0.5771, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.12987012987012986, |
| "grad_norm": 2.662384200035279, |
| "learning_rate": 1.94830511357426e-06, |
| "loss": 0.462, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13025210084033614, |
| "grad_norm": 9.227368925480821, |
| "learning_rate": 1.9479117071449085e-06, |
| "loss": 0.5419, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1306340718105424, |
| "grad_norm": 6.245643052478665, |
| "learning_rate": 1.9475168494657496e-06, |
| "loss": 0.5623, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.13101604278074866, |
| "grad_norm": 3.1169872496295317, |
| "learning_rate": 1.9471205411413082e-06, |
| "loss": 0.5091, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.13139801375095492, |
| "grad_norm": 3.0664080058582632, |
| "learning_rate": 1.9467227827783316e-06, |
| "loss": 0.4964, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1317799847211612, |
| "grad_norm": 6.616028945372317, |
| "learning_rate": 1.9463235749857863e-06, |
| "loss": 0.5546, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.13216195569136746, |
| "grad_norm": 10.19707015916137, |
| "learning_rate": 1.945922918374859e-06, |
| "loss": 0.4688, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.13254392666157372, |
| "grad_norm": 5.984892791306968, |
| "learning_rate": 1.9455208135589527e-06, |
| "loss": 0.5519, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.13292589763177998, |
| "grad_norm": 2.835298431212049, |
| "learning_rate": 1.9451172611536887e-06, |
| "loss": 0.5318, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.13330786860198626, |
| "grad_norm": 4.4156176487630265, |
| "learning_rate": 1.944712261776905e-06, |
| "loss": 0.5398, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.13368983957219252, |
| "grad_norm": 3.4902594137991043, |
| "learning_rate": 1.9443058160486537e-06, |
| "loss": 0.5508, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13407181054239878, |
| "grad_norm": 2.8430946741850014, |
| "learning_rate": 1.943897924591203e-06, |
| "loss": 0.5059, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.13445378151260504, |
| "grad_norm": 27.0985585658399, |
| "learning_rate": 1.943488588029032e-06, |
| "loss": 0.5883, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1348357524828113, |
| "grad_norm": 3.540536403271325, |
| "learning_rate": 1.9430778069888346e-06, |
| "loss": 0.544, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.13521772345301758, |
| "grad_norm": 3.9935274512439083, |
| "learning_rate": 1.942665582099515e-06, |
| "loss": 0.5443, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.13559969442322384, |
| "grad_norm": 8.228728108484132, |
| "learning_rate": 1.942251913992188e-06, |
| "loss": 0.5741, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1359816653934301, |
| "grad_norm": 2.3745527365165198, |
| "learning_rate": 1.9418368033001787e-06, |
| "loss": 0.5113, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.13636363636363635, |
| "grad_norm": 5.445201095086467, |
| "learning_rate": 1.9414202506590197e-06, |
| "loss": 0.5209, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.13674560733384264, |
| "grad_norm": 3.103070791430745, |
| "learning_rate": 1.941002256706452e-06, |
| "loss": 0.5854, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1371275783040489, |
| "grad_norm": 5.735410039753363, |
| "learning_rate": 1.9405828220824233e-06, |
| "loss": 0.5015, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.13750954927425516, |
| "grad_norm": 44.47727501508621, |
| "learning_rate": 1.9401619474290863e-06, |
| "loss": 0.5022, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1378915202444614, |
| "grad_norm": 11.54466660420791, |
| "learning_rate": 1.939739633390799e-06, |
| "loss": 0.5026, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.1382734912146677, |
| "grad_norm": 2.727692184162092, |
| "learning_rate": 1.939315880614122e-06, |
| "loss": 0.512, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.13865546218487396, |
| "grad_norm": 5.387638675900171, |
| "learning_rate": 1.9388906897478206e-06, |
| "loss": 0.4948, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.13903743315508021, |
| "grad_norm": 5.4738762897403035, |
| "learning_rate": 1.9384640614428603e-06, |
| "loss": 0.568, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.13941940412528647, |
| "grad_norm": 3.137853676264805, |
| "learning_rate": 1.9380359963524073e-06, |
| "loss": 0.455, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.13980137509549273, |
| "grad_norm": 2.85450795255527, |
| "learning_rate": 1.9376064951318286e-06, |
| "loss": 0.574, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.14018334606569902, |
| "grad_norm": 8.305956887878198, |
| "learning_rate": 1.9371755584386884e-06, |
| "loss": 0.5438, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.14056531703590527, |
| "grad_norm": 4.809269321089941, |
| "learning_rate": 1.93674318693275e-06, |
| "loss": 0.4855, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.14094728800611153, |
| "grad_norm": 4.453335651466671, |
| "learning_rate": 1.9363093812759723e-06, |
| "loss": 0.5861, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1413292589763178, |
| "grad_norm": 3.2978567378812635, |
| "learning_rate": 1.935874142132511e-06, |
| "loss": 0.5345, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14171122994652408, |
| "grad_norm": 12.008265469941955, |
| "learning_rate": 1.9354374701687153e-06, |
| "loss": 0.5366, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.14209320091673033, |
| "grad_norm": 3.2919020301413893, |
| "learning_rate": 1.9349993660531286e-06, |
| "loss": 0.5114, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1424751718869366, |
| "grad_norm": 7.145023652793651, |
| "learning_rate": 1.9345598304564875e-06, |
| "loss": 0.5006, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 2.6546012908244814, |
| "learning_rate": 1.934118864051719e-06, |
| "loss": 0.5025, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.14323911382734913, |
| "grad_norm": 4.844608194906241, |
| "learning_rate": 1.9336764675139416e-06, |
| "loss": 0.4897, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1436210847975554, |
| "grad_norm": 12.286804180963175, |
| "learning_rate": 1.933232641520463e-06, |
| "loss": 0.5046, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.14400305576776165, |
| "grad_norm": 5.710044727414116, |
| "learning_rate": 1.932787386750779e-06, |
| "loss": 0.6171, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.1443850267379679, |
| "grad_norm": 7.339897212556863, |
| "learning_rate": 1.932340703886573e-06, |
| "loss": 0.5777, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.14476699770817417, |
| "grad_norm": 4.745725943014659, |
| "learning_rate": 1.931892593611716e-06, |
| "loss": 0.508, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.14514896867838045, |
| "grad_norm": 3.736133374274852, |
| "learning_rate": 1.931443056612263e-06, |
| "loss": 0.4604, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1455309396485867, |
| "grad_norm": 10.799583523666772, |
| "learning_rate": 1.9309920935764536e-06, |
| "loss": 0.5162, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.14591291061879297, |
| "grad_norm": 3.286063954330593, |
| "learning_rate": 1.9305397051947108e-06, |
| "loss": 0.5461, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.14629488158899923, |
| "grad_norm": 4.010526947754956, |
| "learning_rate": 1.9300858921596395e-06, |
| "loss": 0.4637, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.1466768525592055, |
| "grad_norm": 9.445646971706344, |
| "learning_rate": 1.9296306551660266e-06, |
| "loss": 0.5238, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 9.813123565905315, |
| "learning_rate": 1.9291739949108382e-06, |
| "loss": 0.5323, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.14744079449961803, |
| "grad_norm": 3.120586727864197, |
| "learning_rate": 1.9287159120932198e-06, |
| "loss": 0.5204, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.14782276546982429, |
| "grad_norm": 7.671023206343543, |
| "learning_rate": 1.928256407414494e-06, |
| "loss": 0.5225, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.14820473644003057, |
| "grad_norm": 3.7529804673105693, |
| "learning_rate": 1.9277954815781623e-06, |
| "loss": 0.5061, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.14858670741023683, |
| "grad_norm": 5.585820882674302, |
| "learning_rate": 1.9273331352899e-06, |
| "loss": 0.5897, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.1489686783804431, |
| "grad_norm": 3.2965758857033327, |
| "learning_rate": 1.9268693692575576e-06, |
| "loss": 0.5668, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.14935064935064934, |
| "grad_norm": 4.062527614549751, |
| "learning_rate": 1.9264041841911595e-06, |
| "loss": 0.4788, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.1497326203208556, |
| "grad_norm": 4.527958514536294, |
| "learning_rate": 1.925937580802903e-06, |
| "loss": 0.5355, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.1501145912910619, |
| "grad_norm": 4.559973018919794, |
| "learning_rate": 1.9254695598071557e-06, |
| "loss": 0.5363, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.15049656226126815, |
| "grad_norm": 3.0412227798466955, |
| "learning_rate": 1.925000121920457e-06, |
| "loss": 0.5067, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1508785332314744, |
| "grad_norm": 4.231912383154748, |
| "learning_rate": 1.924529267861514e-06, |
| "loss": 0.5168, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.15126050420168066, |
| "grad_norm": 5.197316808761914, |
| "learning_rate": 1.9240569983512036e-06, |
| "loss": 0.5945, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.15164247517188695, |
| "grad_norm": 4.14321526210149, |
| "learning_rate": 1.9235833141125685e-06, |
| "loss": 0.4682, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.1520244461420932, |
| "grad_norm": 3.7461263120755057, |
| "learning_rate": 1.9231082158708177e-06, |
| "loss": 0.6016, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.15240641711229946, |
| "grad_norm": 2.480897417284355, |
| "learning_rate": 1.9226317043533252e-06, |
| "loss": 0.5446, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.15278838808250572, |
| "grad_norm": 8.07438667998488, |
| "learning_rate": 1.922153780289629e-06, |
| "loss": 0.462, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.153170359052712, |
| "grad_norm": 3.3341835643001883, |
| "learning_rate": 1.9216744444114283e-06, |
| "loss": 0.5137, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.15355233002291827, |
| "grad_norm": 12.924968914010819, |
| "learning_rate": 1.921193697452586e-06, |
| "loss": 0.5115, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.15393430099312452, |
| "grad_norm": 3.88799749897571, |
| "learning_rate": 1.9207115401491236e-06, |
| "loss": 0.5732, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.15431627196333078, |
| "grad_norm": 5.63628842857517, |
| "learning_rate": 1.920227973239222e-06, |
| "loss": 0.5443, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.15469824293353704, |
| "grad_norm": 11.214095099098154, |
| "learning_rate": 1.919742997463221e-06, |
| "loss": 0.5328, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.15508021390374332, |
| "grad_norm": 5.566051428810673, |
| "learning_rate": 1.919256613563617e-06, |
| "loss": 0.4916, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.15546218487394958, |
| "grad_norm": 3.3238053815734765, |
| "learning_rate": 1.9187688222850625e-06, |
| "loss": 0.5405, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.15584415584415584, |
| "grad_norm": 3.1790419527835585, |
| "learning_rate": 1.9182796243743637e-06, |
| "loss": 0.5485, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.1562261268143621, |
| "grad_norm": 3.220273652822294, |
| "learning_rate": 1.917789020580482e-06, |
| "loss": 0.5343, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.15660809778456838, |
| "grad_norm": 4.181974969495614, |
| "learning_rate": 1.917297011654529e-06, |
| "loss": 0.5917, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15699006875477464, |
| "grad_norm": 3.804677452137214, |
| "learning_rate": 1.9168035983497697e-06, |
| "loss": 0.4721, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.1573720397249809, |
| "grad_norm": 4.355811094333278, |
| "learning_rate": 1.9163087814216184e-06, |
| "loss": 0.5569, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.15775401069518716, |
| "grad_norm": 4.541557617112437, |
| "learning_rate": 1.9158125616276375e-06, |
| "loss": 0.5277, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.15813598166539344, |
| "grad_norm": 5.070373471399588, |
| "learning_rate": 1.9153149397275384e-06, |
| "loss": 0.5818, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.1585179526355997, |
| "grad_norm": 3.3582128161192233, |
| "learning_rate": 1.9148159164831785e-06, |
| "loss": 0.5603, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.15889992360580596, |
| "grad_norm": 6.345325700372508, |
| "learning_rate": 1.9143154926585612e-06, |
| "loss": 0.5772, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.15928189457601222, |
| "grad_norm": 18.705450602040244, |
| "learning_rate": 1.9138136690198334e-06, |
| "loss": 0.5301, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.15966386554621848, |
| "grad_norm": 32.11250304588509, |
| "learning_rate": 1.9133104463352852e-06, |
| "loss": 0.5438, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.16004583651642476, |
| "grad_norm": 3.606136169430874, |
| "learning_rate": 1.9128058253753495e-06, |
| "loss": 0.467, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.16042780748663102, |
| "grad_norm": 3.241408005700225, |
| "learning_rate": 1.9122998069125995e-06, |
| "loss": 0.5028, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16080977845683728, |
| "grad_norm": 3.518264294880338, |
| "learning_rate": 1.911792391721747e-06, |
| "loss": 0.5709, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.16119174942704353, |
| "grad_norm": 3.966087908470135, |
| "learning_rate": 1.911283580579644e-06, |
| "loss": 0.5232, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.16157372039724982, |
| "grad_norm": 8.417923174743748, |
| "learning_rate": 1.910773374265278e-06, |
| "loss": 0.5512, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.16195569136745608, |
| "grad_norm": 5.490712115215511, |
| "learning_rate": 1.910261773559774e-06, |
| "loss": 0.5249, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.16233766233766234, |
| "grad_norm": 5.775193829980829, |
| "learning_rate": 1.90974877924639e-06, |
| "loss": 0.5097, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.1627196333078686, |
| "grad_norm": 2.6822332745102724, |
| "learning_rate": 1.9092343921105193e-06, |
| "loss": 0.5452, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.16310160427807488, |
| "grad_norm": 2.5693849411860046, |
| "learning_rate": 1.908718612939687e-06, |
| "loss": 0.5138, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.16348357524828114, |
| "grad_norm": 3.9807799406529933, |
| "learning_rate": 1.90820144252355e-06, |
| "loss": 0.5094, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1638655462184874, |
| "grad_norm": 6.894858559350513, |
| "learning_rate": 1.907682881653893e-06, |
| "loss": 0.5386, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.16424751718869365, |
| "grad_norm": 9.704522067639532, |
| "learning_rate": 1.9071629311246325e-06, |
| "loss": 0.5815, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1646294881588999, |
| "grad_norm": 4.038760905743012, |
| "learning_rate": 1.90664159173181e-06, |
| "loss": 0.584, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.1650114591291062, |
| "grad_norm": 3.933972091240762, |
| "learning_rate": 1.9061188642735955e-06, |
| "loss": 0.5936, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.16539343009931246, |
| "grad_norm": 2.809066574314389, |
| "learning_rate": 1.905594749550282e-06, |
| "loss": 0.5166, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.1657754010695187, |
| "grad_norm": 3.232605764012116, |
| "learning_rate": 1.9050692483642884e-06, |
| "loss": 0.5989, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.16615737203972497, |
| "grad_norm": 2.779376649403726, |
| "learning_rate": 1.9045423615201549e-06, |
| "loss": 0.5631, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.16653934300993126, |
| "grad_norm": 6.427025295782284, |
| "learning_rate": 1.9040140898245437e-06, |
| "loss": 0.5532, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.16692131398013751, |
| "grad_norm": 3.2510627416912663, |
| "learning_rate": 1.9034844340862368e-06, |
| "loss": 0.5343, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.16730328495034377, |
| "grad_norm": 3.1443297662844194, |
| "learning_rate": 1.902953395116136e-06, |
| "loss": 0.5134, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.16768525592055003, |
| "grad_norm": 4.539213723683322, |
| "learning_rate": 1.9024209737272597e-06, |
| "loss": 0.5408, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.16806722689075632, |
| "grad_norm": 11.775446749973153, |
| "learning_rate": 1.9018871707347435e-06, |
| "loss": 0.4969, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.16844919786096257, |
| "grad_norm": 4.384039093799763, |
| "learning_rate": 1.9013519869558386e-06, |
| "loss": 0.5977, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.16883116883116883, |
| "grad_norm": 7.162891708265767, |
| "learning_rate": 1.900815423209909e-06, |
| "loss": 0.5363, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.1692131398013751, |
| "grad_norm": 3.56315041439479, |
| "learning_rate": 1.9002774803184322e-06, |
| "loss": 0.4719, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.16959511077158135, |
| "grad_norm": 3.969115647981566, |
| "learning_rate": 1.8997381591049972e-06, |
| "loss": 0.5716, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.16997708174178763, |
| "grad_norm": 9.170649989885177, |
| "learning_rate": 1.8991974603953034e-06, |
| "loss": 0.5123, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.1703590527119939, |
| "grad_norm": 3.0650147223282116, |
| "learning_rate": 1.8986553850171583e-06, |
| "loss": 0.6062, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.17074102368220015, |
| "grad_norm": 7.195397239965812, |
| "learning_rate": 1.8981119338004775e-06, |
| "loss": 0.5529, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1711229946524064, |
| "grad_norm": 6.176705315024795, |
| "learning_rate": 1.897567107577284e-06, |
| "loss": 0.4855, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.1715049656226127, |
| "grad_norm": 7.689916301252105, |
| "learning_rate": 1.8970209071817035e-06, |
| "loss": 0.514, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.17188693659281895, |
| "grad_norm": 2.510661764320135, |
| "learning_rate": 1.8964733334499684e-06, |
| "loss": 0.5258, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1722689075630252, |
| "grad_norm": 4.630483174397124, |
| "learning_rate": 1.8959243872204115e-06, |
| "loss": 0.5394, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.17265087853323147, |
| "grad_norm": 5.331327229967195, |
| "learning_rate": 1.8953740693334686e-06, |
| "loss": 0.5786, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.17303284950343775, |
| "grad_norm": 2.6657247574130567, |
| "learning_rate": 1.8948223806316737e-06, |
| "loss": 0.4973, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.173414820473644, |
| "grad_norm": 7.0345595478062135, |
| "learning_rate": 1.894269321959661e-06, |
| "loss": 0.5408, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.17379679144385027, |
| "grad_norm": 4.786102808716031, |
| "learning_rate": 1.8937148941641613e-06, |
| "loss": 0.4833, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.17417876241405653, |
| "grad_norm": 3.6696330883093613, |
| "learning_rate": 1.8931590980940022e-06, |
| "loss": 0.4945, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.17456073338426278, |
| "grad_norm": 8.668385378720208, |
| "learning_rate": 1.8926019346001052e-06, |
| "loss": 0.5804, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.17494270435446907, |
| "grad_norm": 21.438661768324913, |
| "learning_rate": 1.8920434045354861e-06, |
| "loss": 0.6104, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.17532467532467533, |
| "grad_norm": 4.64360218036425, |
| "learning_rate": 1.8914835087552528e-06, |
| "loss": 0.5346, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.17570664629488159, |
| "grad_norm": 13.424051118396385, |
| "learning_rate": 1.8909222481166036e-06, |
| "loss": 0.4748, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17608861726508784, |
| "grad_norm": 6.281611519049626, |
| "learning_rate": 1.8903596234788268e-06, |
| "loss": 0.5342, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 6.22921812160575, |
| "learning_rate": 1.8897956357032993e-06, |
| "loss": 0.628, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.1768525592055004, |
| "grad_norm": 5.1035629491397625, |
| "learning_rate": 1.8892302856534843e-06, |
| "loss": 0.5212, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.17723453017570664, |
| "grad_norm": 3.0915992711189064, |
| "learning_rate": 1.888663574194931e-06, |
| "loss": 0.534, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.1776165011459129, |
| "grad_norm": 5.332723908181321, |
| "learning_rate": 1.8880955021952726e-06, |
| "loss": 0.5906, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.1779984721161192, |
| "grad_norm": 3.0869913876672905, |
| "learning_rate": 1.8875260705242262e-06, |
| "loss": 0.6028, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.17838044308632545, |
| "grad_norm": 2.6777228858601188, |
| "learning_rate": 1.8869552800535891e-06, |
| "loss": 0.5192, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1787624140565317, |
| "grad_norm": 7.132223781890689, |
| "learning_rate": 1.8863831316572401e-06, |
| "loss": 0.5569, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.17914438502673796, |
| "grad_norm": 3.0366710097530087, |
| "learning_rate": 1.8858096262111365e-06, |
| "loss": 0.5749, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.17952635599694422, |
| "grad_norm": 4.359613446921773, |
| "learning_rate": 1.8852347645933134e-06, |
| "loss": 0.5339, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1799083269671505, |
| "grad_norm": 3.0313288425668476, |
| "learning_rate": 1.8846585476838817e-06, |
| "loss": 0.51, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.18029029793735676, |
| "grad_norm": 4.623850202032284, |
| "learning_rate": 1.8840809763650283e-06, |
| "loss": 0.5301, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.18067226890756302, |
| "grad_norm": 4.412433502531477, |
| "learning_rate": 1.8835020515210125e-06, |
| "loss": 0.5456, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.18105423987776928, |
| "grad_norm": 7.180970352151755, |
| "learning_rate": 1.8829217740381667e-06, |
| "loss": 0.5878, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.18143621084797557, |
| "grad_norm": 3.2580271139855563, |
| "learning_rate": 1.8823401448048938e-06, |
| "loss": 0.4877, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 6.005766550636256, |
| "learning_rate": 1.8817571647116662e-06, |
| "loss": 0.5422, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.18220015278838808, |
| "grad_norm": 63.296707768024426, |
| "learning_rate": 1.8811728346510249e-06, |
| "loss": 0.5151, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.18258212375859434, |
| "grad_norm": 4.7969886037725065, |
| "learning_rate": 1.8805871555175769e-06, |
| "loss": 0.5801, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.18296409472880062, |
| "grad_norm": 3.870191837934829, |
| "learning_rate": 1.8800001282079953e-06, |
| "loss": 0.6167, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.18334606569900688, |
| "grad_norm": 5.9456808869839834, |
| "learning_rate": 1.8794117536210172e-06, |
| "loss": 0.5733, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18372803666921314, |
| "grad_norm": 2.677736985974354, |
| "learning_rate": 1.878822032657442e-06, |
| "loss": 0.4856, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.1841100076394194, |
| "grad_norm": 2.003203767851191, |
| "learning_rate": 1.878230966220131e-06, |
| "loss": 0.4177, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.18449197860962566, |
| "grad_norm": 4.958590655184132, |
| "learning_rate": 1.8776385552140044e-06, |
| "loss": 0.4863, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.18487394957983194, |
| "grad_norm": 3.148442186053413, |
| "learning_rate": 1.877044800546042e-06, |
| "loss": 0.4769, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.1852559205500382, |
| "grad_norm": 3.8142357273417606, |
| "learning_rate": 1.8764497031252801e-06, |
| "loss": 0.4691, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.18563789152024446, |
| "grad_norm": 8.575634805488457, |
| "learning_rate": 1.8758532638628114e-06, |
| "loss": 0.5217, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.18601986249045072, |
| "grad_norm": 4.300124100036272, |
| "learning_rate": 1.875255483671782e-06, |
| "loss": 0.5795, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.186401833460657, |
| "grad_norm": 2.579583524654158, |
| "learning_rate": 1.8746563634673915e-06, |
| "loss": 0.4782, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.18678380443086326, |
| "grad_norm": 3.701604778764327, |
| "learning_rate": 1.8740559041668912e-06, |
| "loss": 0.5805, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.18716577540106952, |
| "grad_norm": 2.6956484266740404, |
| "learning_rate": 1.8734541066895825e-06, |
| "loss": 0.5391, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.18754774637127578, |
| "grad_norm": 2.436956916352227, |
| "learning_rate": 1.8728509719568154e-06, |
| "loss": 0.5029, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.18792971734148206, |
| "grad_norm": 3.796654944502864, |
| "learning_rate": 1.872246500891987e-06, |
| "loss": 0.4712, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.18831168831168832, |
| "grad_norm": 2.9952676449872095, |
| "learning_rate": 1.8716406944205407e-06, |
| "loss": 0.5643, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.18869365928189458, |
| "grad_norm": 5.071645117174178, |
| "learning_rate": 1.8710335534699644e-06, |
| "loss": 0.5572, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.18907563025210083, |
| "grad_norm": 3.2218956223207593, |
| "learning_rate": 1.8704250789697888e-06, |
| "loss": 0.5891, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1894576012223071, |
| "grad_norm": 2.567742013094073, |
| "learning_rate": 1.8698152718515865e-06, |
| "loss": 0.5372, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.18983957219251338, |
| "grad_norm": 2.894313808043004, |
| "learning_rate": 1.8692041330489702e-06, |
| "loss": 0.5649, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.19022154316271964, |
| "grad_norm": 3.9977337336849113, |
| "learning_rate": 1.8685916634975915e-06, |
| "loss": 0.6096, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.1906035141329259, |
| "grad_norm": 4.36211085391409, |
| "learning_rate": 1.8679778641351396e-06, |
| "loss": 0.5662, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.19098548510313215, |
| "grad_norm": 2.824022282196409, |
| "learning_rate": 1.867362735901339e-06, |
| "loss": 0.5705, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19136745607333844, |
| "grad_norm": 3.853603764532641, |
| "learning_rate": 1.8667462797379488e-06, |
| "loss": 0.5318, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.1917494270435447, |
| "grad_norm": 5.2244234015604984, |
| "learning_rate": 1.8661284965887623e-06, |
| "loss": 0.451, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.19213139801375095, |
| "grad_norm": 3.6467105779523474, |
| "learning_rate": 1.8655093873996026e-06, |
| "loss": 0.4862, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.1925133689839572, |
| "grad_norm": 4.761577705016175, |
| "learning_rate": 1.8648889531183248e-06, |
| "loss": 0.4919, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.1928953399541635, |
| "grad_norm": 2.5269159027313837, |
| "learning_rate": 1.8642671946948108e-06, |
| "loss": 0.5126, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.19327731092436976, |
| "grad_norm": 3.6966310858711693, |
| "learning_rate": 1.8636441130809718e-06, |
| "loss": 0.5381, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.193659281894576, |
| "grad_norm": 2.9896407474985973, |
| "learning_rate": 1.863019709230743e-06, |
| "loss": 0.5316, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.19404125286478227, |
| "grad_norm": 2.818002574138244, |
| "learning_rate": 1.8623939841000853e-06, |
| "loss": 0.4808, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.19442322383498853, |
| "grad_norm": 3.2154224870415122, |
| "learning_rate": 1.8617669386469812e-06, |
| "loss": 0.5802, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.19480519480519481, |
| "grad_norm": 3.6329119766814633, |
| "learning_rate": 1.861138573831436e-06, |
| "loss": 0.5505, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19518716577540107, |
| "grad_norm": 2.809782100245695, |
| "learning_rate": 1.8605088906154735e-06, |
| "loss": 0.463, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.19556913674560733, |
| "grad_norm": 5.423524792453049, |
| "learning_rate": 1.8598778899631376e-06, |
| "loss": 0.5187, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.1959511077158136, |
| "grad_norm": 3.1713155248240663, |
| "learning_rate": 1.8592455728404873e-06, |
| "loss": 0.4596, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.19633307868601987, |
| "grad_norm": 2.996096261985183, |
| "learning_rate": 1.8586119402155993e-06, |
| "loss": 0.5526, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.19671504965622613, |
| "grad_norm": 5.79015390194803, |
| "learning_rate": 1.8579769930585619e-06, |
| "loss": 0.576, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1970970206264324, |
| "grad_norm": 3.9655195684002087, |
| "learning_rate": 1.8573407323414779e-06, |
| "loss": 0.6321, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.19747899159663865, |
| "grad_norm": 8.441146227044339, |
| "learning_rate": 1.85670315903846e-06, |
| "loss": 0.586, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.19786096256684493, |
| "grad_norm": 17.00964825679305, |
| "learning_rate": 1.8560642741256314e-06, |
| "loss": 0.4568, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.1982429335370512, |
| "grad_norm": 3.084747706798306, |
| "learning_rate": 1.8554240785811226e-06, |
| "loss": 0.5206, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.19862490450725745, |
| "grad_norm": 3.4741332461100276, |
| "learning_rate": 1.8547825733850711e-06, |
| "loss": 0.577, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1990068754774637, |
| "grad_norm": 3.4594383129448287, |
| "learning_rate": 1.854139759519619e-06, |
| "loss": 0.5612, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.19938884644766997, |
| "grad_norm": 2.9282239639784917, |
| "learning_rate": 1.8534956379689124e-06, |
| "loss": 0.5305, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.19977081741787625, |
| "grad_norm": 6.784944014323334, |
| "learning_rate": 1.8528502097190994e-06, |
| "loss": 0.5725, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2001527883880825, |
| "grad_norm": 2.2673706400022087, |
| "learning_rate": 1.8522034757583287e-06, |
| "loss": 0.496, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.20053475935828877, |
| "grad_norm": 3.050042571012531, |
| "learning_rate": 1.851555437076748e-06, |
| "loss": 0.4564, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.20091673032849502, |
| "grad_norm": 4.446629028729372, |
| "learning_rate": 1.8509060946665019e-06, |
| "loss": 0.5623, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2012987012987013, |
| "grad_norm": 3.1836775296072672, |
| "learning_rate": 1.850255449521732e-06, |
| "loss": 0.5551, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.20168067226890757, |
| "grad_norm": 3.322504159555205, |
| "learning_rate": 1.8496035026385742e-06, |
| "loss": 0.5071, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.20206264323911383, |
| "grad_norm": 26.70160743717929, |
| "learning_rate": 1.8489502550151565e-06, |
| "loss": 0.5149, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.20244461420932008, |
| "grad_norm": 2.873326692598321, |
| "learning_rate": 1.8482957076515995e-06, |
| "loss": 0.4605, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20282658517952637, |
| "grad_norm": 13.38960869623681, |
| "learning_rate": 1.8476398615500126e-06, |
| "loss": 0.5483, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.20320855614973263, |
| "grad_norm": 5.120750671092757, |
| "learning_rate": 1.8469827177144945e-06, |
| "loss": 0.6108, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.20359052711993889, |
| "grad_norm": 6.1165279573398985, |
| "learning_rate": 1.8463242771511302e-06, |
| "loss": 0.5603, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.20397249809014514, |
| "grad_norm": 5.845196697566236, |
| "learning_rate": 1.8456645408679901e-06, |
| "loss": 0.4849, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2043544690603514, |
| "grad_norm": 6.374199389125064, |
| "learning_rate": 1.8450035098751284e-06, |
| "loss": 0.5738, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2047364400305577, |
| "grad_norm": 2.969692491461521, |
| "learning_rate": 1.8443411851845815e-06, |
| "loss": 0.4948, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.20511841100076394, |
| "grad_norm": 4.83667398940346, |
| "learning_rate": 1.8436775678103662e-06, |
| "loss": 0.6144, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2055003819709702, |
| "grad_norm": 3.2292434566996824, |
| "learning_rate": 1.8430126587684784e-06, |
| "loss": 0.5715, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 2.105787012901775, |
| "learning_rate": 1.8423464590768922e-06, |
| "loss": 0.5088, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.20626432391138275, |
| "grad_norm": 2.6848990053540946, |
| "learning_rate": 1.8416789697555571e-06, |
| "loss": 0.5096, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.206646294881589, |
| "grad_norm": 3.0525532670931645, |
| "learning_rate": 1.841010191826397e-06, |
| "loss": 0.5034, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.20702826585179526, |
| "grad_norm": 3.006624669051693, |
| "learning_rate": 1.8403401263133087e-06, |
| "loss": 0.5712, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.20741023682200152, |
| "grad_norm": 12.853137756580841, |
| "learning_rate": 1.8396687742421605e-06, |
| "loss": 0.5399, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.2077922077922078, |
| "grad_norm": 5.52397493327756, |
| "learning_rate": 1.8389961366407904e-06, |
| "loss": 0.5681, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.20817417876241406, |
| "grad_norm": 4.301081096217388, |
| "learning_rate": 1.8383222145390045e-06, |
| "loss": 0.4553, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.20855614973262032, |
| "grad_norm": 9.972029144703678, |
| "learning_rate": 1.8376470089685748e-06, |
| "loss": 0.4972, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.20893812070282658, |
| "grad_norm": 2.956531873075386, |
| "learning_rate": 1.8369705209632397e-06, |
| "loss": 0.4763, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.20932009167303284, |
| "grad_norm": 2.352517812933045, |
| "learning_rate": 1.8362927515586993e-06, |
| "loss": 0.4823, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.20970206264323912, |
| "grad_norm": 4.984880824898305, |
| "learning_rate": 1.8356137017926169e-06, |
| "loss": 0.5279, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.21008403361344538, |
| "grad_norm": 8.689006523073017, |
| "learning_rate": 1.834933372704616e-06, |
| "loss": 0.5201, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21046600458365164, |
| "grad_norm": 2.6869978648037134, |
| "learning_rate": 1.834251765336277e-06, |
| "loss": 0.5057, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.2108479755538579, |
| "grad_norm": 6.246534246290759, |
| "learning_rate": 1.83356888073114e-06, |
| "loss": 0.6429, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.21122994652406418, |
| "grad_norm": 4.424921302293031, |
| "learning_rate": 1.8328847199346983e-06, |
| "loss": 0.513, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.21161191749427044, |
| "grad_norm": 2.506864073735516, |
| "learning_rate": 1.8321992839944002e-06, |
| "loss": 0.512, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2119938884644767, |
| "grad_norm": 4.147884646333317, |
| "learning_rate": 1.831512573959646e-06, |
| "loss": 0.5281, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.21237585943468296, |
| "grad_norm": 6.453459918356094, |
| "learning_rate": 1.8308245908817862e-06, |
| "loss": 0.6072, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.21275783040488924, |
| "grad_norm": 3.227311642376684, |
| "learning_rate": 1.830135335814121e-06, |
| "loss": 0.5904, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2131398013750955, |
| "grad_norm": 6.966546708900706, |
| "learning_rate": 1.829444809811898e-06, |
| "loss": 0.5652, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.21352177234530176, |
| "grad_norm": 4.746858728601705, |
| "learning_rate": 1.8287530139323098e-06, |
| "loss": 0.5912, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "grad_norm": 3.7259365079328033, |
| "learning_rate": 1.8280599492344937e-06, |
| "loss": 0.5546, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 3.347921431679426, |
| "learning_rate": 1.82736561677953e-06, |
| "loss": 0.4817, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.21466768525592056, |
| "grad_norm": 4.556644147705862, |
| "learning_rate": 1.8266700176304388e-06, |
| "loss": 0.5861, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.21504965622612682, |
| "grad_norm": 3.1934469152204796, |
| "learning_rate": 1.825973152852181e-06, |
| "loss": 0.4972, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.21543162719633308, |
| "grad_norm": 6.385370127198636, |
| "learning_rate": 1.825275023511654e-06, |
| "loss": 0.5343, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.21581359816653933, |
| "grad_norm": 2.3151167357495517, |
| "learning_rate": 1.8245756306776911e-06, |
| "loss": 0.4989, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.21619556913674562, |
| "grad_norm": 10.555445276500569, |
| "learning_rate": 1.8238749754210611e-06, |
| "loss": 0.5253, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.21657754010695188, |
| "grad_norm": 7.1724974034380775, |
| "learning_rate": 1.8231730588144652e-06, |
| "loss": 0.5069, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.21695951107715813, |
| "grad_norm": 4.575058249729483, |
| "learning_rate": 1.8224698819325348e-06, |
| "loss": 0.5316, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2173414820473644, |
| "grad_norm": 4.002746245635046, |
| "learning_rate": 1.8217654458518318e-06, |
| "loss": 0.4499, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.21772345301757068, |
| "grad_norm": 3.948887241448565, |
| "learning_rate": 1.8210597516508457e-06, |
| "loss": 0.4636, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.21810542398777694, |
| "grad_norm": 3.3365032713673455, |
| "learning_rate": 1.820352800409992e-06, |
| "loss": 0.484, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2184873949579832, |
| "grad_norm": 4.252239694976291, |
| "learning_rate": 1.8196445932116106e-06, |
| "loss": 0.491, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.21886936592818945, |
| "grad_norm": 3.7309930962337203, |
| "learning_rate": 1.8189351311399647e-06, |
| "loss": 0.531, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2192513368983957, |
| "grad_norm": 5.44261846473984, |
| "learning_rate": 1.8182244152812384e-06, |
| "loss": 0.4911, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.219633307868602, |
| "grad_norm": 3.287807998242394, |
| "learning_rate": 1.8175124467235351e-06, |
| "loss": 0.4891, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.22001527883880825, |
| "grad_norm": 4.594516974830945, |
| "learning_rate": 1.8167992265568772e-06, |
| "loss": 0.5013, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2203972498090145, |
| "grad_norm": 3.3754014701183963, |
| "learning_rate": 1.816084755873202e-06, |
| "loss": 0.5385, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.22077922077922077, |
| "grad_norm": 6.383964843549181, |
| "learning_rate": 1.8153690357663618e-06, |
| "loss": 0.5765, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.22116119174942706, |
| "grad_norm": 2.609056317491272, |
| "learning_rate": 1.8146520673321217e-06, |
| "loss": 0.5439, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.2215431627196333, |
| "grad_norm": 3.213598585954573, |
| "learning_rate": 1.8139338516681584e-06, |
| "loss": 0.561, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22192513368983957, |
| "grad_norm": 5.179533809421901, |
| "learning_rate": 1.8132143898740578e-06, |
| "loss": 0.5489, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.22230710466004583, |
| "grad_norm": 9.17495571569845, |
| "learning_rate": 1.8124936830513131e-06, |
| "loss": 0.4893, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.22268907563025211, |
| "grad_norm": 3.861478481777529, |
| "learning_rate": 1.8117717323033247e-06, |
| "loss": 0.5399, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.22307104660045837, |
| "grad_norm": 17.525616277161284, |
| "learning_rate": 1.811048538735397e-06, |
| "loss": 0.5589, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.22345301757066463, |
| "grad_norm": 9.15611393827099, |
| "learning_rate": 1.8103241034547363e-06, |
| "loss": 0.58, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.2238349885408709, |
| "grad_norm": 13.153208089545293, |
| "learning_rate": 1.8095984275704516e-06, |
| "loss": 0.5168, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.22421695951107715, |
| "grad_norm": 5.251421468073455, |
| "learning_rate": 1.8088715121935497e-06, |
| "loss": 0.5016, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.22459893048128343, |
| "grad_norm": 3.6958396485074796, |
| "learning_rate": 1.8081433584369363e-06, |
| "loss": 0.4577, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2249809014514897, |
| "grad_norm": 3.265559498367038, |
| "learning_rate": 1.807413967415412e-06, |
| "loss": 0.4782, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.22536287242169595, |
| "grad_norm": 7.799514923143363, |
| "learning_rate": 1.806683340245672e-06, |
| "loss": 0.4997, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2257448433919022, |
| "grad_norm": 4.542184627206501, |
| "learning_rate": 1.805951478046305e-06, |
| "loss": 0.4958, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2261268143621085, |
| "grad_norm": 3.198561665660559, |
| "learning_rate": 1.8052183819377889e-06, |
| "loss": 0.5066, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.22650878533231475, |
| "grad_norm": 3.285441933069285, |
| "learning_rate": 1.8044840530424922e-06, |
| "loss": 0.5231, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.226890756302521, |
| "grad_norm": 2.437769468942543, |
| "learning_rate": 1.803748492484669e-06, |
| "loss": 0.4666, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.22727272727272727, |
| "grad_norm": 6.185410715981308, |
| "learning_rate": 1.8030117013904614e-06, |
| "loss": 0.5438, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.22765469824293355, |
| "grad_norm": 4.448274163968093, |
| "learning_rate": 1.8022736808878935e-06, |
| "loss": 0.5467, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2280366692131398, |
| "grad_norm": 3.3611937875245914, |
| "learning_rate": 1.8015344321068725e-06, |
| "loss": 0.5204, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.22841864018334607, |
| "grad_norm": 4.302668649886635, |
| "learning_rate": 1.800793956179186e-06, |
| "loss": 0.5256, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.22880061115355232, |
| "grad_norm": 4.160541159727651, |
| "learning_rate": 1.8000522542385003e-06, |
| "loss": 0.4741, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.22918258212375858, |
| "grad_norm": 2.9709820967481644, |
| "learning_rate": 1.7993093274203587e-06, |
| "loss": 0.6171, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22956455309396487, |
| "grad_norm": 2.65482208122776, |
| "learning_rate": 1.7985651768621795e-06, |
| "loss": 0.5183, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.22994652406417113, |
| "grad_norm": 3.1309167619949796, |
| "learning_rate": 1.7978198037032556e-06, |
| "loss": 0.5214, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.23032849503437738, |
| "grad_norm": 4.786873785109864, |
| "learning_rate": 1.7970732090847501e-06, |
| "loss": 0.5158, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.23071046600458364, |
| "grad_norm": 3.56544383535294, |
| "learning_rate": 1.7963253941496973e-06, |
| "loss": 0.5161, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.23109243697478993, |
| "grad_norm": 2.9671537454160832, |
| "learning_rate": 1.7955763600429994e-06, |
| "loss": 0.5702, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.23147440794499619, |
| "grad_norm": 3.4057549644612157, |
| "learning_rate": 1.7948261079114256e-06, |
| "loss": 0.4706, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.23185637891520244, |
| "grad_norm": 3.421956791631102, |
| "learning_rate": 1.794074638903609e-06, |
| "loss": 0.4638, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.2322383498854087, |
| "grad_norm": 4.060003714618489, |
| "learning_rate": 1.7933219541700466e-06, |
| "loss": 0.5237, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.232620320855615, |
| "grad_norm": 3.345537066191882, |
| "learning_rate": 1.7925680548630964e-06, |
| "loss": 0.549, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.23300229182582124, |
| "grad_norm": 2.6400782835864427, |
| "learning_rate": 1.7918129421369757e-06, |
| "loss": 0.4747, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2333842627960275, |
| "grad_norm": 10.017074793252261, |
| "learning_rate": 1.7910566171477598e-06, |
| "loss": 0.4876, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.23376623376623376, |
| "grad_norm": 6.217327964146683, |
| "learning_rate": 1.7902990810533794e-06, |
| "loss": 0.4992, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.23414820473644002, |
| "grad_norm": 3.137084300891758, |
| "learning_rate": 1.7895403350136202e-06, |
| "loss": 0.5136, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.2345301757066463, |
| "grad_norm": 3.560640966628178, |
| "learning_rate": 1.7887803801901203e-06, |
| "loss": 0.4693, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.23491214667685256, |
| "grad_norm": 3.234048960558977, |
| "learning_rate": 1.7880192177463673e-06, |
| "loss": 0.4843, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 3.6336304860358797, |
| "learning_rate": 1.7872568488476993e-06, |
| "loss": 0.4352, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.23567608861726508, |
| "grad_norm": 5.110350004291673, |
| "learning_rate": 1.7864932746613001e-06, |
| "loss": 0.5919, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.23605805958747136, |
| "grad_norm": 2.9444363443075736, |
| "learning_rate": 1.7857284963561997e-06, |
| "loss": 0.4616, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.23644003055767762, |
| "grad_norm": 4.154488116453747, |
| "learning_rate": 1.7849625151032712e-06, |
| "loss": 0.5212, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.23682200152788388, |
| "grad_norm": 3.6500330910954366, |
| "learning_rate": 1.7841953320752292e-06, |
| "loss": 0.5011, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.23720397249809014, |
| "grad_norm": 2.851279009859859, |
| "learning_rate": 1.7834269484466287e-06, |
| "loss": 0.5079, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.23758594346829642, |
| "grad_norm": 8.11545498157721, |
| "learning_rate": 1.7826573653938626e-06, |
| "loss": 0.496, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.23796791443850268, |
| "grad_norm": 2.1317456264414694, |
| "learning_rate": 1.7818865840951598e-06, |
| "loss": 0.4544, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.23834988540870894, |
| "grad_norm": 2.9181580549057813, |
| "learning_rate": 1.7811146057305847e-06, |
| "loss": 0.4882, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2387318563789152, |
| "grad_norm": 6.282079208223222, |
| "learning_rate": 1.780341431482033e-06, |
| "loss": 0.5373, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.23911382734912145, |
| "grad_norm": 2.718832511733045, |
| "learning_rate": 1.7795670625332325e-06, |
| "loss": 0.4645, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.23949579831932774, |
| "grad_norm": 2.7269521874649874, |
| "learning_rate": 1.7787915000697389e-06, |
| "loss": 0.4627, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.239877769289534, |
| "grad_norm": 5.112524310187623, |
| "learning_rate": 1.7780147452789368e-06, |
| "loss": 0.5181, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.24025974025974026, |
| "grad_norm": 2.7695910787876876, |
| "learning_rate": 1.7772367993500348e-06, |
| "loss": 0.5327, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.24064171122994651, |
| "grad_norm": 2.6145267906202285, |
| "learning_rate": 1.7764576634740656e-06, |
| "loss": 0.5574, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2410236822001528, |
| "grad_norm": 3.233765828743376, |
| "learning_rate": 1.7756773388438838e-06, |
| "loss": 0.521, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.24140565317035906, |
| "grad_norm": 6.696009320726811, |
| "learning_rate": 1.7748958266541642e-06, |
| "loss": 0.508, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.24178762414056532, |
| "grad_norm": 39.56183412074627, |
| "learning_rate": 1.7741131281013992e-06, |
| "loss": 0.481, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.24216959511077157, |
| "grad_norm": 3.869094625881142, |
| "learning_rate": 1.7733292443838978e-06, |
| "loss": 0.5563, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.24255156608097786, |
| "grad_norm": 21.41941056342672, |
| "learning_rate": 1.7725441767017837e-06, |
| "loss": 0.4881, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.24293353705118412, |
| "grad_norm": 15.102471638374096, |
| "learning_rate": 1.7717579262569925e-06, |
| "loss": 0.5545, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.24331550802139038, |
| "grad_norm": 3.070865106455782, |
| "learning_rate": 1.770970494253272e-06, |
| "loss": 0.5642, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.24369747899159663, |
| "grad_norm": 4.240853022191859, |
| "learning_rate": 1.7701818818961774e-06, |
| "loss": 0.5046, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.2440794499618029, |
| "grad_norm": 3.8398989956706755, |
| "learning_rate": 1.7693920903930714e-06, |
| "loss": 0.4598, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.24446142093200918, |
| "grad_norm": 2.6480456412643107, |
| "learning_rate": 1.7686011209531233e-06, |
| "loss": 0.5043, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.24484339190221543, |
| "grad_norm": 3.600797780452132, |
| "learning_rate": 1.7678089747873042e-06, |
| "loss": 0.5166, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.2452253628724217, |
| "grad_norm": 3.057904111996566, |
| "learning_rate": 1.7670156531083875e-06, |
| "loss": 0.4996, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.24560733384262795, |
| "grad_norm": 3.4477591809238732, |
| "learning_rate": 1.7662211571309457e-06, |
| "loss": 0.6209, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.24598930481283424, |
| "grad_norm": 3.562378985300235, |
| "learning_rate": 1.7654254880713504e-06, |
| "loss": 0.5704, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2463712757830405, |
| "grad_norm": 3.6440398708100474, |
| "learning_rate": 1.764628647147768e-06, |
| "loss": 0.5646, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.24675324675324675, |
| "grad_norm": 3.1071802553619663, |
| "learning_rate": 1.7638306355801591e-06, |
| "loss": 0.5275, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.247135217723453, |
| "grad_norm": 6.557389815257588, |
| "learning_rate": 1.7630314545902776e-06, |
| "loss": 0.5619, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2475171886936593, |
| "grad_norm": 2.7462182181993424, |
| "learning_rate": 1.7622311054016663e-06, |
| "loss": 0.5185, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.24789915966386555, |
| "grad_norm": 5.974295269074101, |
| "learning_rate": 1.7614295892396577e-06, |
| "loss": 0.6382, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.2482811306340718, |
| "grad_norm": 3.112336954489829, |
| "learning_rate": 1.7606269073313705e-06, |
| "loss": 0.5138, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.24866310160427807, |
| "grad_norm": 3.740381602615543, |
| "learning_rate": 1.7598230609057078e-06, |
| "loss": 0.4806, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.24904507257448433, |
| "grad_norm": 4.65790554328188, |
| "learning_rate": 1.7590180511933564e-06, |
| "loss": 0.4916, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2494270435446906, |
| "grad_norm": 2.7493165403420243, |
| "learning_rate": 1.7582118794267834e-06, |
| "loss": 0.5118, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.24980901451489687, |
| "grad_norm": 3.3105503913880514, |
| "learning_rate": 1.757404546840235e-06, |
| "loss": 0.5298, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.25019098548510316, |
| "grad_norm": 12.455689339904634, |
| "learning_rate": 1.7565960546697353e-06, |
| "loss": 0.5318, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2505729564553094, |
| "grad_norm": 3.380636142736357, |
| "learning_rate": 1.7557864041530828e-06, |
| "loss": 0.4663, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2509549274255157, |
| "grad_norm": 3.815668309210232, |
| "learning_rate": 1.7549755965298497e-06, |
| "loss": 0.5581, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.25133689839572193, |
| "grad_norm": 4.378975343400351, |
| "learning_rate": 1.7541636330413807e-06, |
| "loss": 0.4936, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.2517188693659282, |
| "grad_norm": 6.528924785964309, |
| "learning_rate": 1.7533505149307887e-06, |
| "loss": 0.5414, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.25210084033613445, |
| "grad_norm": 3.4142135055928167, |
| "learning_rate": 1.752536243442955e-06, |
| "loss": 0.568, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2524828113063407, |
| "grad_norm": 2.8288694978964974, |
| "learning_rate": 1.7517208198245266e-06, |
| "loss": 0.5267, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.25286478227654696, |
| "grad_norm": 3.50959890251818, |
| "learning_rate": 1.7509042453239146e-06, |
| "loss": 0.5019, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.2532467532467532, |
| "grad_norm": 2.7771422738572618, |
| "learning_rate": 1.7500865211912923e-06, |
| "loss": 0.4467, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.25362872421695953, |
| "grad_norm": 2.5295925264464367, |
| "learning_rate": 1.7492676486785923e-06, |
| "loss": 0.5013, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.2540106951871658, |
| "grad_norm": 4.8645822646400925, |
| "learning_rate": 1.7484476290395058e-06, |
| "loss": 0.6675, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.25439266615737205, |
| "grad_norm": 2.7906824801853958, |
| "learning_rate": 1.7476264635294803e-06, |
| "loss": 0.4964, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.2547746371275783, |
| "grad_norm": 4.426841802219747, |
| "learning_rate": 1.7468041534057176e-06, |
| "loss": 0.5779, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.25515660809778457, |
| "grad_norm": 2.7424287930246862, |
| "learning_rate": 1.745980699927172e-06, |
| "loss": 0.4458, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.2555385790679908, |
| "grad_norm": 3.006140806996059, |
| "learning_rate": 1.7451561043545481e-06, |
| "loss": 0.5388, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.2559205500381971, |
| "grad_norm": 3.042148103787358, |
| "learning_rate": 1.7443303679502993e-06, |
| "loss": 0.5555, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.25630252100840334, |
| "grad_norm": 6.668821228222795, |
| "learning_rate": 1.743503491978625e-06, |
| "loss": 0.4967, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.25668449197860965, |
| "grad_norm": 2.5308707340238827, |
| "learning_rate": 1.74267547770547e-06, |
| "loss": 0.4663, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.2570664629488159, |
| "grad_norm": 6.833924393698803, |
| "learning_rate": 1.7418463263985213e-06, |
| "loss": 0.5645, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.25744843391902217, |
| "grad_norm": 3.248878235436747, |
| "learning_rate": 1.741016039327207e-06, |
| "loss": 0.5378, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.2578304048892284, |
| "grad_norm": 12.379869478911926, |
| "learning_rate": 1.7401846177626937e-06, |
| "loss": 0.4715, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.2582123758594347, |
| "grad_norm": 2.4700767549860916, |
| "learning_rate": 1.7393520629778858e-06, |
| "loss": 0.4435, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.25859434682964094, |
| "grad_norm": 3.8376182240242347, |
| "learning_rate": 1.7385183762474216e-06, |
| "loss": 0.5057, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.2589763177998472, |
| "grad_norm": 2.787561200875686, |
| "learning_rate": 1.737683558847673e-06, |
| "loss": 0.4756, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.25935828877005346, |
| "grad_norm": 8.0937580576913, |
| "learning_rate": 1.7368476120567425e-06, |
| "loss": 0.4516, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 3.348677722488532, |
| "learning_rate": 1.7360105371544624e-06, |
| "loss": 0.534, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.26012223071046603, |
| "grad_norm": 4.392537977655502, |
| "learning_rate": 1.735172335422391e-06, |
| "loss": 0.5828, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.2605042016806723, |
| "grad_norm": 4.871306831406301, |
| "learning_rate": 1.7343330081438134e-06, |
| "loss": 0.5305, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.26088617265087855, |
| "grad_norm": 2.0945191210539447, |
| "learning_rate": 1.7334925566037364e-06, |
| "loss": 0.4465, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.2612681436210848, |
| "grad_norm": 2.4815453087895363, |
| "learning_rate": 1.7326509820888891e-06, |
| "loss": 0.5161, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.26165011459129106, |
| "grad_norm": 2.512152878408946, |
| "learning_rate": 1.7318082858877189e-06, |
| "loss": 0.4902, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.2620320855614973, |
| "grad_norm": 3.8314532509519004, |
| "learning_rate": 1.7309644692903908e-06, |
| "loss": 0.5136, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.2624140565317036, |
| "grad_norm": 4.003857938523139, |
| "learning_rate": 1.7301195335887861e-06, |
| "loss": 0.5135, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.26279602750190983, |
| "grad_norm": 2.336318230756018, |
| "learning_rate": 1.7292734800764983e-06, |
| "loss": 0.4443, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.2631779984721161, |
| "grad_norm": 4.912046668250144, |
| "learning_rate": 1.7284263100488325e-06, |
| "loss": 0.5218, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.2635599694423224, |
| "grad_norm": 14.649714625917312, |
| "learning_rate": 1.7275780248028035e-06, |
| "loss": 0.5415, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.26394194041252866, |
| "grad_norm": 4.778500699991443, |
| "learning_rate": 1.7267286256371334e-06, |
| "loss": 0.5475, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2643239113827349, |
| "grad_norm": 5.334807436364755, |
| "learning_rate": 1.7258781138522494e-06, |
| "loss": 0.486, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 4.407848861156866, |
| "learning_rate": 1.7250264907502823e-06, |
| "loss": 0.4618, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.26508785332314744, |
| "grad_norm": 4.081137578513586, |
| "learning_rate": 1.7241737576350646e-06, |
| "loss": 0.5714, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2654698242933537, |
| "grad_norm": 3.458527890022539, |
| "learning_rate": 1.7233199158121278e-06, |
| "loss": 0.5386, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.26585179526355995, |
| "grad_norm": 3.6226981960725078, |
| "learning_rate": 1.722464966588701e-06, |
| "loss": 0.5704, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.2662337662337662, |
| "grad_norm": 2.633192513546539, |
| "learning_rate": 1.7216089112737092e-06, |
| "loss": 0.4667, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.2666157372039725, |
| "grad_norm": 24.569139675810856, |
| "learning_rate": 1.7207517511777697e-06, |
| "loss": 0.4881, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.2669977081741788, |
| "grad_norm": 3.7580265708859746, |
| "learning_rate": 1.719893487613192e-06, |
| "loss": 0.568, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.26737967914438504, |
| "grad_norm": 4.400795314770478, |
| "learning_rate": 1.7190341218939753e-06, |
| "loss": 0.5632, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2677616501145913, |
| "grad_norm": 7.344189586259203, |
| "learning_rate": 1.7181736553358053e-06, |
| "loss": 0.5036, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.26814362108479756, |
| "grad_norm": 2.778695505776415, |
| "learning_rate": 1.717312089256054e-06, |
| "loss": 0.5119, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.2685255920550038, |
| "grad_norm": 2.5695491741039467, |
| "learning_rate": 1.7164494249737759e-06, |
| "loss": 0.5112, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.2689075630252101, |
| "grad_norm": 2.8934171667829185, |
| "learning_rate": 1.715585663809708e-06, |
| "loss": 0.4718, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.26928953399541633, |
| "grad_norm": 2.6161453326453, |
| "learning_rate": 1.7147208070862646e-06, |
| "loss": 0.5047, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2696715049656226, |
| "grad_norm": 3.8219439204812944, |
| "learning_rate": 1.7138548561275398e-06, |
| "loss": 0.5148, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.2700534759358289, |
| "grad_norm": 2.1503328382135853, |
| "learning_rate": 1.7129878122593016e-06, |
| "loss": 0.5314, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.27043544690603516, |
| "grad_norm": 3.646190022749743, |
| "learning_rate": 1.712119676808991e-06, |
| "loss": 0.5311, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.2708174178762414, |
| "grad_norm": 3.731396032506048, |
| "learning_rate": 1.7112504511057205e-06, |
| "loss": 0.5884, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.2711993888464477, |
| "grad_norm": 3.6777917230456723, |
| "learning_rate": 1.7103801364802725e-06, |
| "loss": 0.5793, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.27158135981665393, |
| "grad_norm": 6.13373394566763, |
| "learning_rate": 1.7095087342650953e-06, |
| "loss": 0.5185, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.2719633307868602, |
| "grad_norm": 3.087001723579745, |
| "learning_rate": 1.7086362457943032e-06, |
| "loss": 0.5009, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.27234530175706645, |
| "grad_norm": 4.490686392373228, |
| "learning_rate": 1.7077626724036733e-06, |
| "loss": 0.5339, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 3.005739224446104, |
| "learning_rate": 1.7068880154306436e-06, |
| "loss": 0.5423, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.27310924369747897, |
| "grad_norm": 4.784083760333966, |
| "learning_rate": 1.7060122762143113e-06, |
| "loss": 0.5006, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.2734912146676853, |
| "grad_norm": 2.508657710501213, |
| "learning_rate": 1.70513545609543e-06, |
| "loss": 0.5682, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.27387318563789154, |
| "grad_norm": 4.199766413295021, |
| "learning_rate": 1.704257556416409e-06, |
| "loss": 0.5516, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.2742551566080978, |
| "grad_norm": 5.212747405600585, |
| "learning_rate": 1.7033785785213097e-06, |
| "loss": 0.5135, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.27463712757830405, |
| "grad_norm": 3.1721537708110477, |
| "learning_rate": 1.7024985237558442e-06, |
| "loss": 0.5187, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.2750190985485103, |
| "grad_norm": 3.2955829455322654, |
| "learning_rate": 1.701617393467374e-06, |
| "loss": 0.5187, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.27540106951871657, |
| "grad_norm": 4.975453907210425, |
| "learning_rate": 1.7007351890049066e-06, |
| "loss": 0.5139, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2757830404889228, |
| "grad_norm": 3.4265908223186003, |
| "learning_rate": 1.6998519117190939e-06, |
| "loss": 0.5373, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2761650114591291, |
| "grad_norm": 3.750942277195497, |
| "learning_rate": 1.6989675629622311e-06, |
| "loss": 0.5224, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.2765469824293354, |
| "grad_norm": 2.9167247213499623, |
| "learning_rate": 1.698082144088253e-06, |
| "loss": 0.4503, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.27692895339954166, |
| "grad_norm": 2.6892296224397443, |
| "learning_rate": 1.6971956564527331e-06, |
| "loss": 0.4308, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2773109243697479, |
| "grad_norm": 7.484679803225303, |
| "learning_rate": 1.6963081014128814e-06, |
| "loss": 0.5805, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.27769289533995417, |
| "grad_norm": 2.8744376077945795, |
| "learning_rate": 1.6954194803275418e-06, |
| "loss": 0.5175, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.27807486631016043, |
| "grad_norm": 2.939035122326872, |
| "learning_rate": 1.6945297945571898e-06, |
| "loss": 0.5184, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.2784568372803667, |
| "grad_norm": 3.3676310306978636, |
| "learning_rate": 1.6936390454639323e-06, |
| "loss": 0.507, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.27883880825057294, |
| "grad_norm": 7.253458488294697, |
| "learning_rate": 1.6927472344115027e-06, |
| "loss": 0.5281, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2792207792207792, |
| "grad_norm": 3.159544953754157, |
| "learning_rate": 1.6918543627652615e-06, |
| "loss": 0.516, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.27960275019098546, |
| "grad_norm": 4.138952498409637, |
| "learning_rate": 1.6909604318921918e-06, |
| "loss": 0.4688, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.2799847211611918, |
| "grad_norm": 2.741705159701366, |
| "learning_rate": 1.6900654431608992e-06, |
| "loss": 0.4839, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.28036669213139803, |
| "grad_norm": 3.693179714945185, |
| "learning_rate": 1.6891693979416081e-06, |
| "loss": 0.51, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.2807486631016043, |
| "grad_norm": 7.452946357187165, |
| "learning_rate": 1.688272297606162e-06, |
| "loss": 0.4816, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.28113063407181055, |
| "grad_norm": 2.742131074634169, |
| "learning_rate": 1.6873741435280175e-06, |
| "loss": 0.4426, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.2815126050420168, |
| "grad_norm": 5.630665693685824, |
| "learning_rate": 1.686474937082246e-06, |
| "loss": 0.548, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.28189457601222306, |
| "grad_norm": 5.688789138069565, |
| "learning_rate": 1.68557467964553e-06, |
| "loss": 0.5106, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2822765469824293, |
| "grad_norm": 2.4631885801224636, |
| "learning_rate": 1.6846733725961605e-06, |
| "loss": 0.4238, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2826585179526356, |
| "grad_norm": 3.5667080177526422, |
| "learning_rate": 1.6837710173140359e-06, |
| "loss": 0.5505, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.28304048892284184, |
| "grad_norm": 11.929409510968739, |
| "learning_rate": 1.6828676151806587e-06, |
| "loss": 0.504, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.28342245989304815, |
| "grad_norm": 4.103107051996335, |
| "learning_rate": 1.6819631675791355e-06, |
| "loss": 0.494, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.2838044308632544, |
| "grad_norm": 3.9061323742785876, |
| "learning_rate": 1.6810576758941726e-06, |
| "loss": 0.471, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.28418640183346067, |
| "grad_norm": 3.0871420882267007, |
| "learning_rate": 1.6801511415120743e-06, |
| "loss": 0.4401, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.2845683728036669, |
| "grad_norm": 5.957436850330277, |
| "learning_rate": 1.6792435658207422e-06, |
| "loss": 0.5614, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.2849503437738732, |
| "grad_norm": 7.907512720107014, |
| "learning_rate": 1.6783349502096718e-06, |
| "loss": 0.5234, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.28533231474407944, |
| "grad_norm": 7.7846738390269765, |
| "learning_rate": 1.6774252960699508e-06, |
| "loss": 0.5755, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 4.268104953943963, |
| "learning_rate": 1.6765146047942569e-06, |
| "loss": 0.5197, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.28609625668449196, |
| "grad_norm": 4.400981542332326, |
| "learning_rate": 1.6756028777768546e-06, |
| "loss": 0.5216, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.28647822765469827, |
| "grad_norm": 3.237959983040218, |
| "learning_rate": 1.6746901164135964e-06, |
| "loss": 0.4327, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2868601986249045, |
| "grad_norm": 2.4956321020896826, |
| "learning_rate": 1.6737763221019165e-06, |
| "loss": 0.4209, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.2872421695951108, |
| "grad_norm": 3.3785218467080993, |
| "learning_rate": 1.6728614962408307e-06, |
| "loss": 0.5133, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.28762414056531704, |
| "grad_norm": 5.755907594154302, |
| "learning_rate": 1.671945640230935e-06, |
| "loss": 0.493, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.2880061115355233, |
| "grad_norm": 3.5371156438993467, |
| "learning_rate": 1.6710287554744018e-06, |
| "loss": 0.5648, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.28838808250572956, |
| "grad_norm": 3.31050461562023, |
| "learning_rate": 1.670110843374979e-06, |
| "loss": 0.5592, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2887700534759358, |
| "grad_norm": 10.456411989986606, |
| "learning_rate": 1.669191905337987e-06, |
| "loss": 0.517, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.2891520244461421, |
| "grad_norm": 2.042811323137261, |
| "learning_rate": 1.6682719427703162e-06, |
| "loss": 0.4311, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.28953399541634833, |
| "grad_norm": 3.7313229869118527, |
| "learning_rate": 1.6673509570804277e-06, |
| "loss": 0.5495, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.28991596638655465, |
| "grad_norm": 5.091712054727465, |
| "learning_rate": 1.6664289496783469e-06, |
| "loss": 0.5404, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2902979373567609, |
| "grad_norm": 3.7509496079574665, |
| "learning_rate": 1.6655059219756642e-06, |
| "loss": 0.4976, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.29067990832696716, |
| "grad_norm": 3.0937726229305142, |
| "learning_rate": 1.6645818753855323e-06, |
| "loss": 0.4678, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2910618792971734, |
| "grad_norm": 3.323105883693273, |
| "learning_rate": 1.6636568113226634e-06, |
| "loss": 0.4866, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2914438502673797, |
| "grad_norm": 5.253451440192973, |
| "learning_rate": 1.662730731203328e-06, |
| "loss": 0.4973, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.29182582123758594, |
| "grad_norm": 3.4256082230957503, |
| "learning_rate": 1.661803636445351e-06, |
| "loss": 0.536, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2922077922077922, |
| "grad_norm": 3.3306256815135216, |
| "learning_rate": 1.6608755284681126e-06, |
| "loss": 0.5426, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.29258976317799845, |
| "grad_norm": 2.9356322251185865, |
| "learning_rate": 1.6599464086925426e-06, |
| "loss": 0.5344, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.2929717341482047, |
| "grad_norm": 4.155899142256312, |
| "learning_rate": 1.65901627854112e-06, |
| "loss": 0.5349, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.293353705118411, |
| "grad_norm": 3.777614719641604, |
| "learning_rate": 1.658085139437872e-06, |
| "loss": 0.494, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2937356760886173, |
| "grad_norm": 2.5142543924089114, |
| "learning_rate": 1.6571529928083692e-06, |
| "loss": 0.4697, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 6.722985418682869, |
| "learning_rate": 1.6562198400797252e-06, |
| "loss": 0.5333, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2944996180290298, |
| "grad_norm": 3.631912374117992, |
| "learning_rate": 1.6552856826805935e-06, |
| "loss": 0.5664, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.29488158899923606, |
| "grad_norm": 4.989274098547631, |
| "learning_rate": 1.6543505220411663e-06, |
| "loss": 0.5367, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.2952635599694423, |
| "grad_norm": 2.228577586192082, |
| "learning_rate": 1.6534143595931717e-06, |
| "loss": 0.5349, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.29564553093964857, |
| "grad_norm": 2.6656508541994577, |
| "learning_rate": 1.6524771967698711e-06, |
| "loss": 0.5048, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.29602750190985483, |
| "grad_norm": 3.3576646899531513, |
| "learning_rate": 1.6515390350060584e-06, |
| "loss": 0.4657, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.29640947288006114, |
| "grad_norm": 3.3575492714076565, |
| "learning_rate": 1.650599875738056e-06, |
| "loss": 0.5392, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.2967914438502674, |
| "grad_norm": 5.6684435372916155, |
| "learning_rate": 1.6496597204037135e-06, |
| "loss": 0.4961, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.29717341482047366, |
| "grad_norm": 4.442816807184914, |
| "learning_rate": 1.6487185704424057e-06, |
| "loss": 0.5325, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.2975553857906799, |
| "grad_norm": 4.592388725625374, |
| "learning_rate": 1.6477764272950307e-06, |
| "loss": 0.4357, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.2979373567608862, |
| "grad_norm": 4.286297730878364, |
| "learning_rate": 1.6468332924040062e-06, |
| "loss": 0.5542, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.29831932773109243, |
| "grad_norm": 2.275160007815433, |
| "learning_rate": 1.645889167213269e-06, |
| "loss": 0.518, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.2987012987012987, |
| "grad_norm": 2.535647558636044, |
| "learning_rate": 1.6449440531682717e-06, |
| "loss": 0.5267, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.29908326967150495, |
| "grad_norm": 4.43586450645513, |
| "learning_rate": 1.6439979517159808e-06, |
| "loss": 0.5018, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.2994652406417112, |
| "grad_norm": 3.189161112968492, |
| "learning_rate": 1.6430508643048743e-06, |
| "loss": 0.4726, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.2998472116119175, |
| "grad_norm": 3.1935773386697597, |
| "learning_rate": 1.6421027923849408e-06, |
| "loss": 0.5276, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3002291825821238, |
| "grad_norm": 10.751918205851151, |
| "learning_rate": 1.641153737407675e-06, |
| "loss": 0.4862, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.30061115355233003, |
| "grad_norm": 3.0434845781199065, |
| "learning_rate": 1.6402037008260768e-06, |
| "loss": 0.5054, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.3009931245225363, |
| "grad_norm": 3.2685180753421568, |
| "learning_rate": 1.6392526840946492e-06, |
| "loss": 0.5713, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.30137509549274255, |
| "grad_norm": 3.0040361376646034, |
| "learning_rate": 1.6383006886693962e-06, |
| "loss": 0.4681, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3017570664629488, |
| "grad_norm": 3.635855546494789, |
| "learning_rate": 1.6373477160078197e-06, |
| "loss": 0.5384, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.30213903743315507, |
| "grad_norm": 4.5175144684161825, |
| "learning_rate": 1.6363937675689174e-06, |
| "loss": 0.5288, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.3025210084033613, |
| "grad_norm": 12.922747266951877, |
| "learning_rate": 1.6354388448131818e-06, |
| "loss": 0.5918, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3029029793735676, |
| "grad_norm": 3.686851028164734, |
| "learning_rate": 1.6344829492025962e-06, |
| "loss": 0.612, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3032849503437739, |
| "grad_norm": 7.826182578996057, |
| "learning_rate": 1.633526082200634e-06, |
| "loss": 0.5536, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.30366692131398015, |
| "grad_norm": 2.7716060805059484, |
| "learning_rate": 1.6325682452722556e-06, |
| "loss": 0.5195, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3040488922841864, |
| "grad_norm": 3.1362624859125665, |
| "learning_rate": 1.6316094398839062e-06, |
| "loss": 0.5409, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.30443086325439267, |
| "grad_norm": 10.90252915912184, |
| "learning_rate": 1.6306496675035132e-06, |
| "loss": 0.569, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3048128342245989, |
| "grad_norm": 3.266629007159501, |
| "learning_rate": 1.629688929600486e-06, |
| "loss": 0.4988, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3051948051948052, |
| "grad_norm": 4.016902341607713, |
| "learning_rate": 1.6287272276457112e-06, |
| "loss": 0.4002, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.30557677616501144, |
| "grad_norm": 2.5154528680969337, |
| "learning_rate": 1.6277645631115506e-06, |
| "loss": 0.5111, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3059587471352177, |
| "grad_norm": 9.075085882863707, |
| "learning_rate": 1.6268009374718411e-06, |
| "loss": 0.4927, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.306340718105424, |
| "grad_norm": 31.66511136078232, |
| "learning_rate": 1.6258363522018908e-06, |
| "loss": 0.5165, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3067226890756303, |
| "grad_norm": 5.0751500129820695, |
| "learning_rate": 1.624870808778476e-06, |
| "loss": 0.474, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.30710466004583653, |
| "grad_norm": 2.236087119497167, |
| "learning_rate": 1.6239043086798411e-06, |
| "loss": 0.4722, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3074866310160428, |
| "grad_norm": 5.903012956063016, |
| "learning_rate": 1.6229368533856947e-06, |
| "loss": 0.5124, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.30786860198624905, |
| "grad_norm": 14.118675520926457, |
| "learning_rate": 1.6219684443772075e-06, |
| "loss": 0.533, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.3082505729564553, |
| "grad_norm": 3.7142239375936987, |
| "learning_rate": 1.6209990831370105e-06, |
| "loss": 0.5129, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.30863254392666156, |
| "grad_norm": 3.8244519908044703, |
| "learning_rate": 1.6200287711491928e-06, |
| "loss": 0.5359, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3090145148968678, |
| "grad_norm": 3.3849363817604634, |
| "learning_rate": 1.6190575098992993e-06, |
| "loss": 0.5272, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3093964858670741, |
| "grad_norm": 4.1533543275022256, |
| "learning_rate": 1.6180853008743278e-06, |
| "loss": 0.512, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3097784568372804, |
| "grad_norm": 4.30937018622683, |
| "learning_rate": 1.6171121455627268e-06, |
| "loss": 0.4175, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.31016042780748665, |
| "grad_norm": 3.549926399740357, |
| "learning_rate": 1.6161380454543943e-06, |
| "loss": 0.5586, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3105423987776929, |
| "grad_norm": 3.5404138591865766, |
| "learning_rate": 1.6151630020406742e-06, |
| "loss": 0.518, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.31092436974789917, |
| "grad_norm": 3.093654062052044, |
| "learning_rate": 1.6141870168143551e-06, |
| "loss": 0.5309, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3113063407181054, |
| "grad_norm": 2.648167452438263, |
| "learning_rate": 1.6132100912696673e-06, |
| "loss": 0.5085, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3116883116883117, |
| "grad_norm": 13.440120277339844, |
| "learning_rate": 1.612232226902281e-06, |
| "loss": 0.5151, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.31207028265851794, |
| "grad_norm": 9.911688500332652, |
| "learning_rate": 1.6112534252093028e-06, |
| "loss": 0.5441, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3124522536287242, |
| "grad_norm": 4.568940753889595, |
| "learning_rate": 1.610273687689275e-06, |
| "loss": 0.4445, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.31283422459893045, |
| "grad_norm": 3.757739018041412, |
| "learning_rate": 1.6092930158421733e-06, |
| "loss": 0.4919, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.31321619556913677, |
| "grad_norm": 2.7005901111787938, |
| "learning_rate": 1.6083114111694025e-06, |
| "loss": 0.5493, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.313598166539343, |
| "grad_norm": 5.329346610981711, |
| "learning_rate": 1.6073288751737968e-06, |
| "loss": 0.5071, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3139801375095493, |
| "grad_norm": 4.155621924910232, |
| "learning_rate": 1.606345409359615e-06, |
| "loss": 0.511, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.31436210847975554, |
| "grad_norm": 3.2160415173643697, |
| "learning_rate": 1.6053610152325407e-06, |
| "loss": 0.5149, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.3147440794499618, |
| "grad_norm": 4.498833992646186, |
| "learning_rate": 1.6043756942996781e-06, |
| "loss": 0.4367, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.31512605042016806, |
| "grad_norm": 2.667390368969184, |
| "learning_rate": 1.6033894480695503e-06, |
| "loss": 0.5029, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.3155080213903743, |
| "grad_norm": 2.7056494777003834, |
| "learning_rate": 1.6024022780520967e-06, |
| "loss": 0.5443, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3158899923605806, |
| "grad_norm": 4.119909442418713, |
| "learning_rate": 1.6014141857586723e-06, |
| "loss": 0.4948, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3162719633307869, |
| "grad_norm": 3.0422282372245544, |
| "learning_rate": 1.6004251727020427e-06, |
| "loss": 0.4634, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.31665393430099315, |
| "grad_norm": 2.3102004429656255, |
| "learning_rate": 1.599435240396384e-06, |
| "loss": 0.4902, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3170359052711994, |
| "grad_norm": 5.010024744168456, |
| "learning_rate": 1.598444390357279e-06, |
| "loss": 0.5581, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.31741787624140566, |
| "grad_norm": 4.281690511868047, |
| "learning_rate": 1.5974526241017168e-06, |
| "loss": 0.5059, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.3177998472116119, |
| "grad_norm": 2.7057434095712356, |
| "learning_rate": 1.5964599431480876e-06, |
| "loss": 0.5423, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.3181818181818182, |
| "grad_norm": 3.2463832233266796, |
| "learning_rate": 1.595466349016183e-06, |
| "loss": 0.5701, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.31856378915202443, |
| "grad_norm": 4.658997201978724, |
| "learning_rate": 1.5944718432271924e-06, |
| "loss": 0.6041, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.3189457601222307, |
| "grad_norm": 4.653531281239861, |
| "learning_rate": 1.5934764273037011e-06, |
| "loss": 0.5121, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.31932773109243695, |
| "grad_norm": 4.039817069755923, |
| "learning_rate": 1.5924801027696879e-06, |
| "loss": 0.4844, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.31970970206264326, |
| "grad_norm": 5.172647458886336, |
| "learning_rate": 1.5914828711505221e-06, |
| "loss": 0.5569, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3200916730328495, |
| "grad_norm": 4.281902465737346, |
| "learning_rate": 1.5904847339729627e-06, |
| "loss": 0.5001, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3204736440030558, |
| "grad_norm": 8.404705127449969, |
| "learning_rate": 1.5894856927651538e-06, |
| "loss": 0.5092, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 2.973031951264967, |
| "learning_rate": 1.5884857490566248e-06, |
| "loss": 0.5156, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3212375859434683, |
| "grad_norm": 2.3748881103612334, |
| "learning_rate": 1.587484904378286e-06, |
| "loss": 0.5132, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.32161955691367455, |
| "grad_norm": 3.582807394186353, |
| "learning_rate": 1.586483160262428e-06, |
| "loss": 0.5619, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3220015278838808, |
| "grad_norm": 2.8054071099189954, |
| "learning_rate": 1.585480518242717e-06, |
| "loss": 0.5356, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.32238349885408707, |
| "grad_norm": 2.0739297348896106, |
| "learning_rate": 1.5844769798541957e-06, |
| "loss": 0.434, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.3227654698242933, |
| "grad_norm": 2.7875432362127843, |
| "learning_rate": 1.5834725466332777e-06, |
| "loss": 0.4314, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.32314744079449964, |
| "grad_norm": 8.036151517702406, |
| "learning_rate": 1.5824672201177469e-06, |
| "loss": 0.524, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 2.9892156832631587, |
| "learning_rate": 1.5814610018467552e-06, |
| "loss": 0.6175, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.32391138273491216, |
| "grad_norm": 4.311205923674658, |
| "learning_rate": 1.5804538933608194e-06, |
| "loss": 0.5605, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3242933537051184, |
| "grad_norm": 3.5892677119039305, |
| "learning_rate": 1.5794458962018195e-06, |
| "loss": 0.4768, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3246753246753247, |
| "grad_norm": 6.4505241661083845, |
| "learning_rate": 1.5784370119129964e-06, |
| "loss": 0.5404, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.32505729564553093, |
| "grad_norm": 12.166502490348956, |
| "learning_rate": 1.5774272420389481e-06, |
| "loss": 0.5269, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.3254392666157372, |
| "grad_norm": 11.495508400688843, |
| "learning_rate": 1.576416588125629e-06, |
| "loss": 0.4747, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.32582123758594345, |
| "grad_norm": 3.701605295686685, |
| "learning_rate": 1.5754050517203477e-06, |
| "loss": 0.5328, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.32620320855614976, |
| "grad_norm": 15.817995551065785, |
| "learning_rate": 1.5743926343717626e-06, |
| "loss": 0.5315, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.326585179526356, |
| "grad_norm": 13.590005349057503, |
| "learning_rate": 1.5733793376298818e-06, |
| "loss": 0.5502, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.3269671504965623, |
| "grad_norm": 3.0797220814221093, |
| "learning_rate": 1.572365163046059e-06, |
| "loss": 0.5087, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.32734912146676853, |
| "grad_norm": 11.583847119214154, |
| "learning_rate": 1.571350112172993e-06, |
| "loss": 0.4712, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3277310924369748, |
| "grad_norm": 3.2961430613576517, |
| "learning_rate": 1.5703341865647225e-06, |
| "loss": 0.501, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.32811306340718105, |
| "grad_norm": 3.928057328770376, |
| "learning_rate": 1.569317387776627e-06, |
| "loss": 0.5554, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.3284950343773873, |
| "grad_norm": 2.5322774096420075, |
| "learning_rate": 1.5682997173654217e-06, |
| "loss": 0.5155, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.32887700534759357, |
| "grad_norm": 6.961657500082668, |
| "learning_rate": 1.567281176889158e-06, |
| "loss": 0.5751, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3292589763177998, |
| "grad_norm": 2.686658068544792, |
| "learning_rate": 1.5662617679072166e-06, |
| "loss": 0.4305, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.32964094728800614, |
| "grad_norm": 3.691449670890614, |
| "learning_rate": 1.56524149198031e-06, |
| "loss": 0.4606, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.3300229182582124, |
| "grad_norm": 5.90480604880744, |
| "learning_rate": 1.5642203506704777e-06, |
| "loss": 0.5178, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.33040488922841865, |
| "grad_norm": 3.246819425219357, |
| "learning_rate": 1.5631983455410835e-06, |
| "loss": 0.5048, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3307868601986249, |
| "grad_norm": 2.9229315626683285, |
| "learning_rate": 1.562175478156814e-06, |
| "loss": 0.5188, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.33116883116883117, |
| "grad_norm": 3.9641201967098465, |
| "learning_rate": 1.5611517500836757e-06, |
| "loss": 0.4967, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.3315508021390374, |
| "grad_norm": 2.964095577159658, |
| "learning_rate": 1.5601271628889939e-06, |
| "loss": 0.4629, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3319327731092437, |
| "grad_norm": 3.108639008409921, |
| "learning_rate": 1.5591017181414072e-06, |
| "loss": 0.5235, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.33231474407944994, |
| "grad_norm": 5.145992404038292, |
| "learning_rate": 1.558075417410869e-06, |
| "loss": 0.4824, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3326967150496562, |
| "grad_norm": 4.293286297651328, |
| "learning_rate": 1.557048262268642e-06, |
| "loss": 0.5072, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.3330786860198625, |
| "grad_norm": 2.4399864551955286, |
| "learning_rate": 1.556020254287298e-06, |
| "loss": 0.5023, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.33346065699006877, |
| "grad_norm": 3.949450729997129, |
| "learning_rate": 1.5549913950407132e-06, |
| "loss": 0.5172, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.33384262796027503, |
| "grad_norm": 3.399165386574725, |
| "learning_rate": 1.5539616861040688e-06, |
| "loss": 0.5144, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3342245989304813, |
| "grad_norm": 4.669292406099275, |
| "learning_rate": 1.552931129053845e-06, |
| "loss": 0.6242, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.33460656990068754, |
| "grad_norm": 3.1373804904522458, |
| "learning_rate": 1.5518997254678217e-06, |
| "loss": 0.5548, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3349885408708938, |
| "grad_norm": 2.916976585751487, |
| "learning_rate": 1.5508674769250748e-06, |
| "loss": 0.5318, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.33537051184110006, |
| "grad_norm": 3.5908802384512413, |
| "learning_rate": 1.549834385005973e-06, |
| "loss": 0.4623, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.3357524828113063, |
| "grad_norm": 2.200974095180301, |
| "learning_rate": 1.5488004512921768e-06, |
| "loss": 0.4454, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 3.3527657355943656, |
| "learning_rate": 1.547765677366636e-06, |
| "loss": 0.427, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3365164247517189, |
| "grad_norm": 2.837928974571468, |
| "learning_rate": 1.5467300648135859e-06, |
| "loss": 0.4713, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.33689839572192515, |
| "grad_norm": 2.788964382737859, |
| "learning_rate": 1.5456936152185458e-06, |
| "loss": 0.4874, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.3372803666921314, |
| "grad_norm": 6.609913165528445, |
| "learning_rate": 1.5446563301683169e-06, |
| "loss": 0.4809, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.33766233766233766, |
| "grad_norm": 3.0894351341308135, |
| "learning_rate": 1.5436182112509793e-06, |
| "loss": 0.4754, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.3380443086325439, |
| "grad_norm": 2.98238262427399, |
| "learning_rate": 1.5425792600558898e-06, |
| "loss": 0.5204, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.3384262796027502, |
| "grad_norm": 3.3134083797259413, |
| "learning_rate": 1.541539478173679e-06, |
| "loss": 0.461, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.33880825057295644, |
| "grad_norm": 3.0428586325349434, |
| "learning_rate": 1.5404988671962505e-06, |
| "loss": 0.5149, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3391902215431627, |
| "grad_norm": 8.556822675299031, |
| "learning_rate": 1.5394574287167756e-06, |
| "loss": 0.5548, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.339572192513369, |
| "grad_norm": 40.331980616781514, |
| "learning_rate": 1.5384151643296935e-06, |
| "loss": 0.5335, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.33995416348357527, |
| "grad_norm": 3.2769661613654697, |
| "learning_rate": 1.5373720756307078e-06, |
| "loss": 0.5156, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3403361344537815, |
| "grad_norm": 2.630930679330716, |
| "learning_rate": 1.536328164216784e-06, |
| "loss": 0.5501, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.3407181054239878, |
| "grad_norm": 3.487737259280104, |
| "learning_rate": 1.5352834316861472e-06, |
| "loss": 0.4984, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.34110007639419404, |
| "grad_norm": 3.1969075890967336, |
| "learning_rate": 1.534237879638279e-06, |
| "loss": 0.4516, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.3414820473644003, |
| "grad_norm": 4.640794216980847, |
| "learning_rate": 1.5331915096739172e-06, |
| "loss": 0.4934, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.34186401833460656, |
| "grad_norm": 3.252307864765291, |
| "learning_rate": 1.53214432339505e-06, |
| "loss": 0.5368, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3422459893048128, |
| "grad_norm": 5.048618425090819, |
| "learning_rate": 1.5310963224049172e-06, |
| "loss": 0.4647, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3426279602750191, |
| "grad_norm": 6.500653003128015, |
| "learning_rate": 1.5300475083080038e-06, |
| "loss": 0.5108, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.3430099312452254, |
| "grad_norm": 6.8570957671235915, |
| "learning_rate": 1.528997882710042e-06, |
| "loss": 0.4578, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.34339190221543164, |
| "grad_norm": 2.578823000754853, |
| "learning_rate": 1.5279474472180045e-06, |
| "loss": 0.4267, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.3437738731856379, |
| "grad_norm": 3.414024136450465, |
| "learning_rate": 1.526896203440105e-06, |
| "loss": 0.5203, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.34415584415584416, |
| "grad_norm": 8.992620514875538, |
| "learning_rate": 1.525844152985794e-06, |
| "loss": 0.5287, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.3445378151260504, |
| "grad_norm": 4.945730735463921, |
| "learning_rate": 1.5247912974657579e-06, |
| "loss": 0.6097, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.3449197860962567, |
| "grad_norm": 5.514250273126038, |
| "learning_rate": 1.523737638491915e-06, |
| "loss": 0.5202, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.34530175706646293, |
| "grad_norm": 3.6791103306942485, |
| "learning_rate": 1.5226831776774139e-06, |
| "loss": 0.4856, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.3456837280366692, |
| "grad_norm": 4.689131774070413, |
| "learning_rate": 1.5216279166366306e-06, |
| "loss": 0.5033, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.3460656990068755, |
| "grad_norm": 2.189339157882484, |
| "learning_rate": 1.5205718569851665e-06, |
| "loss": 0.4472, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.34644766997708176, |
| "grad_norm": 6.16605398997605, |
| "learning_rate": 1.5195150003398455e-06, |
| "loss": 0.5039, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.346829640947288, |
| "grad_norm": 110.6126110619747, |
| "learning_rate": 1.5184573483187116e-06, |
| "loss": 0.4873, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.3472116119174943, |
| "grad_norm": 2.7540819070670244, |
| "learning_rate": 1.517398902541027e-06, |
| "loss": 0.5104, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.34759358288770054, |
| "grad_norm": 3.353857432293008, |
| "learning_rate": 1.5163396646272686e-06, |
| "loss": 0.568, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.3479755538579068, |
| "grad_norm": 3.8221680503464, |
| "learning_rate": 1.5152796361991264e-06, |
| "loss": 0.4608, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.34835752482811305, |
| "grad_norm": 5.973897087213475, |
| "learning_rate": 1.5142188188795005e-06, |
| "loss": 0.4826, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3487394957983193, |
| "grad_norm": 3.3970328884940275, |
| "learning_rate": 1.5131572142924989e-06, |
| "loss": 0.5158, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.34912146676852557, |
| "grad_norm": 4.76313072091096, |
| "learning_rate": 1.5120948240634347e-06, |
| "loss": 0.4796, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.3495034377387319, |
| "grad_norm": 5.967721554053427, |
| "learning_rate": 1.511031649818824e-06, |
| "loss": 0.5212, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.34988540870893814, |
| "grad_norm": 2.9216360097531178, |
| "learning_rate": 1.5099676931863834e-06, |
| "loss": 0.5179, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.3502673796791444, |
| "grad_norm": 3.172000569956292, |
| "learning_rate": 1.5089029557950271e-06, |
| "loss": 0.5656, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.35064935064935066, |
| "grad_norm": 5.813299529779351, |
| "learning_rate": 1.507837439274864e-06, |
| "loss": 0.4995, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3510313216195569, |
| "grad_norm": 3.7781237809507098, |
| "learning_rate": 1.5067711452571975e-06, |
| "loss": 0.5843, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.35141329258976317, |
| "grad_norm": 10.782452205742599, |
| "learning_rate": 1.5057040753745195e-06, |
| "loss": 0.5142, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.35179526355996943, |
| "grad_norm": 5.7483669629282454, |
| "learning_rate": 1.5046362312605111e-06, |
| "loss": 0.5085, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3521772345301757, |
| "grad_norm": 4.458336973237563, |
| "learning_rate": 1.5035676145500381e-06, |
| "loss": 0.5518, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.35255920550038194, |
| "grad_norm": 3.4207203325004905, |
| "learning_rate": 1.5024982268791492e-06, |
| "loss": 0.5429, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 2.7591171419666174, |
| "learning_rate": 1.5014280698850738e-06, |
| "loss": 0.4842, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3533231474407945, |
| "grad_norm": 2.209637927985218, |
| "learning_rate": 1.5003571452062188e-06, |
| "loss": 0.4312, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3537051184110008, |
| "grad_norm": 4.7613159279196005, |
| "learning_rate": 1.499285454482166e-06, |
| "loss": 0.5084, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.35408708938120703, |
| "grad_norm": 3.4608843679091894, |
| "learning_rate": 1.498212999353671e-06, |
| "loss": 0.5299, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3544690603514133, |
| "grad_norm": 2.9484392042079395, |
| "learning_rate": 1.497139781462659e-06, |
| "loss": 0.5075, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.35485103132161955, |
| "grad_norm": 4.041471908677798, |
| "learning_rate": 1.496065802452223e-06, |
| "loss": 0.4838, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3552330022918258, |
| "grad_norm": 3.0586178857981374, |
| "learning_rate": 1.4949910639666217e-06, |
| "loss": 0.464, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.35561497326203206, |
| "grad_norm": 8.57318431884878, |
| "learning_rate": 1.4939155676512765e-06, |
| "loss": 0.5569, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3559969442322384, |
| "grad_norm": 2.91974056190333, |
| "learning_rate": 1.4928393151527682e-06, |
| "loss": 0.5208, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.35637891520244463, |
| "grad_norm": 4.689793265209578, |
| "learning_rate": 1.4917623081188367e-06, |
| "loss": 0.4852, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.3567608861726509, |
| "grad_norm": 43.24004926210291, |
| "learning_rate": 1.4906845481983756e-06, |
| "loss": 0.5409, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 2.663567979947614, |
| "learning_rate": 1.4896060370414327e-06, |
| "loss": 0.4745, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3575248281130634, |
| "grad_norm": 5.681770625260996, |
| "learning_rate": 1.4885267762992046e-06, |
| "loss": 0.5177, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.35790679908326967, |
| "grad_norm": 2.8746557940398936, |
| "learning_rate": 1.4874467676240361e-06, |
| "loss": 0.4514, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.3582887700534759, |
| "grad_norm": 2.72188447367986, |
| "learning_rate": 1.4863660126694172e-06, |
| "loss": 0.4785, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3586707410236822, |
| "grad_norm": 3.6175411817892997, |
| "learning_rate": 1.4852845130899801e-06, |
| "loss": 0.4918, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.35905271199388844, |
| "grad_norm": 20.16523000109139, |
| "learning_rate": 1.4842022705414975e-06, |
| "loss": 0.5141, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.35943468296409475, |
| "grad_norm": 3.31480666789411, |
| "learning_rate": 1.4831192866808789e-06, |
| "loss": 0.575, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.359816653934301, |
| "grad_norm": 5.126295586451847, |
| "learning_rate": 1.4820355631661695e-06, |
| "loss": 0.4575, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.36019862490450727, |
| "grad_norm": 4.398695016233855, |
| "learning_rate": 1.4809511016565467e-06, |
| "loss": 0.4898, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.3605805958747135, |
| "grad_norm": 5.322409252746599, |
| "learning_rate": 1.4798659038123174e-06, |
| "loss": 0.4963, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.3609625668449198, |
| "grad_norm": 2.9183001547754595, |
| "learning_rate": 1.478779971294916e-06, |
| "loss": 0.4457, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.36134453781512604, |
| "grad_norm": 36.832544737198184, |
| "learning_rate": 1.4776933057669015e-06, |
| "loss": 0.5173, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.3617265087853323, |
| "grad_norm": 3.408698119711173, |
| "learning_rate": 1.4766059088919556e-06, |
| "loss": 0.563, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.36210847975553856, |
| "grad_norm": 7.428213130895087, |
| "learning_rate": 1.4755177823348796e-06, |
| "loss": 0.4943, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.3624904507257448, |
| "grad_norm": 4.738013482670962, |
| "learning_rate": 1.4744289277615915e-06, |
| "loss": 0.5229, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.36287242169595113, |
| "grad_norm": 2.378619303414155, |
| "learning_rate": 1.4733393468391246e-06, |
| "loss": 0.4536, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3632543926661574, |
| "grad_norm": 3.403223882172209, |
| "learning_rate": 1.4722490412356234e-06, |
| "loss": 0.5731, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 4.240397305405459, |
| "learning_rate": 1.4711580126203425e-06, |
| "loss": 0.5746, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.3640183346065699, |
| "grad_norm": 2.5189168596943823, |
| "learning_rate": 1.4700662626636432e-06, |
| "loss": 0.4387, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.36440030557677616, |
| "grad_norm": 6.257987655553435, |
| "learning_rate": 1.4689737930369916e-06, |
| "loss": 0.455, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.3647822765469824, |
| "grad_norm": 2.5872762152510487, |
| "learning_rate": 1.4678806054129546e-06, |
| "loss": 0.4462, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.3651642475171887, |
| "grad_norm": 4.1215541610904936, |
| "learning_rate": 1.4667867014651992e-06, |
| "loss": 0.5034, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.36554621848739494, |
| "grad_norm": 4.878479213331288, |
| "learning_rate": 1.465692082868489e-06, |
| "loss": 0.6026, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.36592818945760125, |
| "grad_norm": 6.856687402344972, |
| "learning_rate": 1.4645967512986817e-06, |
| "loss": 0.5123, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3663101604278075, |
| "grad_norm": 5.736416942272037, |
| "learning_rate": 1.463500708432726e-06, |
| "loss": 0.5408, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.36669213139801377, |
| "grad_norm": 7.295068052144105, |
| "learning_rate": 1.46240395594866e-06, |
| "loss": 0.4872, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.36707410236822, |
| "grad_norm": 6.789232985077266, |
| "learning_rate": 1.461306495525609e-06, |
| "loss": 0.5294, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3674560733384263, |
| "grad_norm": 10.419922076787683, |
| "learning_rate": 1.4602083288437807e-06, |
| "loss": 0.5054, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.36783804430863254, |
| "grad_norm": 3.8318471946405115, |
| "learning_rate": 1.459109457584465e-06, |
| "loss": 0.6145, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3682200152788388, |
| "grad_norm": 2.7453447380279865, |
| "learning_rate": 1.4580098834300306e-06, |
| "loss": 0.4706, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.36860198624904505, |
| "grad_norm": 3.3957056908352534, |
| "learning_rate": 1.4569096080639217e-06, |
| "loss": 0.4816, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.3689839572192513, |
| "grad_norm": 21.643808173296733, |
| "learning_rate": 1.4558086331706566e-06, |
| "loss": 0.5029, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.3693659281894576, |
| "grad_norm": 3.509279777143902, |
| "learning_rate": 1.4547069604358237e-06, |
| "loss": 0.4814, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3697478991596639, |
| "grad_norm": 2.7867056631866935, |
| "learning_rate": 1.4536045915460815e-06, |
| "loss": 0.4967, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.37012987012987014, |
| "grad_norm": 5.711653548074173, |
| "learning_rate": 1.4525015281891525e-06, |
| "loss": 0.543, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3705118411000764, |
| "grad_norm": 3.753851754021124, |
| "learning_rate": 1.451397772053824e-06, |
| "loss": 0.4908, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.37089381207028266, |
| "grad_norm": 3.0791284718021577, |
| "learning_rate": 1.450293324829942e-06, |
| "loss": 0.4725, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3712757830404889, |
| "grad_norm": 3.5159822093311144, |
| "learning_rate": 1.4491881882084122e-06, |
| "loss": 0.5371, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.3716577540106952, |
| "grad_norm": 3.6907461066883953, |
| "learning_rate": 1.4480823638811957e-06, |
| "loss": 0.5091, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.37203972498090143, |
| "grad_norm": 18.50413628464325, |
| "learning_rate": 1.4469758535413056e-06, |
| "loss": 0.5464, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3724216959511077, |
| "grad_norm": 37.857841620925036, |
| "learning_rate": 1.4458686588828055e-06, |
| "loss": 0.4892, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.372803666921314, |
| "grad_norm": 4.062087723456443, |
| "learning_rate": 1.4447607816008073e-06, |
| "loss": 0.4883, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.37318563789152026, |
| "grad_norm": 4.0502899507248475, |
| "learning_rate": 1.4436522233914676e-06, |
| "loss": 0.5115, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3735676088617265, |
| "grad_norm": 3.2830982872044614, |
| "learning_rate": 1.442542985951985e-06, |
| "loss": 0.5604, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.3739495798319328, |
| "grad_norm": 3.7574384597214383, |
| "learning_rate": 1.4414330709805988e-06, |
| "loss": 0.4615, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.37433155080213903, |
| "grad_norm": 9.626314969376834, |
| "learning_rate": 1.4403224801765856e-06, |
| "loss": 0.4969, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3747135217723453, |
| "grad_norm": 3.031009660999545, |
| "learning_rate": 1.439211215240256e-06, |
| "loss": 0.5262, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.37509549274255155, |
| "grad_norm": 3.6146467716884856, |
| "learning_rate": 1.438099277872953e-06, |
| "loss": 0.5456, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3754774637127578, |
| "grad_norm": 2.5938956320763737, |
| "learning_rate": 1.4369866697770494e-06, |
| "loss": 0.5533, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3758594346829641, |
| "grad_norm": 6.142448242177804, |
| "learning_rate": 1.435873392655945e-06, |
| "loss": 0.4812, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.3762414056531704, |
| "grad_norm": 3.5063299893044184, |
| "learning_rate": 1.4347594482140629e-06, |
| "loss": 0.4996, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.37662337662337664, |
| "grad_norm": 3.1597222773438127, |
| "learning_rate": 1.4336448381568484e-06, |
| "loss": 0.4993, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3770053475935829, |
| "grad_norm": 4.8775854732586295, |
| "learning_rate": 1.4325295641907666e-06, |
| "loss": 0.4694, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.37738731856378915, |
| "grad_norm": 13.788910507294815, |
| "learning_rate": 1.4314136280232983e-06, |
| "loss": 0.4994, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.3777692895339954, |
| "grad_norm": 5.68951446399639, |
| "learning_rate": 1.4302970313629375e-06, |
| "loss": 0.5316, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.37815126050420167, |
| "grad_norm": 2.690273908691655, |
| "learning_rate": 1.429179775919191e-06, |
| "loss": 0.5368, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3785332314744079, |
| "grad_norm": 3.22293202334982, |
| "learning_rate": 1.428061863402573e-06, |
| "loss": 0.4405, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.3789152024446142, |
| "grad_norm": 2.495156170749678, |
| "learning_rate": 1.426943295524604e-06, |
| "loss": 0.4883, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3792971734148205, |
| "grad_norm": 7.075696664168943, |
| "learning_rate": 1.4258240739978073e-06, |
| "loss": 0.5525, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.37967914438502676, |
| "grad_norm": 3.803537398333798, |
| "learning_rate": 1.4247042005357086e-06, |
| "loss": 0.5246, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.380061115355233, |
| "grad_norm": 6.842575378629883, |
| "learning_rate": 1.4235836768528297e-06, |
| "loss": 0.5811, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.3804430863254393, |
| "grad_norm": 3.0681376523619655, |
| "learning_rate": 1.4224625046646895e-06, |
| "loss": 0.5135, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.38082505729564553, |
| "grad_norm": 3.347087827176987, |
| "learning_rate": 1.4213406856877983e-06, |
| "loss": 0.4539, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.3812070282658518, |
| "grad_norm": 4.27287433479762, |
| "learning_rate": 1.420218221639658e-06, |
| "loss": 0.5221, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.38158899923605805, |
| "grad_norm": 3.439342338737529, |
| "learning_rate": 1.4190951142387574e-06, |
| "loss": 0.5051, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.3819709702062643, |
| "grad_norm": 3.194297131410048, |
| "learning_rate": 1.4179713652045701e-06, |
| "loss": 0.4774, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 13.77060808359035, |
| "learning_rate": 1.416846976257552e-06, |
| "loss": 0.3837, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3827349121466769, |
| "grad_norm": 4.67459855390606, |
| "learning_rate": 1.4157219491191402e-06, |
| "loss": 0.4918, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.38311688311688313, |
| "grad_norm": 5.012892064717471, |
| "learning_rate": 1.4145962855117463e-06, |
| "loss": 0.5793, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3834988540870894, |
| "grad_norm": 3.2392189698830522, |
| "learning_rate": 1.4134699871587583e-06, |
| "loss": 0.4695, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.38388082505729565, |
| "grad_norm": 3.4936570085767307, |
| "learning_rate": 1.4123430557845355e-06, |
| "loss": 0.4915, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.3842627960275019, |
| "grad_norm": 4.30960732257421, |
| "learning_rate": 1.4112154931144065e-06, |
| "loss": 0.5364, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.38464476699770817, |
| "grad_norm": 4.6293362264783475, |
| "learning_rate": 1.4100873008746654e-06, |
| "loss": 0.4841, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.3850267379679144, |
| "grad_norm": 4.359299187255675, |
| "learning_rate": 1.4089584807925712e-06, |
| "loss": 0.5463, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3854087089381207, |
| "grad_norm": 5.72992461473088, |
| "learning_rate": 1.4078290345963442e-06, |
| "loss": 0.4207, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.385790679908327, |
| "grad_norm": 3.166354780239239, |
| "learning_rate": 1.4066989640151628e-06, |
| "loss": 0.5181, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.38617265087853325, |
| "grad_norm": 12.031522401908607, |
| "learning_rate": 1.4055682707791613e-06, |
| "loss": 0.5968, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3865546218487395, |
| "grad_norm": 5.0095346697800265, |
| "learning_rate": 1.4044369566194272e-06, |
| "loss": 0.5062, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.38693659281894577, |
| "grad_norm": 3.876436159087894, |
| "learning_rate": 1.4033050232679994e-06, |
| "loss": 0.5436, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.387318563789152, |
| "grad_norm": 5.328253283162843, |
| "learning_rate": 1.4021724724578643e-06, |
| "loss": 0.5046, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.3877005347593583, |
| "grad_norm": 3.774429591348757, |
| "learning_rate": 1.4010393059229531e-06, |
| "loss": 0.4722, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.38808250572956454, |
| "grad_norm": 3.1480818323642943, |
| "learning_rate": 1.3999055253981404e-06, |
| "loss": 0.5092, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3884644766997708, |
| "grad_norm": 3.365454476940935, |
| "learning_rate": 1.3987711326192407e-06, |
| "loss": 0.5091, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.38884644766997706, |
| "grad_norm": 4.863633592183512, |
| "learning_rate": 1.3976361293230057e-06, |
| "loss": 0.533, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.38922841864018337, |
| "grad_norm": 6.886090786327392, |
| "learning_rate": 1.3965005172471216e-06, |
| "loss": 0.4529, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.38961038961038963, |
| "grad_norm": 7.3423065455983325, |
| "learning_rate": 1.3953642981302069e-06, |
| "loss": 0.5695, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3899923605805959, |
| "grad_norm": 5.385929885807556, |
| "learning_rate": 1.39422747371181e-06, |
| "loss": 0.5426, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.39037433155080214, |
| "grad_norm": 2.5061006448252496, |
| "learning_rate": 1.3930900457324049e-06, |
| "loss": 0.4652, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.3907563025210084, |
| "grad_norm": 3.441807437463952, |
| "learning_rate": 1.39195201593339e-06, |
| "loss": 0.4943, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.39113827349121466, |
| "grad_norm": 2.7967035679368677, |
| "learning_rate": 1.390813386057086e-06, |
| "loss": 0.4768, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.3915202444614209, |
| "grad_norm": 12.060161384187719, |
| "learning_rate": 1.3896741578467312e-06, |
| "loss": 0.5359, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.3919022154316272, |
| "grad_norm": 4.632740176202578, |
| "learning_rate": 1.3885343330464806e-06, |
| "loss": 0.4465, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.39228418640183343, |
| "grad_norm": 5.177448822725735, |
| "learning_rate": 1.3873939134014018e-06, |
| "loss": 0.4365, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.39266615737203975, |
| "grad_norm": 3.291902676662862, |
| "learning_rate": 1.3862529006574746e-06, |
| "loss": 0.5519, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.393048128342246, |
| "grad_norm": 18.431443968539902, |
| "learning_rate": 1.385111296561585e-06, |
| "loss": 0.5076, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.39343009931245226, |
| "grad_norm": 5.846749288635415, |
| "learning_rate": 1.3839691028615256e-06, |
| "loss": 0.4993, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3938120702826585, |
| "grad_norm": 2.518950053819085, |
| "learning_rate": 1.3828263213059915e-06, |
| "loss": 0.5063, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.3941940412528648, |
| "grad_norm": 4.014978335278681, |
| "learning_rate": 1.3816829536445772e-06, |
| "loss": 0.5106, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.39457601222307104, |
| "grad_norm": 2.3272006995407195, |
| "learning_rate": 1.380539001627775e-06, |
| "loss": 0.443, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.3949579831932773, |
| "grad_norm": 3.199604096797627, |
| "learning_rate": 1.3793944670069722e-06, |
| "loss": 0.5355, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.39533995416348355, |
| "grad_norm": 16.726178083364527, |
| "learning_rate": 1.3782493515344475e-06, |
| "loss": 0.3989, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.39572192513368987, |
| "grad_norm": 4.009218609638227, |
| "learning_rate": 1.377103656963369e-06, |
| "loss": 0.5018, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.3961038961038961, |
| "grad_norm": 4.280555960177796, |
| "learning_rate": 1.3759573850477912e-06, |
| "loss": 0.4962, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.3964858670741024, |
| "grad_norm": 4.8440714534516465, |
| "learning_rate": 1.3748105375426529e-06, |
| "loss": 0.5608, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.39686783804430864, |
| "grad_norm": 4.633871486245415, |
| "learning_rate": 1.3736631162037742e-06, |
| "loss": 0.518, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.3972498090145149, |
| "grad_norm": 4.724392552772769, |
| "learning_rate": 1.3725151227878535e-06, |
| "loss": 0.5363, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.39763177998472116, |
| "grad_norm": 3.818997639249547, |
| "learning_rate": 1.3713665590524646e-06, |
| "loss": 0.5116, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.3980137509549274, |
| "grad_norm": 4.386142417291277, |
| "learning_rate": 1.3702174267560556e-06, |
| "loss": 0.5294, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.3983957219251337, |
| "grad_norm": 4.4357329961026934, |
| "learning_rate": 1.369067727657944e-06, |
| "loss": 0.5314, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.39877769289533993, |
| "grad_norm": 21.471149918132554, |
| "learning_rate": 1.3679174635183153e-06, |
| "loss": 0.5344, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.39915966386554624, |
| "grad_norm": 2.8010450710974797, |
| "learning_rate": 1.3667666360982203e-06, |
| "loss": 0.4522, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.3995416348357525, |
| "grad_norm": 3.5143617209461953, |
| "learning_rate": 1.3656152471595728e-06, |
| "loss": 0.4183, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.39992360580595876, |
| "grad_norm": 8.181944527126483, |
| "learning_rate": 1.3644632984651448e-06, |
| "loss": 0.4511, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.400305576776165, |
| "grad_norm": 5.379745216859139, |
| "learning_rate": 1.3633107917785664e-06, |
| "loss": 0.421, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.4006875477463713, |
| "grad_norm": 2.4402346546106646, |
| "learning_rate": 1.3621577288643216e-06, |
| "loss": 0.4628, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.40106951871657753, |
| "grad_norm": 3.9850365443403932, |
| "learning_rate": 1.361004111487746e-06, |
| "loss": 0.4997, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4014514896867838, |
| "grad_norm": 2.704222372248545, |
| "learning_rate": 1.3598499414150246e-06, |
| "loss": 0.5521, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.40183346065699005, |
| "grad_norm": 2.8845884083437605, |
| "learning_rate": 1.3586952204131872e-06, |
| "loss": 0.5612, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.4022154316271963, |
| "grad_norm": 8.402444586867139, |
| "learning_rate": 1.3575399502501087e-06, |
| "loss": 0.5217, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.4025974025974026, |
| "grad_norm": 10.742726804172774, |
| "learning_rate": 1.356384132694504e-06, |
| "loss": 0.5248, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4029793735676089, |
| "grad_norm": 6.63035505780104, |
| "learning_rate": 1.3552277695159263e-06, |
| "loss": 0.4687, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.40336134453781514, |
| "grad_norm": 3.0647578675508713, |
| "learning_rate": 1.3540708624847627e-06, |
| "loss": 0.4879, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.4037433155080214, |
| "grad_norm": 3.245881096555954, |
| "learning_rate": 1.3529134133722357e-06, |
| "loss": 0.4494, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.40412528647822765, |
| "grad_norm": 3.162026694383143, |
| "learning_rate": 1.3517554239503954e-06, |
| "loss": 0.477, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4045072574484339, |
| "grad_norm": 2.3227326186227257, |
| "learning_rate": 1.3505968959921203e-06, |
| "loss": 0.4746, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.40488922841864017, |
| "grad_norm": 2.766756314398787, |
| "learning_rate": 1.3494378312711127e-06, |
| "loss": 0.5027, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4052711993888464, |
| "grad_norm": 3.7607550125463973, |
| "learning_rate": 1.348278231561897e-06, |
| "loss": 0.4389, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.40565317035905274, |
| "grad_norm": 7.409561698492238, |
| "learning_rate": 1.3471180986398171e-06, |
| "loss": 0.5558, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.406035141329259, |
| "grad_norm": 3.782115767604086, |
| "learning_rate": 1.3459574342810323e-06, |
| "loss": 0.482, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.40641711229946526, |
| "grad_norm": 4.512926298981771, |
| "learning_rate": 1.3447962402625162e-06, |
| "loss": 0.5108, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.4067990832696715, |
| "grad_norm": 4.105292005167865, |
| "learning_rate": 1.3436345183620534e-06, |
| "loss": 0.5059, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.40718105423987777, |
| "grad_norm": 6.221311275182224, |
| "learning_rate": 1.3424722703582361e-06, |
| "loss": 0.4603, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.40756302521008403, |
| "grad_norm": 4.414167751227817, |
| "learning_rate": 1.3413094980304624e-06, |
| "loss": 0.5248, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4079449961802903, |
| "grad_norm": 12.937308649413591, |
| "learning_rate": 1.340146203158933e-06, |
| "loss": 0.4323, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.40832696715049654, |
| "grad_norm": 5.121224260075271, |
| "learning_rate": 1.338982387524649e-06, |
| "loss": 0.5008, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.4087089381207028, |
| "grad_norm": 3.972389452208772, |
| "learning_rate": 1.337818052909408e-06, |
| "loss": 0.5284, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4090909090909091, |
| "grad_norm": 3.4747430919143008, |
| "learning_rate": 1.3366532010958026e-06, |
| "loss": 0.5428, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.4094728800611154, |
| "grad_norm": 2.7257266623536758, |
| "learning_rate": 1.3354878338672175e-06, |
| "loss": 0.4877, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.40985485103132163, |
| "grad_norm": 3.5251937039902557, |
| "learning_rate": 1.3343219530078262e-06, |
| "loss": 0.5642, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4102368220015279, |
| "grad_norm": 2.655070455453984, |
| "learning_rate": 1.3331555603025883e-06, |
| "loss": 0.505, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.41061879297173415, |
| "grad_norm": 3.4577485873396485, |
| "learning_rate": 1.3319886575372473e-06, |
| "loss": 0.4826, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4110007639419404, |
| "grad_norm": 2.8641674357455105, |
| "learning_rate": 1.3308212464983281e-06, |
| "loss": 0.495, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.41138273491214666, |
| "grad_norm": 5.807472208222438, |
| "learning_rate": 1.3296533289731331e-06, |
| "loss": 0.5409, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 4.120319959181346, |
| "learning_rate": 1.3284849067497397e-06, |
| "loss": 0.513, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.4121466768525592, |
| "grad_norm": 2.579678150552606, |
| "learning_rate": 1.3273159816169996e-06, |
| "loss": 0.4294, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4125286478227655, |
| "grad_norm": 2.268653454252923, |
| "learning_rate": 1.3261465553645328e-06, |
| "loss": 0.432, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.41291061879297175, |
| "grad_norm": 3.025464669627552, |
| "learning_rate": 1.3249766297827273e-06, |
| "loss": 0.4897, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.413292589763178, |
| "grad_norm": 2.581761258324958, |
| "learning_rate": 1.3238062066627355e-06, |
| "loss": 0.488, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.41367456073338427, |
| "grad_norm": 5.55222952808423, |
| "learning_rate": 1.3226352877964715e-06, |
| "loss": 0.5, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4140565317035905, |
| "grad_norm": 3.507013858142906, |
| "learning_rate": 1.3214638749766084e-06, |
| "loss": 0.4684, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.4144385026737968, |
| "grad_norm": 8.364119968396363, |
| "learning_rate": 1.3202919699965756e-06, |
| "loss": 0.5217, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.41482047364400304, |
| "grad_norm": 6.241126374949574, |
| "learning_rate": 1.3191195746505555e-06, |
| "loss": 0.5791, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.4152024446142093, |
| "grad_norm": 5.717811993856367, |
| "learning_rate": 1.3179466907334815e-06, |
| "loss": 0.5686, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.4155844155844156, |
| "grad_norm": 16.33849161999853, |
| "learning_rate": 1.316773320041036e-06, |
| "loss": 0.4645, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.41596638655462187, |
| "grad_norm": 4.365623914056201, |
| "learning_rate": 1.315599464369645e-06, |
| "loss": 0.5383, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4163483575248281, |
| "grad_norm": 6.688011636580966, |
| "learning_rate": 1.3144251255164784e-06, |
| "loss": 0.4655, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4167303284950344, |
| "grad_norm": 6.08767403439254, |
| "learning_rate": 1.313250305279445e-06, |
| "loss": 0.491, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.41711229946524064, |
| "grad_norm": 4.5512808754921465, |
| "learning_rate": 1.3120750054571904e-06, |
| "loss": 0.4797, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.4174942704354469, |
| "grad_norm": 3.6174836414142613, |
| "learning_rate": 1.3108992278490955e-06, |
| "loss": 0.5254, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.41787624140565316, |
| "grad_norm": 2.5776694158723545, |
| "learning_rate": 1.309722974255272e-06, |
| "loss": 0.4617, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4182582123758594, |
| "grad_norm": 3.392763958571057, |
| "learning_rate": 1.30854624647656e-06, |
| "loss": 0.5418, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4186401833460657, |
| "grad_norm": 4.80249052847419, |
| "learning_rate": 1.3073690463145265e-06, |
| "loss": 0.511, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.419022154316272, |
| "grad_norm": 2.063868288679112, |
| "learning_rate": 1.3061913755714608e-06, |
| "loss": 0.4279, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.41940412528647825, |
| "grad_norm": 4.44752038172148, |
| "learning_rate": 1.3050132360503734e-06, |
| "loss": 0.5067, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.4197860962566845, |
| "grad_norm": 2.43079310683215, |
| "learning_rate": 1.3038346295549917e-06, |
| "loss": 0.5036, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 3.087986254246443, |
| "learning_rate": 1.3026555578897593e-06, |
| "loss": 0.4194, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.420550038197097, |
| "grad_norm": 2.861068330916895, |
| "learning_rate": 1.3014760228598301e-06, |
| "loss": 0.4778, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.4209320091673033, |
| "grad_norm": 3.1273940487669885, |
| "learning_rate": 1.3002960262710692e-06, |
| "loss": 0.4957, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.42131398013750954, |
| "grad_norm": 11.09705122642753, |
| "learning_rate": 1.2991155699300475e-06, |
| "loss": 0.4412, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.4216959511077158, |
| "grad_norm": 2.7211661339062307, |
| "learning_rate": 1.2979346556440395e-06, |
| "loss": 0.5002, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.42207792207792205, |
| "grad_norm": 2.7058097762325817, |
| "learning_rate": 1.296753285221022e-06, |
| "loss": 0.4455, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.42245989304812837, |
| "grad_norm": 3.33956849912416, |
| "learning_rate": 1.2955714604696691e-06, |
| "loss": 0.4393, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.4228418640183346, |
| "grad_norm": 4.30714280374409, |
| "learning_rate": 1.2943891831993501e-06, |
| "loss": 0.5359, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.4232238349885409, |
| "grad_norm": 3.737944346749512, |
| "learning_rate": 1.2932064552201284e-06, |
| "loss": 0.5078, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.42360580595874714, |
| "grad_norm": 5.440523550465423, |
| "learning_rate": 1.2920232783427566e-06, |
| "loss": 0.4403, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.4239877769289534, |
| "grad_norm": 3.729701007104042, |
| "learning_rate": 1.2908396543786746e-06, |
| "loss": 0.4342, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.42436974789915966, |
| "grad_norm": 15.43994590063205, |
| "learning_rate": 1.289655585140007e-06, |
| "loss": 0.4321, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.4247517188693659, |
| "grad_norm": 6.587078251439573, |
| "learning_rate": 1.28847107243956e-06, |
| "loss": 0.495, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.42513368983957217, |
| "grad_norm": 2.9102958858974572, |
| "learning_rate": 1.2872861180908193e-06, |
| "loss": 0.4504, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.4255156608097785, |
| "grad_norm": 3.945596990949721, |
| "learning_rate": 1.2861007239079452e-06, |
| "loss": 0.5611, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.42589763177998474, |
| "grad_norm": 11.583149332286014, |
| "learning_rate": 1.2849148917057734e-06, |
| "loss": 0.6082, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.426279602750191, |
| "grad_norm": 3.2093573724227946, |
| "learning_rate": 1.2837286232998086e-06, |
| "loss": 0.5386, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.42666157372039726, |
| "grad_norm": 4.195111563739433, |
| "learning_rate": 1.2825419205062242e-06, |
| "loss": 0.4815, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.4270435446906035, |
| "grad_norm": 4.241364490014807, |
| "learning_rate": 1.2813547851418587e-06, |
| "loss": 0.484, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.4274255156608098, |
| "grad_norm": 12.638732356072397, |
| "learning_rate": 1.2801672190242118e-06, |
| "loss": 0.506, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "grad_norm": 3.4114092408838244, |
| "learning_rate": 1.2789792239714442e-06, |
| "loss": 0.4662, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4281894576012223, |
| "grad_norm": 5.375163307961225, |
| "learning_rate": 1.2777908018023721e-06, |
| "loss": 0.5754, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 4.0498153971710344, |
| "learning_rate": 1.2766019543364666e-06, |
| "loss": 0.5991, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.42895339954163486, |
| "grad_norm": 5.283353271896474, |
| "learning_rate": 1.2754126833938484e-06, |
| "loss": 0.4709, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.4293353705118411, |
| "grad_norm": 2.9423837625564113, |
| "learning_rate": 1.2742229907952883e-06, |
| "loss": 0.4637, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.4297173414820474, |
| "grad_norm": 4.802382213000816, |
| "learning_rate": 1.2730328783622023e-06, |
| "loss": 0.5025, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.43009931245225363, |
| "grad_norm": 4.729613483707908, |
| "learning_rate": 1.2718423479166476e-06, |
| "loss": 0.5789, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.4304812834224599, |
| "grad_norm": 5.930344033965177, |
| "learning_rate": 1.270651401281323e-06, |
| "loss": 0.4907, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.43086325439266615, |
| "grad_norm": 6.399249174579611, |
| "learning_rate": 1.2694600402795643e-06, |
| "loss": 0.5434, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.4312452253628724, |
| "grad_norm": 4.725874325597204, |
| "learning_rate": 1.2682682667353414e-06, |
| "loss": 0.5078, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.43162719633307867, |
| "grad_norm": 3.127784542081372, |
| "learning_rate": 1.267076082473255e-06, |
| "loss": 0.5306, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4320091673032849, |
| "grad_norm": 3.8951466721721224, |
| "learning_rate": 1.2658834893185364e-06, |
| "loss": 0.5297, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.43239113827349124, |
| "grad_norm": 5.319632220306428, |
| "learning_rate": 1.2646904890970419e-06, |
| "loss": 0.5037, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.4327731092436975, |
| "grad_norm": 12.156586498156457, |
| "learning_rate": 1.26349708363525e-06, |
| "loss": 0.4712, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.43315508021390375, |
| "grad_norm": 3.6579913095297862, |
| "learning_rate": 1.2623032747602618e-06, |
| "loss": 0.5496, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.43353705118411, |
| "grad_norm": 34.59162241017071, |
| "learning_rate": 1.2611090642997941e-06, |
| "loss": 0.482, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.43391902215431627, |
| "grad_norm": 4.465393830779184, |
| "learning_rate": 1.25991445408218e-06, |
| "loss": 0.4429, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.4343009931245225, |
| "grad_norm": 5.2710706198103106, |
| "learning_rate": 1.2587194459363634e-06, |
| "loss": 0.4629, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.4346829640947288, |
| "grad_norm": 5.0416569180088775, |
| "learning_rate": 1.2575240416918981e-06, |
| "loss": 0.5584, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.43506493506493504, |
| "grad_norm": 4.160767116481659, |
| "learning_rate": 1.2563282431789446e-06, |
| "loss": 0.5367, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.43544690603514136, |
| "grad_norm": 2.4278904417735303, |
| "learning_rate": 1.255132052228266e-06, |
| "loss": 0.4322, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4358288770053476, |
| "grad_norm": 2.4236151260570815, |
| "learning_rate": 1.2539354706712273e-06, |
| "loss": 0.4969, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.4362108479755539, |
| "grad_norm": 12.539260411009332, |
| "learning_rate": 1.2527385003397906e-06, |
| "loss": 0.5147, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.43659281894576013, |
| "grad_norm": 4.294317064309806, |
| "learning_rate": 1.2515411430665142e-06, |
| "loss": 0.5108, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.4369747899159664, |
| "grad_norm": 7.562652888743942, |
| "learning_rate": 1.2503434006845481e-06, |
| "loss": 0.537, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.43735676088617265, |
| "grad_norm": 3.4712632895110094, |
| "learning_rate": 1.2491452750276317e-06, |
| "loss": 0.4542, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.4377387318563789, |
| "grad_norm": 5.924873661240008, |
| "learning_rate": 1.2479467679300922e-06, |
| "loss": 0.4853, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.43812070282658516, |
| "grad_norm": 5.020685434497532, |
| "learning_rate": 1.2467478812268395e-06, |
| "loss": 0.4446, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.4385026737967914, |
| "grad_norm": 4.134680076848503, |
| "learning_rate": 1.2455486167533657e-06, |
| "loss": 0.5461, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.43888464476699773, |
| "grad_norm": 3.7379112252874798, |
| "learning_rate": 1.2443489763457412e-06, |
| "loss": 0.4621, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.439266615737204, |
| "grad_norm": 3.151380948128957, |
| "learning_rate": 1.243148961840611e-06, |
| "loss": 0.4803, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.43964858670741025, |
| "grad_norm": 3.8402582005785755, |
| "learning_rate": 1.241948575075194e-06, |
| "loss": 0.4318, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.4400305576776165, |
| "grad_norm": 4.3539310487694785, |
| "learning_rate": 1.2407478178872778e-06, |
| "loss": 0.5722, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.44041252864782277, |
| "grad_norm": 85.6797164911384, |
| "learning_rate": 1.2395466921152186e-06, |
| "loss": 0.4572, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.440794499618029, |
| "grad_norm": 2.984376733858304, |
| "learning_rate": 1.2383451995979358e-06, |
| "loss": 0.4593, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 26.434417170354024, |
| "learning_rate": 1.2371433421749111e-06, |
| "loss": 0.5017, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.44155844155844154, |
| "grad_norm": 4.189293243485018, |
| "learning_rate": 1.2359411216861834e-06, |
| "loss": 0.4995, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4419404125286478, |
| "grad_norm": 3.477942627441066, |
| "learning_rate": 1.2347385399723493e-06, |
| "loss": 0.4496, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.4423223834988541, |
| "grad_norm": 4.821052532779218, |
| "learning_rate": 1.2335355988745576e-06, |
| "loss": 0.5236, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.44270435446906037, |
| "grad_norm": 1.9972719363055844, |
| "learning_rate": 1.2323323002345067e-06, |
| "loss": 0.4232, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.4430863254392666, |
| "grad_norm": 5.052703891846589, |
| "learning_rate": 1.2311286458944433e-06, |
| "loss": 0.6035, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4434682964094729, |
| "grad_norm": 3.4310540089602064, |
| "learning_rate": 1.229924637697158e-06, |
| "loss": 0.5135, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.44385026737967914, |
| "grad_norm": 11.968302863287425, |
| "learning_rate": 1.2287202774859845e-06, |
| "loss": 0.5268, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.4442322383498854, |
| "grad_norm": 6.062066545185635, |
| "learning_rate": 1.2275155671047936e-06, |
| "loss": 0.5321, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.44461420932009166, |
| "grad_norm": 3.0589899077634497, |
| "learning_rate": 1.226310508397993e-06, |
| "loss": 0.5545, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4449961802902979, |
| "grad_norm": 3.0982702971013514, |
| "learning_rate": 1.2251051032105244e-06, |
| "loss": 0.5269, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.44537815126050423, |
| "grad_norm": 4.013047995670636, |
| "learning_rate": 1.2238993533878589e-06, |
| "loss": 0.5273, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4457601222307105, |
| "grad_norm": 7.539763111242043, |
| "learning_rate": 1.2226932607759955e-06, |
| "loss": 0.5206, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.44614209320091675, |
| "grad_norm": 2.9559753054331424, |
| "learning_rate": 1.2214868272214585e-06, |
| "loss": 0.538, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.446524064171123, |
| "grad_norm": 4.4700657838149676, |
| "learning_rate": 1.2202800545712935e-06, |
| "loss": 0.5165, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.44690603514132926, |
| "grad_norm": 3.4528057031524275, |
| "learning_rate": 1.219072944673066e-06, |
| "loss": 0.5843, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4472880061115355, |
| "grad_norm": 12.836652899377839, |
| "learning_rate": 1.2178654993748567e-06, |
| "loss": 0.5116, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.4476699770817418, |
| "grad_norm": 3.953361297445188, |
| "learning_rate": 1.2166577205252615e-06, |
| "loss": 0.4924, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.44805194805194803, |
| "grad_norm": 3.5127667403238614, |
| "learning_rate": 1.2154496099733854e-06, |
| "loss": 0.4392, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.4484339190221543, |
| "grad_norm": 2.6671312447026296, |
| "learning_rate": 1.2142411695688414e-06, |
| "loss": 0.5055, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.4488158899923606, |
| "grad_norm": 3.137496881253067, |
| "learning_rate": 1.2130324011617488e-06, |
| "loss": 0.5346, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.44919786096256686, |
| "grad_norm": 8.708672027129488, |
| "learning_rate": 1.2118233066027277e-06, |
| "loss": 0.4389, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.4495798319327731, |
| "grad_norm": 3.3420365876445635, |
| "learning_rate": 1.210613887742898e-06, |
| "loss": 0.5132, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.4499618029029794, |
| "grad_norm": 5.556852945541378, |
| "learning_rate": 1.2094041464338761e-06, |
| "loss": 0.4858, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.45034377387318564, |
| "grad_norm": 3.6150252098952342, |
| "learning_rate": 1.2081940845277725e-06, |
| "loss": 0.4491, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.4507257448433919, |
| "grad_norm": 2.339849832497659, |
| "learning_rate": 1.2069837038771876e-06, |
| "loss": 0.4443, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.45110771581359815, |
| "grad_norm": 4.6405994630916, |
| "learning_rate": 1.205773006335211e-06, |
| "loss": 0.5103, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.4514896867838044, |
| "grad_norm": 3.5040573388223177, |
| "learning_rate": 1.204561993755416e-06, |
| "loss": 0.5253, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.45187165775401067, |
| "grad_norm": 4.02406379001709, |
| "learning_rate": 1.2033506679918594e-06, |
| "loss": 0.4484, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.452253628724217, |
| "grad_norm": 3.388381395227733, |
| "learning_rate": 1.2021390308990768e-06, |
| "loss": 0.5967, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.45263559969442324, |
| "grad_norm": 6.444932583069334, |
| "learning_rate": 1.2009270843320816e-06, |
| "loss": 0.4168, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4530175706646295, |
| "grad_norm": 3.593703564283377, |
| "learning_rate": 1.199714830146359e-06, |
| "loss": 0.4991, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.45339954163483576, |
| "grad_norm": 4.728487675258837, |
| "learning_rate": 1.198502270197867e-06, |
| "loss": 0.4914, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.453781512605042, |
| "grad_norm": 6.306712452964065, |
| "learning_rate": 1.1972894063430308e-06, |
| "loss": 0.461, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4541634835752483, |
| "grad_norm": 7.151973114117311, |
| "learning_rate": 1.1960762404387413e-06, |
| "loss": 0.4825, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 10.55885615899538, |
| "learning_rate": 1.194862774342351e-06, |
| "loss": 0.4894, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4549274255156608, |
| "grad_norm": 3.9313808891061663, |
| "learning_rate": 1.1936490099116735e-06, |
| "loss": 0.4786, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.4553093964858671, |
| "grad_norm": 3.476901809122921, |
| "learning_rate": 1.192434949004978e-06, |
| "loss": 0.5184, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.45569136745607336, |
| "grad_norm": 5.668372530085146, |
| "learning_rate": 1.1912205934809876e-06, |
| "loss": 0.5004, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.4560733384262796, |
| "grad_norm": 6.4296870499289405, |
| "learning_rate": 1.190005945198877e-06, |
| "loss": 0.5397, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.4564553093964859, |
| "grad_norm": 4.725008296529506, |
| "learning_rate": 1.1887910060182692e-06, |
| "loss": 0.4638, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.45683728036669213, |
| "grad_norm": 2.7978312243428793, |
| "learning_rate": 1.1875757777992313e-06, |
| "loss": 0.5007, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.4572192513368984, |
| "grad_norm": 2.4020090664757068, |
| "learning_rate": 1.1863602624022742e-06, |
| "loss": 0.4408, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.45760122230710465, |
| "grad_norm": 4.360547237284794, |
| "learning_rate": 1.1851444616883486e-06, |
| "loss": 0.4635, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4579831932773109, |
| "grad_norm": 2.9512084782993777, |
| "learning_rate": 1.183928377518841e-06, |
| "loss": 0.5623, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.45836516424751717, |
| "grad_norm": 6.979742443985152, |
| "learning_rate": 1.1827120117555728e-06, |
| "loss": 0.5724, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4587471352177235, |
| "grad_norm": 5.730731417502142, |
| "learning_rate": 1.181495366260796e-06, |
| "loss": 0.4598, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.45912910618792974, |
| "grad_norm": 3.0871943137221187, |
| "learning_rate": 1.1802784428971903e-06, |
| "loss": 0.542, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.459511077158136, |
| "grad_norm": 2.8116100600100378, |
| "learning_rate": 1.1790612435278627e-06, |
| "loss": 0.4651, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.45989304812834225, |
| "grad_norm": 5.698967179605214, |
| "learning_rate": 1.1778437700163407e-06, |
| "loss": 0.5649, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4602750190985485, |
| "grad_norm": 3.1755623138697575, |
| "learning_rate": 1.1766260242265725e-06, |
| "loss": 0.5049, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.46065699006875477, |
| "grad_norm": 5.599555390928325, |
| "learning_rate": 1.175408008022923e-06, |
| "loss": 0.4122, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.461038961038961, |
| "grad_norm": 3.580170190977131, |
| "learning_rate": 1.1741897232701713e-06, |
| "loss": 0.5239, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.4614209320091673, |
| "grad_norm": 14.971991557910343, |
| "learning_rate": 1.1729711718335075e-06, |
| "loss": 0.479, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.46180290297937354, |
| "grad_norm": 2.880404350454197, |
| "learning_rate": 1.1717523555785303e-06, |
| "loss": 0.4886, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.46218487394957986, |
| "grad_norm": 7.781190797017161, |
| "learning_rate": 1.1705332763712427e-06, |
| "loss": 0.5012, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4625668449197861, |
| "grad_norm": 2.4434443535930597, |
| "learning_rate": 1.1693139360780517e-06, |
| "loss": 0.4754, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.46294881588999237, |
| "grad_norm": 45.20097004657449, |
| "learning_rate": 1.168094336565763e-06, |
| "loss": 0.4677, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.46333078686019863, |
| "grad_norm": 2.145571295650126, |
| "learning_rate": 1.1668744797015799e-06, |
| "loss": 0.4429, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4637127578304049, |
| "grad_norm": 2.751742226788619, |
| "learning_rate": 1.1656543673530993e-06, |
| "loss": 0.4968, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.46409472880061114, |
| "grad_norm": 2.909178017099473, |
| "learning_rate": 1.1644340013883094e-06, |
| "loss": 0.4326, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.4644766997708174, |
| "grad_norm": 5.284530413482173, |
| "learning_rate": 1.1632133836755864e-06, |
| "loss": 0.4681, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.46485867074102366, |
| "grad_norm": 2.763154155064458, |
| "learning_rate": 1.1619925160836924e-06, |
| "loss": 0.538, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.46524064171123, |
| "grad_norm": 4.0912500612091005, |
| "learning_rate": 1.1607714004817716e-06, |
| "loss": 0.5096, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.46562261268143623, |
| "grad_norm": 3.987704457481841, |
| "learning_rate": 1.1595500387393479e-06, |
| "loss": 0.4758, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.4660045836516425, |
| "grad_norm": 6.550222662482566, |
| "learning_rate": 1.1583284327263225e-06, |
| "loss": 0.4638, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.46638655462184875, |
| "grad_norm": 2.4034258022797865, |
| "learning_rate": 1.1571065843129707e-06, |
| "loss": 0.4397, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.466768525592055, |
| "grad_norm": 3.6897113611530905, |
| "learning_rate": 1.1558844953699378e-06, |
| "loss": 0.5763, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.46715049656226126, |
| "grad_norm": 4.339232174448057, |
| "learning_rate": 1.1546621677682381e-06, |
| "loss": 0.5573, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.4675324675324675, |
| "grad_norm": 3.203860802966495, |
| "learning_rate": 1.1534396033792523e-06, |
| "loss": 0.4413, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4679144385026738, |
| "grad_norm": 2.628311025178851, |
| "learning_rate": 1.1522168040747216e-06, |
| "loss": 0.5194, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.46829640947288004, |
| "grad_norm": 32.48635309573544, |
| "learning_rate": 1.1509937717267482e-06, |
| "loss": 0.5325, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.46867838044308635, |
| "grad_norm": 6.698122924808716, |
| "learning_rate": 1.1497705082077903e-06, |
| "loss": 0.5143, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.4690603514132926, |
| "grad_norm": 2.9475576684385243, |
| "learning_rate": 1.1485470153906612e-06, |
| "loss": 0.4947, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.46944232238349887, |
| "grad_norm": 3.3350127815414723, |
| "learning_rate": 1.147323295148524e-06, |
| "loss": 0.4642, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.4698242933537051, |
| "grad_norm": 8.905333506931377, |
| "learning_rate": 1.1460993493548905e-06, |
| "loss": 0.5154, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4702062643239114, |
| "grad_norm": 2.5257033406933744, |
| "learning_rate": 1.144875179883618e-06, |
| "loss": 0.4871, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 4.53327072940148, |
| "learning_rate": 1.143650788608906e-06, |
| "loss": 0.4972, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4709702062643239, |
| "grad_norm": 5.160150274947944, |
| "learning_rate": 1.1424261774052939e-06, |
| "loss": 0.5123, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.47135217723453016, |
| "grad_norm": 3.0497075066239168, |
| "learning_rate": 1.1412013481476571e-06, |
| "loss": 0.5342, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4717341482047364, |
| "grad_norm": 3.352396281245782, |
| "learning_rate": 1.1399763027112056e-06, |
| "loss": 0.4841, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4721161191749427, |
| "grad_norm": 2.4010700114984025, |
| "learning_rate": 1.13875104297148e-06, |
| "loss": 0.4476, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.472498090145149, |
| "grad_norm": 2.1444364956763433, |
| "learning_rate": 1.137525570804349e-06, |
| "loss": 0.4792, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.47288006111535524, |
| "grad_norm": 2.4997522044099676, |
| "learning_rate": 1.1362998880860065e-06, |
| "loss": 0.4552, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4732620320855615, |
| "grad_norm": 11.869869458123512, |
| "learning_rate": 1.135073996692969e-06, |
| "loss": 0.3982, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.47364400305576776, |
| "grad_norm": 11.08603971046374, |
| "learning_rate": 1.1338478985020726e-06, |
| "loss": 0.5119, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.474025974025974, |
| "grad_norm": 3.5696420483847313, |
| "learning_rate": 1.1326215953904691e-06, |
| "loss": 0.4499, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4744079449961803, |
| "grad_norm": 4.197385123479996, |
| "learning_rate": 1.131395089235625e-06, |
| "loss": 0.4386, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.47478991596638653, |
| "grad_norm": 4.329632163315956, |
| "learning_rate": 1.1301683819153173e-06, |
| "loss": 0.4617, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.47517188693659285, |
| "grad_norm": 2.4072597666151028, |
| "learning_rate": 1.1289414753076312e-06, |
| "loss": 0.4296, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4755538579067991, |
| "grad_norm": 4.025047848026513, |
| "learning_rate": 1.1277143712909562e-06, |
| "loss": 0.4778, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.47593582887700536, |
| "grad_norm": 4.91052834287783, |
| "learning_rate": 1.1264870717439854e-06, |
| "loss": 0.4833, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4763177998472116, |
| "grad_norm": 2.992133174312151, |
| "learning_rate": 1.1252595785457103e-06, |
| "loss": 0.44, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.4766997708174179, |
| "grad_norm": 3.6402161256952534, |
| "learning_rate": 1.1240318935754186e-06, |
| "loss": 0.4952, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.47708174178762414, |
| "grad_norm": 4.552869776517533, |
| "learning_rate": 1.1228040187126925e-06, |
| "loss": 0.5271, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4774637127578304, |
| "grad_norm": 3.9200079953681772, |
| "learning_rate": 1.1215759558374046e-06, |
| "loss": 0.6368, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.47784568372803665, |
| "grad_norm": 3.1230412445338374, |
| "learning_rate": 1.120347706829715e-06, |
| "loss": 0.4492, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4782276546982429, |
| "grad_norm": 3.102490869262427, |
| "learning_rate": 1.1191192735700694e-06, |
| "loss": 0.4686, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4786096256684492, |
| "grad_norm": 4.476682357490567, |
| "learning_rate": 1.1178906579391944e-06, |
| "loss": 0.5489, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4789915966386555, |
| "grad_norm": 2.8135125812778616, |
| "learning_rate": 1.1166618618180975e-06, |
| "loss": 0.5074, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.47937356760886174, |
| "grad_norm": 40.753479628748224, |
| "learning_rate": 1.1154328870880615e-06, |
| "loss": 0.5075, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.479755538579068, |
| "grad_norm": 12.111904935457673, |
| "learning_rate": 1.1142037356306422e-06, |
| "loss": 0.531, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.48013750954927426, |
| "grad_norm": 17.359688140923335, |
| "learning_rate": 1.112974409327667e-06, |
| "loss": 0.5295, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.4805194805194805, |
| "grad_norm": 5.006833632043221, |
| "learning_rate": 1.111744910061231e-06, |
| "loss": 0.5058, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.48090145148968677, |
| "grad_norm": 4.87277750041395, |
| "learning_rate": 1.1105152397136927e-06, |
| "loss": 0.5186, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.48128342245989303, |
| "grad_norm": 5.03236301201575, |
| "learning_rate": 1.1092854001676743e-06, |
| "loss": 0.4766, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4816653934300993, |
| "grad_norm": 3.9862712010217707, |
| "learning_rate": 1.108055393306056e-06, |
| "loss": 0.4787, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.4820473644003056, |
| "grad_norm": 4.075868898495065, |
| "learning_rate": 1.106825221011974e-06, |
| "loss": 0.5061, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.48242933537051186, |
| "grad_norm": 3.3826587745482035, |
| "learning_rate": 1.1055948851688186e-06, |
| "loss": 0.4608, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4828113063407181, |
| "grad_norm": 2.7926168169643404, |
| "learning_rate": 1.10436438766023e-06, |
| "loss": 0.4725, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4831932773109244, |
| "grad_norm": 4.227679252388823, |
| "learning_rate": 1.1031337303700958e-06, |
| "loss": 0.6015, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.48357524828113063, |
| "grad_norm": 3.187908709583996, |
| "learning_rate": 1.1019029151825488e-06, |
| "loss": 0.5895, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4839572192513369, |
| "grad_norm": 4.074916916382239, |
| "learning_rate": 1.1006719439819624e-06, |
| "loss": 0.5196, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.48433919022154315, |
| "grad_norm": 8.929845403345944, |
| "learning_rate": 1.0994408186529504e-06, |
| "loss": 0.5266, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.4847211611917494, |
| "grad_norm": 5.391798819468841, |
| "learning_rate": 1.0982095410803613e-06, |
| "loss": 0.6122, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.4851031321619557, |
| "grad_norm": 3.9477864337213378, |
| "learning_rate": 1.0969781131492774e-06, |
| "loss": 0.5211, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.485485103132162, |
| "grad_norm": 4.2021316314091015, |
| "learning_rate": 1.0957465367450106e-06, |
| "loss": 0.5463, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.48586707410236823, |
| "grad_norm": 4.042478751794095, |
| "learning_rate": 1.0945148137531007e-06, |
| "loss": 0.4624, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.4862490450725745, |
| "grad_norm": 4.628945355798975, |
| "learning_rate": 1.0932829460593115e-06, |
| "loss": 0.4688, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.48663101604278075, |
| "grad_norm": 4.84585991908986, |
| "learning_rate": 1.092050935549629e-06, |
| "loss": 0.454, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.487012987012987, |
| "grad_norm": 3.569829003665894, |
| "learning_rate": 1.0908187841102565e-06, |
| "loss": 0.4872, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.48739495798319327, |
| "grad_norm": 3.1539513977942235, |
| "learning_rate": 1.089586493627615e-06, |
| "loss": 0.4405, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.4877769289533995, |
| "grad_norm": 3.707910848260399, |
| "learning_rate": 1.0883540659883366e-06, |
| "loss": 0.5597, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.4881588999236058, |
| "grad_norm": 3.7495327953119078, |
| "learning_rate": 1.0871215030792642e-06, |
| "loss": 0.4309, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4885408708938121, |
| "grad_norm": 3.587824538383655, |
| "learning_rate": 1.0858888067874477e-06, |
| "loss": 0.4526, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.48892284186401835, |
| "grad_norm": 2.260993818632954, |
| "learning_rate": 1.0846559790001413e-06, |
| "loss": 0.4388, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4893048128342246, |
| "grad_norm": 3.2144353104907557, |
| "learning_rate": 1.0834230216048004e-06, |
| "loss": 0.5239, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.48968678380443087, |
| "grad_norm": 2.9974050303973296, |
| "learning_rate": 1.0821899364890788e-06, |
| "loss": 0.5605, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.4900687547746371, |
| "grad_norm": 4.173980296608258, |
| "learning_rate": 1.0809567255408258e-06, |
| "loss": 0.5095, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.4904507257448434, |
| "grad_norm": 2.116436796307261, |
| "learning_rate": 1.079723390648084e-06, |
| "loss": 0.424, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.49083269671504964, |
| "grad_norm": 3.6930461747499774, |
| "learning_rate": 1.0784899336990844e-06, |
| "loss": 0.5205, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4912146676852559, |
| "grad_norm": 4.540039955232487, |
| "learning_rate": 1.077256356582246e-06, |
| "loss": 0.5158, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.49159663865546216, |
| "grad_norm": 6.271353181611177, |
| "learning_rate": 1.0760226611861714e-06, |
| "loss": 0.499, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.4919786096256685, |
| "grad_norm": 3.8796764423218373, |
| "learning_rate": 1.0747888493996447e-06, |
| "loss": 0.5119, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.49236058059587473, |
| "grad_norm": 6.360416011728165, |
| "learning_rate": 1.0735549231116271e-06, |
| "loss": 0.4952, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.492742551566081, |
| "grad_norm": 3.70411516225405, |
| "learning_rate": 1.0723208842112566e-06, |
| "loss": 0.459, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.49312452253628725, |
| "grad_norm": 5.632813688501988, |
| "learning_rate": 1.0710867345878423e-06, |
| "loss": 0.4223, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.4935064935064935, |
| "grad_norm": 3.2511274203262106, |
| "learning_rate": 1.0698524761308633e-06, |
| "loss": 0.4402, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.49388846447669976, |
| "grad_norm": 3.8371267082582534, |
| "learning_rate": 1.0686181107299657e-06, |
| "loss": 0.4357, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.494270435446906, |
| "grad_norm": 5.165651106582903, |
| "learning_rate": 1.067383640274959e-06, |
| "loss": 0.4468, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.4946524064171123, |
| "grad_norm": 13.071303533813465, |
| "learning_rate": 1.0661490666558135e-06, |
| "loss": 0.4457, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.4950343773873186, |
| "grad_norm": 5.755798614832232, |
| "learning_rate": 1.0649143917626572e-06, |
| "loss": 0.4874, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.49541634835752485, |
| "grad_norm": 7.817980213058826, |
| "learning_rate": 1.0636796174857735e-06, |
| "loss": 0.5073, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.4957983193277311, |
| "grad_norm": 130.82970679290145, |
| "learning_rate": 1.0624447457155982e-06, |
| "loss": 0.4831, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.49618029029793737, |
| "grad_norm": 3.885116213979448, |
| "learning_rate": 1.061209778342716e-06, |
| "loss": 0.5214, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.4965622612681436, |
| "grad_norm": 3.878921326386595, |
| "learning_rate": 1.0599747172578579e-06, |
| "loss": 0.4541, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4969442322383499, |
| "grad_norm": 2.816069956951671, |
| "learning_rate": 1.0587395643518982e-06, |
| "loss": 0.4576, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.49732620320855614, |
| "grad_norm": 3.421688209743285, |
| "learning_rate": 1.0575043215158525e-06, |
| "loss": 0.4258, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.4977081741787624, |
| "grad_norm": 3.6723025808360616, |
| "learning_rate": 1.0562689906408737e-06, |
| "loss": 0.4629, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.49809014514896865, |
| "grad_norm": 2.9379519686044517, |
| "learning_rate": 1.0550335736182491e-06, |
| "loss": 0.4804, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.49847211611917497, |
| "grad_norm": 3.487784717027458, |
| "learning_rate": 1.0537980723393982e-06, |
| "loss": 0.594, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.4988540870893812, |
| "grad_norm": 4.133689843056842, |
| "learning_rate": 1.05256248869587e-06, |
| "loss": 0.4509, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.4992360580595875, |
| "grad_norm": 2.9438677833420535, |
| "learning_rate": 1.0513268245793385e-06, |
| "loss": 0.4707, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.49961802902979374, |
| "grad_norm": 4.23654014930591, |
| "learning_rate": 1.050091081881602e-06, |
| "loss": 0.5688, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.8769089170784894, |
| "learning_rate": 1.0488552624945785e-06, |
| "loss": 0.5302, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5003819709702063, |
| "grad_norm": 2.4982081250306845, |
| "learning_rate": 1.0476193683103036e-06, |
| "loss": 0.5239, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5007639419404125, |
| "grad_norm": 2.7328193722764405, |
| "learning_rate": 1.0463834012209275e-06, |
| "loss": 0.5111, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5011459129106188, |
| "grad_norm": 14.462404041377377, |
| "learning_rate": 1.0451473631187114e-06, |
| "loss": 0.5698, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.501527883880825, |
| "grad_norm": 3.8057059746418886, |
| "learning_rate": 1.043911255896026e-06, |
| "loss": 0.5128, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.5019098548510313, |
| "grad_norm": 7.550989422236823, |
| "learning_rate": 1.0426750814453478e-06, |
| "loss": 0.5066, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5022918258212375, |
| "grad_norm": 21.794350606158684, |
| "learning_rate": 1.0414388416592553e-06, |
| "loss": 0.4053, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5026737967914439, |
| "grad_norm": 4.964648111005842, |
| "learning_rate": 1.0402025384304282e-06, |
| "loss": 0.525, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5030557677616501, |
| "grad_norm": 2.704461353663344, |
| "learning_rate": 1.0389661736516427e-06, |
| "loss": 0.4453, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5034377387318564, |
| "grad_norm": 3.596541442397453, |
| "learning_rate": 1.0377297492157694e-06, |
| "loss": 0.4711, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.5038197097020627, |
| "grad_norm": 2.6982636562854565, |
| "learning_rate": 1.03649326701577e-06, |
| "loss": 0.4962, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.5042016806722689, |
| "grad_norm": 3.5758546196602956, |
| "learning_rate": 1.035256728944695e-06, |
| "loss": 0.5319, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5045836516424752, |
| "grad_norm": 2.7338650139648855, |
| "learning_rate": 1.0340201368956801e-06, |
| "loss": 0.5238, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.5049656226126814, |
| "grad_norm": 9.011380745224324, |
| "learning_rate": 1.0327834927619438e-06, |
| "loss": 0.4911, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.5053475935828877, |
| "grad_norm": 2.3127010295672226, |
| "learning_rate": 1.0315467984367843e-06, |
| "loss": 0.4739, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.5057295645530939, |
| "grad_norm": 2.873514483067639, |
| "learning_rate": 1.030310055813577e-06, |
| "loss": 0.4333, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.5061115355233002, |
| "grad_norm": 2.529448768846022, |
| "learning_rate": 1.0290732667857705e-06, |
| "loss": 0.4595, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5064935064935064, |
| "grad_norm": 5.525960825885752, |
| "learning_rate": 1.027836433246885e-06, |
| "loss": 0.4809, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.5068754774637128, |
| "grad_norm": 3.4943325241715852, |
| "learning_rate": 1.0265995570905087e-06, |
| "loss": 0.5377, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.5072574484339191, |
| "grad_norm": 7.5645140214821165, |
| "learning_rate": 1.0253626402102954e-06, |
| "loss": 0.5336, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.5076394194041253, |
| "grad_norm": 3.2696463413539187, |
| "learning_rate": 1.0241256844999604e-06, |
| "loss": 0.4364, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.5080213903743316, |
| "grad_norm": 6.9584160075582595, |
| "learning_rate": 1.0228886918532791e-06, |
| "loss": 0.4351, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5084033613445378, |
| "grad_norm": 3.003922678403774, |
| "learning_rate": 1.0216516641640835e-06, |
| "loss": 0.4882, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.5087853323147441, |
| "grad_norm": 2.932152882801731, |
| "learning_rate": 1.020414603326259e-06, |
| "loss": 0.46, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.5091673032849503, |
| "grad_norm": 2.381377960281441, |
| "learning_rate": 1.0191775112337419e-06, |
| "loss": 0.5098, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.5095492742551566, |
| "grad_norm": 4.816521334707846, |
| "learning_rate": 1.0179403897805156e-06, |
| "loss": 0.5134, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.5099312452253628, |
| "grad_norm": 5.752920350007644, |
| "learning_rate": 1.0167032408606103e-06, |
| "loss": 0.4723, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.5103132161955691, |
| "grad_norm": 3.292341479456508, |
| "learning_rate": 1.0154660663680962e-06, |
| "loss": 0.4752, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.5106951871657754, |
| "grad_norm": 3.017397605540056, |
| "learning_rate": 1.0142288681970835e-06, |
| "loss": 0.4906, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.5110771581359816, |
| "grad_norm": 3.553899535961395, |
| "learning_rate": 1.0129916482417187e-06, |
| "loss": 0.4463, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.511459129106188, |
| "grad_norm": 2.8101660274095868, |
| "learning_rate": 1.011754408396182e-06, |
| "loss": 0.5076, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.5118411000763942, |
| "grad_norm": 3.1656049296367486, |
| "learning_rate": 1.0105171505546834e-06, |
| "loss": 0.5324, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5122230710466005, |
| "grad_norm": 4.898764704252674, |
| "learning_rate": 1.0092798766114609e-06, |
| "loss": 0.5099, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.5126050420168067, |
| "grad_norm": 14.139169491621754, |
| "learning_rate": 1.0080425884607766e-06, |
| "loss": 0.5188, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.512987012987013, |
| "grad_norm": 3.505703606474342, |
| "learning_rate": 1.006805287996915e-06, |
| "loss": 0.5271, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.5133689839572193, |
| "grad_norm": 13.945862624979869, |
| "learning_rate": 1.0055679771141793e-06, |
| "loss": 0.5674, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.5137509549274255, |
| "grad_norm": 3.131944114131963, |
| "learning_rate": 1.0043306577068882e-06, |
| "loss": 0.5225, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.5141329258976318, |
| "grad_norm": 2.372018227771199, |
| "learning_rate": 1.003093331669374e-06, |
| "loss": 0.4533, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.514514896867838, |
| "grad_norm": 3.2988067230203337, |
| "learning_rate": 1.001856000895979e-06, |
| "loss": 0.4922, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.5148968678380443, |
| "grad_norm": 4.938589303779806, |
| "learning_rate": 1.0006186672810522e-06, |
| "loss": 0.462, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.5152788388082505, |
| "grad_norm": 4.655576701126217, |
| "learning_rate": 9.99381332718948e-07, |
| "loss": 0.53, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.5156608097784569, |
| "grad_norm": 3.1018231048204505, |
| "learning_rate": 9.981439991040212e-07, |
| "loss": 0.4452, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.516042780748663, |
| "grad_norm": 3.6955327676526917, |
| "learning_rate": 9.96906668330626e-07, |
| "loss": 0.5178, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.5164247517188694, |
| "grad_norm": 7.2642960843934, |
| "learning_rate": 9.95669342293112e-07, |
| "loss": 0.4705, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.5168067226890757, |
| "grad_norm": 4.536834020213876, |
| "learning_rate": 9.944320228858208e-07, |
| "loss": 0.5322, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.5171886936592819, |
| "grad_norm": 3.5410384700532886, |
| "learning_rate": 9.93194712003085e-07, |
| "loss": 0.5278, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.5175706646294882, |
| "grad_norm": 294.2683369886722, |
| "learning_rate": 9.919574115392235e-07, |
| "loss": 0.5625, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5179526355996944, |
| "grad_norm": 3.182000528960174, |
| "learning_rate": 9.907201233885392e-07, |
| "loss": 0.4204, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.5183346065699007, |
| "grad_norm": 3.1946900622632635, |
| "learning_rate": 9.894828494453167e-07, |
| "loss": 0.4726, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.5187165775401069, |
| "grad_norm": 5.528384455651845, |
| "learning_rate": 9.88245591603818e-07, |
| "loss": 0.4554, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.5190985485103132, |
| "grad_norm": 5.4773425084696665, |
| "learning_rate": 9.870083517582812e-07, |
| "loss": 0.5441, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 7.220271179898539, |
| "learning_rate": 9.857711318029169e-07, |
| "loss": 0.4582, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5198624904507257, |
| "grad_norm": 4.784686919361763, |
| "learning_rate": 9.845339336319042e-07, |
| "loss": 0.5349, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.5202444614209321, |
| "grad_norm": 3.110800022530207, |
| "learning_rate": 9.832967591393896e-07, |
| "loss": 0.4794, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.5206264323911383, |
| "grad_norm": 2.950061284333217, |
| "learning_rate": 9.820596102194844e-07, |
| "loss": 0.5041, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.5210084033613446, |
| "grad_norm": 20.463942406841767, |
| "learning_rate": 9.808224887662582e-07, |
| "loss": 0.5719, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.5213903743315508, |
| "grad_norm": 17.791314544277856, |
| "learning_rate": 9.79585396673741e-07, |
| "loss": 0.5372, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5217723453017571, |
| "grad_norm": 10.673350794026165, |
| "learning_rate": 9.783483358359164e-07, |
| "loss": 0.488, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.5221543162719633, |
| "grad_norm": 6.272582296984684, |
| "learning_rate": 9.771113081467208e-07, |
| "loss": 0.4951, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.5225362872421696, |
| "grad_norm": 78.6440162561484, |
| "learning_rate": 9.7587431550004e-07, |
| "loss": 0.4793, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.5229182582123758, |
| "grad_norm": 3.165213133124479, |
| "learning_rate": 9.746373597897048e-07, |
| "loss": 0.444, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.5233002291825821, |
| "grad_norm": 3.1184183716641827, |
| "learning_rate": 9.734004429094912e-07, |
| "loss": 0.5683, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5236822001527884, |
| "grad_norm": 4.836415402427561, |
| "learning_rate": 9.721635667531152e-07, |
| "loss": 0.537, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.5240641711229946, |
| "grad_norm": 3.646586245251106, |
| "learning_rate": 9.709267332142296e-07, |
| "loss": 0.5344, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.524446142093201, |
| "grad_norm": 25.03775753954483, |
| "learning_rate": 9.696899441864232e-07, |
| "loss": 0.5304, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.5248281130634072, |
| "grad_norm": 2.469508639188065, |
| "learning_rate": 9.684532015632158e-07, |
| "loss": 0.501, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.5252100840336135, |
| "grad_norm": 6.275377375689046, |
| "learning_rate": 9.672165072380564e-07, |
| "loss": 0.4518, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5255920550038197, |
| "grad_norm": 3.739219533904625, |
| "learning_rate": 9.659798631043202e-07, |
| "loss": 0.4769, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.525974025974026, |
| "grad_norm": 3.2028836058005745, |
| "learning_rate": 9.647432710553051e-07, |
| "loss": 0.5084, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.5263559969442322, |
| "grad_norm": 2.832547689308899, |
| "learning_rate": 9.6350673298423e-07, |
| "loss": 0.5256, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.5267379679144385, |
| "grad_norm": 4.074197073360397, |
| "learning_rate": 9.622702507842307e-07, |
| "loss": 0.5739, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.5271199388846448, |
| "grad_norm": 2.733260110623376, |
| "learning_rate": 9.610338263483572e-07, |
| "loss": 0.4274, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.527501909854851, |
| "grad_norm": 4.257279848392443, |
| "learning_rate": 9.597974615695717e-07, |
| "loss": 0.4869, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.5278838808250573, |
| "grad_norm": 3.6805018016990534, |
| "learning_rate": 9.585611583407446e-07, |
| "loss": 0.4395, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.5282658517952635, |
| "grad_norm": 2.7505903636646405, |
| "learning_rate": 9.573249185546523e-07, |
| "loss": 0.4637, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.5286478227654698, |
| "grad_norm": 2.860561770977799, |
| "learning_rate": 9.560887441039738e-07, |
| "loss": 0.4548, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.529029793735676, |
| "grad_norm": 8.831501484101013, |
| "learning_rate": 9.548526368812887e-07, |
| "loss": 0.5577, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 5.0263480248609955, |
| "learning_rate": 9.536165987790727e-07, |
| "loss": 0.414, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.5297937356760886, |
| "grad_norm": 2.6563122746837617, |
| "learning_rate": 9.523806316896964e-07, |
| "loss": 0.5009, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.5301757066462949, |
| "grad_norm": 2.7201589095964285, |
| "learning_rate": 9.511447375054214e-07, |
| "loss": 0.438, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.5305576776165012, |
| "grad_norm": 5.460884031932415, |
| "learning_rate": 9.499089181183979e-07, |
| "loss": 0.4489, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.5309396485867074, |
| "grad_norm": 3.5615191551239747, |
| "learning_rate": 9.486731754206616e-07, |
| "loss": 0.5108, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5313216195569137, |
| "grad_norm": 9.30636166134041, |
| "learning_rate": 9.474375113041302e-07, |
| "loss": 0.4708, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.5317035905271199, |
| "grad_norm": 5.449374160647926, |
| "learning_rate": 9.462019276606017e-07, |
| "loss": 0.5523, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.5320855614973262, |
| "grad_norm": 3.49056858716858, |
| "learning_rate": 9.449664263817512e-07, |
| "loss": 0.5377, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5324675324675324, |
| "grad_norm": 2.902207041979767, |
| "learning_rate": 9.437310093591263e-07, |
| "loss": 0.4954, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.5328495034377387, |
| "grad_norm": 4.677500426124243, |
| "learning_rate": 9.424956784841473e-07, |
| "loss": 0.522, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.533231474407945, |
| "grad_norm": 13.018420099517328, |
| "learning_rate": 9.412604356481018e-07, |
| "loss": 0.5356, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5336134453781513, |
| "grad_norm": 6.175162131849886, |
| "learning_rate": 9.400252827421421e-07, |
| "loss": 0.4901, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5339954163483576, |
| "grad_norm": 7.725050355203537, |
| "learning_rate": 9.387902216572841e-07, |
| "loss": 0.5219, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.5343773873185638, |
| "grad_norm": 8.072550646440144, |
| "learning_rate": 9.375552542844016e-07, |
| "loss": 0.5152, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "grad_norm": 2.4221302927270743, |
| "learning_rate": 9.363203825142262e-07, |
| "loss": 0.4452, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5351413292589763, |
| "grad_norm": 3.2307294108351177, |
| "learning_rate": 9.350856082373429e-07, |
| "loss": 0.5134, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5355233002291826, |
| "grad_norm": 5.350677308214525, |
| "learning_rate": 9.338509333441865e-07, |
| "loss": 0.5055, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.5359052711993888, |
| "grad_norm": 5.664433410289456, |
| "learning_rate": 9.326163597250407e-07, |
| "loss": 0.5005, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.5362872421695951, |
| "grad_norm": 3.102982845081864, |
| "learning_rate": 9.313818892700343e-07, |
| "loss": 0.5239, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.5366692131398014, |
| "grad_norm": 26.69471541348305, |
| "learning_rate": 9.301475238691365e-07, |
| "loss": 0.4663, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5370511841100076, |
| "grad_norm": 8.868438039533663, |
| "learning_rate": 9.289132654121576e-07, |
| "loss": 0.4662, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5374331550802139, |
| "grad_norm": 22.121579017983922, |
| "learning_rate": 9.276791157887436e-07, |
| "loss": 0.5445, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.5378151260504201, |
| "grad_norm": 51.58676075333544, |
| "learning_rate": 9.264450768883727e-07, |
| "loss": 0.5196, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5381970970206265, |
| "grad_norm": 4.527370473189713, |
| "learning_rate": 9.252111506003557e-07, |
| "loss": 0.5098, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.5385790679908327, |
| "grad_norm": 4.090202605819511, |
| "learning_rate": 9.239773388138285e-07, |
| "loss": 0.495, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.538961038961039, |
| "grad_norm": 8.164285139505568, |
| "learning_rate": 9.227436434177539e-07, |
| "loss": 0.575, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.5393430099312452, |
| "grad_norm": 4.586672847633152, |
| "learning_rate": 9.215100663009158e-07, |
| "loss": 0.499, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5397249809014515, |
| "grad_norm": 9.803378384591609, |
| "learning_rate": 9.202766093519162e-07, |
| "loss": 0.4822, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5401069518716578, |
| "grad_norm": 3.0597382175916668, |
| "learning_rate": 9.190432744591739e-07, |
| "loss": 0.5386, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.540488922841864, |
| "grad_norm": 4.2626191363582775, |
| "learning_rate": 9.178100635109214e-07, |
| "loss": 0.5004, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5408708938120703, |
| "grad_norm": 3.103706429369607, |
| "learning_rate": 9.165769783951995e-07, |
| "loss": 0.5113, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.5412528647822765, |
| "grad_norm": 3.0638124770428274, |
| "learning_rate": 9.153440209998589e-07, |
| "loss": 0.4859, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5416348357524828, |
| "grad_norm": 4.055884011812646, |
| "learning_rate": 9.141111932125524e-07, |
| "loss": 0.4632, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.542016806722689, |
| "grad_norm": 4.701495297937109, |
| "learning_rate": 9.128784969207358e-07, |
| "loss": 0.4976, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5423987776928954, |
| "grad_norm": 4.138380618201808, |
| "learning_rate": 9.116459340116637e-07, |
| "loss": 0.4545, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5427807486631016, |
| "grad_norm": 3.306842464831278, |
| "learning_rate": 9.104135063723851e-07, |
| "loss": 0.5213, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.5431627196333079, |
| "grad_norm": 2.428190100888985, |
| "learning_rate": 9.091812158897432e-07, |
| "loss": 0.4542, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5435446906035142, |
| "grad_norm": 5.055143900923019, |
| "learning_rate": 9.079490644503714e-07, |
| "loss": 0.4704, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.5439266615737204, |
| "grad_norm": 3.442884686476501, |
| "learning_rate": 9.067170539406884e-07, |
| "loss": 0.4659, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.5443086325439267, |
| "grad_norm": 4.6387174303407726, |
| "learning_rate": 9.054851862468994e-07, |
| "loss": 0.4954, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5446906035141329, |
| "grad_norm": 3.018966390578636, |
| "learning_rate": 9.042534632549897e-07, |
| "loss": 0.4842, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5450725744843392, |
| "grad_norm": 12.512900108390705, |
| "learning_rate": 9.030218868507227e-07, |
| "loss": 0.5299, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 3.2874335207763608, |
| "learning_rate": 9.017904589196389e-07, |
| "loss": 0.4564, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5458365164247517, |
| "grad_norm": 3.7038858926146236, |
| "learning_rate": 9.005591813470497e-07, |
| "loss": 0.4896, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5462184873949579, |
| "grad_norm": 5.546433583514546, |
| "learning_rate": 8.993280560180376e-07, |
| "loss": 0.4317, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5466004583651642, |
| "grad_norm": 3.5277981501703106, |
| "learning_rate": 8.980970848174515e-07, |
| "loss": 0.4678, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.5469824293353706, |
| "grad_norm": 4.576434077551453, |
| "learning_rate": 8.968662696299041e-07, |
| "loss": 0.5186, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.5473644003055768, |
| "grad_norm": 3.52536301654518, |
| "learning_rate": 8.956356123397701e-07, |
| "loss": 0.5332, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.5477463712757831, |
| "grad_norm": 4.764198850876418, |
| "learning_rate": 8.944051148311816e-07, |
| "loss": 0.5026, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5481283422459893, |
| "grad_norm": 3.0442696758604746, |
| "learning_rate": 8.931747789880262e-07, |
| "loss": 0.5365, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5485103132161956, |
| "grad_norm": 4.766070575325928, |
| "learning_rate": 8.919446066939441e-07, |
| "loss": 0.5468, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5488922841864018, |
| "grad_norm": 9.341427804610916, |
| "learning_rate": 8.907145998323256e-07, |
| "loss": 0.4737, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.5492742551566081, |
| "grad_norm": 2.878781065183578, |
| "learning_rate": 8.894847602863073e-07, |
| "loss": 0.4587, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.5496562261268143, |
| "grad_norm": 2.9168697517060957, |
| "learning_rate": 8.882550899387692e-07, |
| "loss": 0.5013, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.5500381970970206, |
| "grad_norm": 2.440854640917449, |
| "learning_rate": 8.870255906723329e-07, |
| "loss": 0.4878, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5504201680672269, |
| "grad_norm": 8.104615867570066, |
| "learning_rate": 8.857962643693578e-07, |
| "loss": 0.4979, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5508021390374331, |
| "grad_norm": 4.602071585566376, |
| "learning_rate": 8.845671129119386e-07, |
| "loss": 0.4816, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.5511841100076394, |
| "grad_norm": 8.216271780031185, |
| "learning_rate": 8.833381381819024e-07, |
| "loss": 0.497, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.5515660809778457, |
| "grad_norm": 3.464263767454113, |
| "learning_rate": 8.821093420608053e-07, |
| "loss": 0.4295, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.551948051948052, |
| "grad_norm": 2.981673706352068, |
| "learning_rate": 8.808807264299308e-07, |
| "loss": 0.491, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5523300229182582, |
| "grad_norm": 7.051563396141812, |
| "learning_rate": 8.796522931702849e-07, |
| "loss": 0.438, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5527119938884645, |
| "grad_norm": 2.536412119372569, |
| "learning_rate": 8.784240441625952e-07, |
| "loss": 0.4295, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5530939648586708, |
| "grad_norm": 3.0183629329186408, |
| "learning_rate": 8.771959812873074e-07, |
| "loss": 0.5156, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.553475935828877, |
| "grad_norm": 4.3370251554163906, |
| "learning_rate": 8.759681064245813e-07, |
| "loss": 0.4973, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5538579067990833, |
| "grad_norm": 3.2729161328047645, |
| "learning_rate": 8.747404214542901e-07, |
| "loss": 0.4833, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5542398777692895, |
| "grad_norm": 16.05139909702016, |
| "learning_rate": 8.735129282560146e-07, |
| "loss": 0.5091, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5546218487394958, |
| "grad_norm": 2.8023413754370843, |
| "learning_rate": 8.722856287090436e-07, |
| "loss": 0.5034, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.555003819709702, |
| "grad_norm": 5.684720848689829, |
| "learning_rate": 8.710585246923692e-07, |
| "loss": 0.4611, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5553857906799083, |
| "grad_norm": 3.150330041187914, |
| "learning_rate": 8.698316180846828e-07, |
| "loss": 0.5577, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5557677616501145, |
| "grad_norm": 10.348445852541706, |
| "learning_rate": 8.686049107643749e-07, |
| "loss": 0.4978, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5561497326203209, |
| "grad_norm": 4.9387801550507575, |
| "learning_rate": 8.673784046095311e-07, |
| "loss": 0.5367, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5565317035905272, |
| "grad_norm": 2.9088678231936083, |
| "learning_rate": 8.661521014979276e-07, |
| "loss": 0.4076, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5569136745607334, |
| "grad_norm": 4.516298244096117, |
| "learning_rate": 8.649260033070307e-07, |
| "loss": 0.5148, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5572956455309397, |
| "grad_norm": 2.5243440271555135, |
| "learning_rate": 8.637001119139936e-07, |
| "loss": 0.5405, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5576776165011459, |
| "grad_norm": 7.64792718611458, |
| "learning_rate": 8.624744291956509e-07, |
| "loss": 0.5286, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5580595874713522, |
| "grad_norm": 4.003528589093513, |
| "learning_rate": 8.612489570285202e-07, |
| "loss": 0.4901, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5584415584415584, |
| "grad_norm": 14.095823837542111, |
| "learning_rate": 8.600236972887944e-07, |
| "loss": 0.5361, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 3.0189204387992765, |
| "learning_rate": 8.587986518523427e-07, |
| "loss": 0.5517, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.5592055003819709, |
| "grad_norm": 5.732847087739884, |
| "learning_rate": 8.575738225947062e-07, |
| "loss": 0.5817, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.5595874713521772, |
| "grad_norm": 3.222690739509161, |
| "learning_rate": 8.563492113910938e-07, |
| "loss": 0.5404, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5599694423223835, |
| "grad_norm": 4.7202010508339365, |
| "learning_rate": 8.551248201163817e-07, |
| "loss": 0.5231, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5603514132925898, |
| "grad_norm": 3.7328063733693546, |
| "learning_rate": 8.539006506451096e-07, |
| "loss": 0.5047, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5607333842627961, |
| "grad_norm": 5.0152413524387995, |
| "learning_rate": 8.52676704851476e-07, |
| "loss": 0.439, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5611153552330023, |
| "grad_norm": 4.647406415540796, |
| "learning_rate": 8.514529846093387e-07, |
| "loss": 0.5324, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5614973262032086, |
| "grad_norm": 4.6967314628587316, |
| "learning_rate": 8.502294917922097e-07, |
| "loss": 0.535, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5618792971734148, |
| "grad_norm": 2.7305578834315853, |
| "learning_rate": 8.49006228273252e-07, |
| "loss": 0.5271, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5622612681436211, |
| "grad_norm": 3.0620635795628792, |
| "learning_rate": 8.477831959252787e-07, |
| "loss": 0.517, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.5626432391138273, |
| "grad_norm": 2.5787028218139314, |
| "learning_rate": 8.465603966207479e-07, |
| "loss": 0.5051, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5630252100840336, |
| "grad_norm": 4.592223182467329, |
| "learning_rate": 8.453378322317616e-07, |
| "loss": 0.4816, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5634071810542399, |
| "grad_norm": 5.89203087141326, |
| "learning_rate": 8.441155046300625e-07, |
| "loss": 0.5154, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5637891520244461, |
| "grad_norm": 3.435600065604833, |
| "learning_rate": 8.428934156870295e-07, |
| "loss": 0.4965, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5641711229946524, |
| "grad_norm": 4.253808352654484, |
| "learning_rate": 8.416715672736774e-07, |
| "loss": 0.4058, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5645530939648586, |
| "grad_norm": 2.9270004141326784, |
| "learning_rate": 8.404499612606524e-07, |
| "loss": 0.5441, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.564935064935065, |
| "grad_norm": 3.7766543211689836, |
| "learning_rate": 8.392285995182287e-07, |
| "loss": 0.5681, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5653170359052712, |
| "grad_norm": 4.8245193053143485, |
| "learning_rate": 8.380074839163081e-07, |
| "loss": 0.4787, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5656990068754775, |
| "grad_norm": 45.03903991958585, |
| "learning_rate": 8.367866163244137e-07, |
| "loss": 0.5358, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5660809778456837, |
| "grad_norm": 2.9278826442105803, |
| "learning_rate": 8.355659986116906e-07, |
| "loss": 0.4661, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.56646294881589, |
| "grad_norm": 4.0805391091478205, |
| "learning_rate": 8.343456326469008e-07, |
| "loss": 0.5673, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5668449197860963, |
| "grad_norm": 2.307755692837947, |
| "learning_rate": 8.331255202984201e-07, |
| "loss": 0.4078, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5672268907563025, |
| "grad_norm": 4.828143841675473, |
| "learning_rate": 8.319056634342371e-07, |
| "loss": 0.5266, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5676088617265088, |
| "grad_norm": 3.561454752977373, |
| "learning_rate": 8.306860639219487e-07, |
| "loss": 0.4747, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.567990832696715, |
| "grad_norm": 5.006142807023533, |
| "learning_rate": 8.294667236287574e-07, |
| "loss": 0.5029, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5683728036669213, |
| "grad_norm": 7.7399372091191285, |
| "learning_rate": 8.282476444214699e-07, |
| "loss": 0.5099, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5687547746371275, |
| "grad_norm": 4.596170322745298, |
| "learning_rate": 8.270288281664924e-07, |
| "loss": 0.4914, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5691367456073338, |
| "grad_norm": 3.191470484566011, |
| "learning_rate": 8.258102767298287e-07, |
| "loss": 0.6249, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.56951871657754, |
| "grad_norm": 2.7721116349072727, |
| "learning_rate": 8.245919919770771e-07, |
| "loss": 0.4786, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5699006875477464, |
| "grad_norm": 10.296186342257238, |
| "learning_rate": 8.233739757734278e-07, |
| "loss": 0.473, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5702826585179527, |
| "grad_norm": 8.707234167735932, |
| "learning_rate": 8.221562299836596e-07, |
| "loss": 0.5692, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5706646294881589, |
| "grad_norm": 7.119849279027865, |
| "learning_rate": 8.209387564721375e-07, |
| "loss": 0.5222, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5710466004583652, |
| "grad_norm": 3.271163811284525, |
| "learning_rate": 8.197215571028097e-07, |
| "loss": 0.4784, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 3.474129399231328, |
| "learning_rate": 8.185046337392041e-07, |
| "loss": 0.486, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5718105423987777, |
| "grad_norm": 7.702458839905247, |
| "learning_rate": 8.172879882444272e-07, |
| "loss": 0.466, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5721925133689839, |
| "grad_norm": 2.3986718979238626, |
| "learning_rate": 8.160716224811587e-07, |
| "loss": 0.4286, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5725744843391902, |
| "grad_norm": 2.074505714324364, |
| "learning_rate": 8.148555383116513e-07, |
| "loss": 0.4688, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5729564553093965, |
| "grad_norm": 3.4524797955782005, |
| "learning_rate": 8.136397375977257e-07, |
| "loss": 0.5067, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5733384262796027, |
| "grad_norm": 4.116027578701245, |
| "learning_rate": 8.124242222007688e-07, |
| "loss": 0.441, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.573720397249809, |
| "grad_norm": 7.8122892836416336, |
| "learning_rate": 8.112089939817311e-07, |
| "loss": 0.524, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5741023682200153, |
| "grad_norm": 2.8876612966433948, |
| "learning_rate": 8.099940548011229e-07, |
| "loss": 0.4565, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.5744843391902216, |
| "grad_norm": 2.944620818559759, |
| "learning_rate": 8.087794065190121e-07, |
| "loss": 0.4721, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5748663101604278, |
| "grad_norm": 3.3277066539267253, |
| "learning_rate": 8.075650509950222e-07, |
| "loss": 0.4696, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5752482811306341, |
| "grad_norm": 10.689471168670488, |
| "learning_rate": 8.063509900883263e-07, |
| "loss": 0.493, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5756302521008403, |
| "grad_norm": 2.7181430629970813, |
| "learning_rate": 8.051372256576487e-07, |
| "loss": 0.4505, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5760122230710466, |
| "grad_norm": 3.3918780563240767, |
| "learning_rate": 8.03923759561259e-07, |
| "loss": 0.4509, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5763941940412529, |
| "grad_norm": 4.488221465490028, |
| "learning_rate": 8.027105936569693e-07, |
| "loss": 0.4735, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5767761650114591, |
| "grad_norm": 3.135011104593734, |
| "learning_rate": 8.014977298021329e-07, |
| "loss": 0.4989, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5771581359816654, |
| "grad_norm": 5.773036750506569, |
| "learning_rate": 8.002851698536412e-07, |
| "loss": 0.5428, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5775401069518716, |
| "grad_norm": 5.036394063568907, |
| "learning_rate": 7.990729156679185e-07, |
| "loss": 0.5348, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.577922077922078, |
| "grad_norm": 3.6336645286975044, |
| "learning_rate": 7.978609691009232e-07, |
| "loss": 0.4651, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5783040488922842, |
| "grad_norm": 3.638075424158412, |
| "learning_rate": 7.966493320081408e-07, |
| "loss": 0.5198, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5786860198624905, |
| "grad_norm": 6.900345004815312, |
| "learning_rate": 7.95438006244584e-07, |
| "loss": 0.5217, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5790679908326967, |
| "grad_norm": 3.9858375649847186, |
| "learning_rate": 7.942269936647893e-07, |
| "loss": 0.5086, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.579449961802903, |
| "grad_norm": 3.8240778432496136, |
| "learning_rate": 7.930162961228122e-07, |
| "loss": 0.441, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5798319327731093, |
| "grad_norm": 3.4529948281678147, |
| "learning_rate": 7.918059154722273e-07, |
| "loss": 0.4703, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5802139037433155, |
| "grad_norm": 3.939480421094444, |
| "learning_rate": 7.905958535661239e-07, |
| "loss": 0.5244, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5805958747135218, |
| "grad_norm": 2.1763912574345516, |
| "learning_rate": 7.89386112257102e-07, |
| "loss": 0.4816, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.580977845683728, |
| "grad_norm": 2.6665121976680437, |
| "learning_rate": 7.881766933972722e-07, |
| "loss": 0.4728, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5813598166539343, |
| "grad_norm": 2.777402183353908, |
| "learning_rate": 7.869675988382514e-07, |
| "loss": 0.4844, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5817417876241405, |
| "grad_norm": 3.588539547594645, |
| "learning_rate": 7.857588304311584e-07, |
| "loss": 0.4338, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5821237585943468, |
| "grad_norm": 4.32282386026526, |
| "learning_rate": 7.84550390026615e-07, |
| "loss": 0.4775, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.582505729564553, |
| "grad_norm": 2.931593889458715, |
| "learning_rate": 7.833422794747386e-07, |
| "loss": 0.4569, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.5828877005347594, |
| "grad_norm": 13.694014402650298, |
| "learning_rate": 7.82134500625143e-07, |
| "loss": 0.4663, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5832696715049657, |
| "grad_norm": 8.00099392041001, |
| "learning_rate": 7.809270553269344e-07, |
| "loss": 0.5448, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5836516424751719, |
| "grad_norm": 4.911152661343212, |
| "learning_rate": 7.797199454287065e-07, |
| "loss": 0.5383, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5840336134453782, |
| "grad_norm": 3.2819713397230283, |
| "learning_rate": 7.785131727785414e-07, |
| "loss": 0.4545, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5844155844155844, |
| "grad_norm": 3.2090755873591106, |
| "learning_rate": 7.773067392240047e-07, |
| "loss": 0.4786, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5847975553857907, |
| "grad_norm": 2.841300924397596, |
| "learning_rate": 7.761006466121412e-07, |
| "loss": 0.4764, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5851795263559969, |
| "grad_norm": 2.924587185056088, |
| "learning_rate": 7.748948967894754e-07, |
| "loss": 0.4458, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5855614973262032, |
| "grad_norm": 2.535663564324216, |
| "learning_rate": 7.73689491602007e-07, |
| "loss": 0.4888, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5859434682964094, |
| "grad_norm": 6.512219722601765, |
| "learning_rate": 7.724844328952064e-07, |
| "loss": 0.4775, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5863254392666157, |
| "grad_norm": 4.974082818782232, |
| "learning_rate": 7.712797225140158e-07, |
| "loss": 0.4751, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.586707410236822, |
| "grad_norm": 4.7343204793893845, |
| "learning_rate": 7.700753623028418e-07, |
| "loss": 0.4987, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5870893812070282, |
| "grad_norm": 2.799580518820704, |
| "learning_rate": 7.688713541055567e-07, |
| "loss": 0.4883, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.5874713521772346, |
| "grad_norm": 3.2897436593561826, |
| "learning_rate": 7.676676997654936e-07, |
| "loss": 0.495, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5878533231474408, |
| "grad_norm": 9.50610746090765, |
| "learning_rate": 7.664644011254425e-07, |
| "loss": 0.5041, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 3.969125093077795, |
| "learning_rate": 7.652614600276504e-07, |
| "loss": 0.542, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5886172650878533, |
| "grad_norm": 6.534517882111242, |
| "learning_rate": 7.640588783138165e-07, |
| "loss": 0.52, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5889992360580596, |
| "grad_norm": 3.4968680728699995, |
| "learning_rate": 7.62856657825089e-07, |
| "loss": 0.4587, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.5893812070282658, |
| "grad_norm": 3.5279074975526816, |
| "learning_rate": 7.616548004020642e-07, |
| "loss": 0.5227, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.5897631779984721, |
| "grad_norm": 4.029187714342275, |
| "learning_rate": 7.604533078847815e-07, |
| "loss": 0.4126, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5901451489686784, |
| "grad_norm": 6.315149413504649, |
| "learning_rate": 7.592521821127222e-07, |
| "loss": 0.4608, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5905271199388846, |
| "grad_norm": 3.0511915016198174, |
| "learning_rate": 7.580514249248064e-07, |
| "loss": 0.5158, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5909090909090909, |
| "grad_norm": 4.288807788346786, |
| "learning_rate": 7.568510381593891e-07, |
| "loss": 0.5374, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5912910618792971, |
| "grad_norm": 2.9619735092858104, |
| "learning_rate": 7.556510236542591e-07, |
| "loss": 0.5043, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5916730328495035, |
| "grad_norm": 3.853484044967723, |
| "learning_rate": 7.544513832466343e-07, |
| "loss": 0.554, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5920550038197097, |
| "grad_norm": 3.4507767530399764, |
| "learning_rate": 7.532521187731607e-07, |
| "loss": 0.4707, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.592436974789916, |
| "grad_norm": 2.8676870221969106, |
| "learning_rate": 7.520532320699079e-07, |
| "loss": 0.4643, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5928189457601223, |
| "grad_norm": 3.4993934273529423, |
| "learning_rate": 7.508547249723683e-07, |
| "loss": 0.484, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5932009167303285, |
| "grad_norm": 4.974887413665064, |
| "learning_rate": 7.49656599315452e-07, |
| "loss": 0.4409, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5935828877005348, |
| "grad_norm": 3.0087772046730805, |
| "learning_rate": 7.484588569334857e-07, |
| "loss": 0.4802, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.593964858670741, |
| "grad_norm": 3.556085540496316, |
| "learning_rate": 7.472614996602094e-07, |
| "loss": 0.458, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5943468296409473, |
| "grad_norm": 2.4299546193349637, |
| "learning_rate": 7.460645293287727e-07, |
| "loss": 0.509, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5947288006111535, |
| "grad_norm": 3.3761103144904068, |
| "learning_rate": 7.448679477717339e-07, |
| "loss": 0.4775, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5951107715813598, |
| "grad_norm": 3.189320341493309, |
| "learning_rate": 7.436717568210555e-07, |
| "loss": 0.5356, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.595492742551566, |
| "grad_norm": 3.013388009140299, |
| "learning_rate": 7.424759583081016e-07, |
| "loss": 0.4728, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5958747135217723, |
| "grad_norm": 3.046578423463988, |
| "learning_rate": 7.412805540636366e-07, |
| "loss": 0.5452, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5962566844919787, |
| "grad_norm": 2.2952147418741977, |
| "learning_rate": 7.4008554591782e-07, |
| "loss": 0.4767, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5966386554621849, |
| "grad_norm": 7.486369396821942, |
| "learning_rate": 7.388909357002056e-07, |
| "loss": 0.473, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5970206264323912, |
| "grad_norm": 3.1043359849327836, |
| "learning_rate": 7.376967252397384e-07, |
| "loss": 0.4619, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5974025974025974, |
| "grad_norm": 4.387374665190372, |
| "learning_rate": 7.365029163647498e-07, |
| "loss": 0.4387, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.5977845683728037, |
| "grad_norm": 5.097455794718711, |
| "learning_rate": 7.353095109029586e-07, |
| "loss": 0.4869, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5981665393430099, |
| "grad_norm": 4.110723581933108, |
| "learning_rate": 7.341165106814635e-07, |
| "loss": 0.5648, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5985485103132162, |
| "grad_norm": 3.0250820237041167, |
| "learning_rate": 7.329239175267447e-07, |
| "loss": 0.4695, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.5989304812834224, |
| "grad_norm": 2.9943913573090417, |
| "learning_rate": 7.31731733264659e-07, |
| "loss": 0.4694, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5993124522536287, |
| "grad_norm": 4.117308546393739, |
| "learning_rate": 7.305399597204357e-07, |
| "loss": 0.435, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.599694423223835, |
| "grad_norm": 2.55250388950113, |
| "learning_rate": 7.293485987186768e-07, |
| "loss": 0.4719, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6000763941940412, |
| "grad_norm": 5.6760675149597395, |
| "learning_rate": 7.281576520833527e-07, |
| "loss": 0.4344, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.6004583651642476, |
| "grad_norm": 2.6623917208637913, |
| "learning_rate": 7.26967121637798e-07, |
| "loss": 0.4687, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.6008403361344538, |
| "grad_norm": 4.649288674104955, |
| "learning_rate": 7.257770092047113e-07, |
| "loss": 0.4796, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.6012223071046601, |
| "grad_norm": 4.608891882942033, |
| "learning_rate": 7.245873166061516e-07, |
| "loss": 0.5011, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.6016042780748663, |
| "grad_norm": 4.050761013211949, |
| "learning_rate": 7.233980456635336e-07, |
| "loss": 0.526, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.6019862490450726, |
| "grad_norm": 2.9653108536504873, |
| "learning_rate": 7.222091981976279e-07, |
| "loss": 0.5106, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.6023682200152788, |
| "grad_norm": 4.626014268253724, |
| "learning_rate": 7.210207760285559e-07, |
| "loss": 0.551, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.6027501909854851, |
| "grad_norm": 4.259716690883014, |
| "learning_rate": 7.198327809757881e-07, |
| "loss": 0.505, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.6031321619556914, |
| "grad_norm": 6.6601277749781325, |
| "learning_rate": 7.186452148581416e-07, |
| "loss": 0.5592, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.6035141329258976, |
| "grad_norm": 3.968490857184638, |
| "learning_rate": 7.174580794937757e-07, |
| "loss": 0.4628, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6038961038961039, |
| "grad_norm": 6.682935705440368, |
| "learning_rate": 7.162713767001913e-07, |
| "loss": 0.5647, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.6042780748663101, |
| "grad_norm": 3.108425341167812, |
| "learning_rate": 7.150851082942269e-07, |
| "loss": 0.4292, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.6046600458365164, |
| "grad_norm": 3.615216176361843, |
| "learning_rate": 7.138992760920547e-07, |
| "loss": 0.4771, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.6050420168067226, |
| "grad_norm": 2.41401307253289, |
| "learning_rate": 7.127138819091807e-07, |
| "loss": 0.4679, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.605423987776929, |
| "grad_norm": 2.975265304410035, |
| "learning_rate": 7.115289275604399e-07, |
| "loss": 0.5113, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.6058059587471352, |
| "grad_norm": 2.4670213978756057, |
| "learning_rate": 7.103444148599928e-07, |
| "loss": 0.4444, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.6061879297173415, |
| "grad_norm": 2.721859542053092, |
| "learning_rate": 7.091603456213256e-07, |
| "loss": 0.494, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.6065699006875478, |
| "grad_norm": 3.1899384268225637, |
| "learning_rate": 7.079767216572435e-07, |
| "loss": 0.5085, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.606951871657754, |
| "grad_norm": 5.541235083933573, |
| "learning_rate": 7.067935447798715e-07, |
| "loss": 0.4959, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.6073338426279603, |
| "grad_norm": 2.276351070925125, |
| "learning_rate": 7.056108168006501e-07, |
| "loss": 0.416, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6077158135981665, |
| "grad_norm": 5.658392749534633, |
| "learning_rate": 7.044285395303311e-07, |
| "loss": 0.5796, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.6080977845683728, |
| "grad_norm": 6.379675852008344, |
| "learning_rate": 7.032467147789777e-07, |
| "loss": 0.5095, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.608479755538579, |
| "grad_norm": 4.131299850365157, |
| "learning_rate": 7.020653443559603e-07, |
| "loss": 0.5488, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.6088617265087853, |
| "grad_norm": 2.9944028762199797, |
| "learning_rate": 7.008844300699524e-07, |
| "loss": 0.4771, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.6092436974789915, |
| "grad_norm": 5.575470624436518, |
| "learning_rate": 6.997039737289306e-07, |
| "loss": 0.4635, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.6096256684491979, |
| "grad_norm": 3.8486467232113863, |
| "learning_rate": 6.9852397714017e-07, |
| "loss": 0.4916, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.6100076394194042, |
| "grad_norm": 11.050978921728946, |
| "learning_rate": 6.973444421102407e-07, |
| "loss": 0.4811, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.6103896103896104, |
| "grad_norm": 4.822958194483858, |
| "learning_rate": 6.961653704450083e-07, |
| "loss": 0.5356, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.6107715813598167, |
| "grad_norm": 2.817834665524706, |
| "learning_rate": 6.949867639496266e-07, |
| "loss": 0.4756, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.6111535523300229, |
| "grad_norm": 3.1957213238441144, |
| "learning_rate": 6.938086244285389e-07, |
| "loss": 0.4073, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6115355233002292, |
| "grad_norm": 5.636868095973053, |
| "learning_rate": 6.926309536854736e-07, |
| "loss": 0.4167, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.6119174942704354, |
| "grad_norm": 30.090375746305387, |
| "learning_rate": 6.914537535234398e-07, |
| "loss": 0.4582, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.6122994652406417, |
| "grad_norm": 5.184037798420374, |
| "learning_rate": 6.902770257447281e-07, |
| "loss": 0.4662, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.612681436210848, |
| "grad_norm": 3.671632410757147, |
| "learning_rate": 6.891007721509044e-07, |
| "loss": 0.4836, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.6130634071810542, |
| "grad_norm": 2.950958986784177, |
| "learning_rate": 6.879249945428096e-07, |
| "loss": 0.5001, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.6134453781512605, |
| "grad_norm": 4.3157545193073, |
| "learning_rate": 6.867496947205552e-07, |
| "loss": 0.5153, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.6138273491214667, |
| "grad_norm": 2.250749577401553, |
| "learning_rate": 6.855748744835215e-07, |
| "loss": 0.4348, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.6142093200916731, |
| "grad_norm": 2.2917709378525064, |
| "learning_rate": 6.844005356303548e-07, |
| "loss": 0.4505, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.6145912910618793, |
| "grad_norm": 2.438149237331104, |
| "learning_rate": 6.83226679958964e-07, |
| "loss": 0.43, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.6149732620320856, |
| "grad_norm": 3.02391998302188, |
| "learning_rate": 6.820533092665184e-07, |
| "loss": 0.4346, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6153552330022918, |
| "grad_norm": 4.366893675828903, |
| "learning_rate": 6.808804253494447e-07, |
| "loss": 0.4641, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.6157372039724981, |
| "grad_norm": 3.2359788318982434, |
| "learning_rate": 6.797080300034246e-07, |
| "loss": 0.4959, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.6161191749427044, |
| "grad_norm": 4.715978801891636, |
| "learning_rate": 6.785361250233916e-07, |
| "loss": 0.5739, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.6165011459129106, |
| "grad_norm": 2.038495378912477, |
| "learning_rate": 6.773647122035282e-07, |
| "loss": 0.4311, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.6168831168831169, |
| "grad_norm": 13.318932449928443, |
| "learning_rate": 6.761937933372646e-07, |
| "loss": 0.4765, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.6172650878533231, |
| "grad_norm": 3.3038029184889157, |
| "learning_rate": 6.750233702172725e-07, |
| "loss": 0.4834, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 3.961278587601363, |
| "learning_rate": 6.738534446354671e-07, |
| "loss": 0.517, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.6180290297937356, |
| "grad_norm": 3.3010476421667527, |
| "learning_rate": 6.726840183830005e-07, |
| "loss": 0.4803, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.618411000763942, |
| "grad_norm": 4.1461864571109555, |
| "learning_rate": 6.7151509325026e-07, |
| "loss": 0.504, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.6187929717341482, |
| "grad_norm": 6.0874786021177, |
| "learning_rate": 6.703466710268672e-07, |
| "loss": 0.4243, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6191749427043545, |
| "grad_norm": 2.844071999977886, |
| "learning_rate": 6.691787535016719e-07, |
| "loss": 0.5363, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.6195569136745608, |
| "grad_norm": 7.493734331551172, |
| "learning_rate": 6.680113424627525e-07, |
| "loss": 0.4752, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.619938884644767, |
| "grad_norm": 5.034722454254198, |
| "learning_rate": 6.668444396974118e-07, |
| "loss": 0.4612, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.6203208556149733, |
| "grad_norm": 4.666207523543107, |
| "learning_rate": 6.656780469921739e-07, |
| "loss": 0.4451, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.6207028265851795, |
| "grad_norm": 4.324324403295682, |
| "learning_rate": 6.645121661327823e-07, |
| "loss": 0.5515, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.6210847975553858, |
| "grad_norm": 10.74233453054578, |
| "learning_rate": 6.633467989041974e-07, |
| "loss": 0.4634, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.621466768525592, |
| "grad_norm": 3.9368757976763886, |
| "learning_rate": 6.621819470905919e-07, |
| "loss": 0.4891, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.6218487394957983, |
| "grad_norm": 4.195890839175386, |
| "learning_rate": 6.610176124753512e-07, |
| "loss": 0.5334, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.6222307104660045, |
| "grad_norm": 7.033789239854464, |
| "learning_rate": 6.598537968410669e-07, |
| "loss": 0.4981, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.6226126814362108, |
| "grad_norm": 3.670753713023111, |
| "learning_rate": 6.586905019695374e-07, |
| "loss": 0.4785, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6229946524064172, |
| "grad_norm": 6.79374173212643, |
| "learning_rate": 6.575277296417641e-07, |
| "loss": 0.5383, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.6233766233766234, |
| "grad_norm": 2.961589738049803, |
| "learning_rate": 6.563654816379467e-07, |
| "loss": 0.5909, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.6237585943468297, |
| "grad_norm": 4.548882294306666, |
| "learning_rate": 6.552037597374835e-07, |
| "loss": 0.528, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.6241405653170359, |
| "grad_norm": 5.588629780486591, |
| "learning_rate": 6.540425657189679e-07, |
| "loss": 0.5701, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.6245225362872422, |
| "grad_norm": 3.2222767903382583, |
| "learning_rate": 6.52881901360183e-07, |
| "loss": 0.5252, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6249045072574484, |
| "grad_norm": 2.708765525352287, |
| "learning_rate": 6.517217684381027e-07, |
| "loss": 0.4311, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.6252864782276547, |
| "grad_norm": 2.4373598394053366, |
| "learning_rate": 6.505621687288874e-07, |
| "loss": 0.4887, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.6256684491978609, |
| "grad_norm": 2.35629298170874, |
| "learning_rate": 6.494031040078796e-07, |
| "loss": 0.4235, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.6260504201680672, |
| "grad_norm": 5.006114811608024, |
| "learning_rate": 6.482445760496047e-07, |
| "loss": 0.4431, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.6264323911382735, |
| "grad_norm": 3.9758100753984205, |
| "learning_rate": 6.470865866277643e-07, |
| "loss": 0.4922, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6268143621084797, |
| "grad_norm": 3.942533295869188, |
| "learning_rate": 6.459291375152371e-07, |
| "loss": 0.5151, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.627196333078686, |
| "grad_norm": 5.7439273904282935, |
| "learning_rate": 6.447722304840742e-07, |
| "loss": 0.4903, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.6275783040488923, |
| "grad_norm": 6.767150703832732, |
| "learning_rate": 6.436158673054959e-07, |
| "loss": 0.5221, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.6279602750190986, |
| "grad_norm": 3.4573716609857184, |
| "learning_rate": 6.424600497498909e-07, |
| "loss": 0.4903, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.6283422459893048, |
| "grad_norm": 4.3338546356819245, |
| "learning_rate": 6.413047795868128e-07, |
| "loss": 0.5131, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.6287242169595111, |
| "grad_norm": 2.337049188668566, |
| "learning_rate": 6.401500585849755e-07, |
| "loss": 0.4417, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.6291061879297173, |
| "grad_norm": 4.623304772516744, |
| "learning_rate": 6.389958885122537e-07, |
| "loss": 0.513, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.6294881588999236, |
| "grad_norm": 2.825474625036937, |
| "learning_rate": 6.378422711356784e-07, |
| "loss": 0.5328, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.6298701298701299, |
| "grad_norm": 3.183544201518079, |
| "learning_rate": 6.366892082214335e-07, |
| "loss": 0.4569, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.6302521008403361, |
| "grad_norm": 3.6534350262559063, |
| "learning_rate": 6.355367015348554e-07, |
| "loss": 0.5391, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6306340718105424, |
| "grad_norm": 3.38465233341402, |
| "learning_rate": 6.343847528404272e-07, |
| "loss": 0.5551, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.6310160427807486, |
| "grad_norm": 5.682440315836746, |
| "learning_rate": 6.332333639017793e-07, |
| "loss": 0.4981, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.631398013750955, |
| "grad_norm": 3.1677497208065373, |
| "learning_rate": 6.320825364816849e-07, |
| "loss": 0.5261, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.6317799847211611, |
| "grad_norm": 4.803410013797171, |
| "learning_rate": 6.309322723420562e-07, |
| "loss": 0.5549, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.6321619556913675, |
| "grad_norm": 5.027427394401916, |
| "learning_rate": 6.297825732439443e-07, |
| "loss": 0.5482, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6325439266615738, |
| "grad_norm": 3.8853463499253422, |
| "learning_rate": 6.286334409475355e-07, |
| "loss": 0.501, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.63292589763178, |
| "grad_norm": 3.8428389865824224, |
| "learning_rate": 6.274848772121466e-07, |
| "loss": 0.5422, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.6333078686019863, |
| "grad_norm": 2.731374604475085, |
| "learning_rate": 6.263368837962257e-07, |
| "loss": 0.455, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.6336898395721925, |
| "grad_norm": 3.4802534529869664, |
| "learning_rate": 6.251894624573471e-07, |
| "loss": 0.473, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.6340718105423988, |
| "grad_norm": 2.9929345370113176, |
| "learning_rate": 6.240426149522089e-07, |
| "loss": 0.5272, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.634453781512605, |
| "grad_norm": 3.861558519836529, |
| "learning_rate": 6.228963430366314e-07, |
| "loss": 0.533, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.6348357524828113, |
| "grad_norm": 4.272779799163776, |
| "learning_rate": 6.217506484655525e-07, |
| "loss": 0.5985, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.6352177234530175, |
| "grad_norm": 4.963715926584021, |
| "learning_rate": 6.206055329930277e-07, |
| "loss": 0.4597, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.6355996944232238, |
| "grad_norm": 7.03579269952758, |
| "learning_rate": 6.19460998372225e-07, |
| "loss": 0.449, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.6359816653934302, |
| "grad_norm": 3.429746839420197, |
| "learning_rate": 6.18317046355423e-07, |
| "loss": 0.4666, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 4.840340168121579, |
| "learning_rate": 6.171736786940086e-07, |
| "loss": 0.4849, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.6367456073338427, |
| "grad_norm": 3.248146554075291, |
| "learning_rate": 6.160308971384743e-07, |
| "loss": 0.4846, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.6371275783040489, |
| "grad_norm": 6.126038353964562, |
| "learning_rate": 6.148887034384151e-07, |
| "loss": 0.4926, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.6375095492742552, |
| "grad_norm": 3.4897863205009316, |
| "learning_rate": 6.137470993425255e-07, |
| "loss": 0.4311, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.6378915202444614, |
| "grad_norm": 3.7539254782155225, |
| "learning_rate": 6.12606086598598e-07, |
| "loss": 0.4978, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6382734912146677, |
| "grad_norm": 3.6834259647704295, |
| "learning_rate": 6.114656669535195e-07, |
| "loss": 0.4303, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6386554621848739, |
| "grad_norm": 2.463752930446396, |
| "learning_rate": 6.103258421532688e-07, |
| "loss": 0.4979, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6390374331550802, |
| "grad_norm": 5.572518032338645, |
| "learning_rate": 6.091866139429141e-07, |
| "loss": 0.5014, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.6394194041252865, |
| "grad_norm": 3.4711321697402036, |
| "learning_rate": 6.080479840666099e-07, |
| "loss": 0.4709, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.6398013750954927, |
| "grad_norm": 13.368561355412897, |
| "learning_rate": 6.069099542675955e-07, |
| "loss": 0.5457, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.640183346065699, |
| "grad_norm": 3.860804853293634, |
| "learning_rate": 6.057725262881901e-07, |
| "loss": 0.5455, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6405653170359052, |
| "grad_norm": 13.287050371437967, |
| "learning_rate": 6.046357018697927e-07, |
| "loss": 0.4535, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.6409472880061116, |
| "grad_norm": 3.2079302137607826, |
| "learning_rate": 6.034994827528785e-07, |
| "loss": 0.4981, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6413292589763178, |
| "grad_norm": 2.829754729501561, |
| "learning_rate": 6.023638706769943e-07, |
| "loss": 0.4729, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 2.022832894555702, |
| "learning_rate": 6.012288673807595e-07, |
| "loss": 0.4926, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6420932009167303, |
| "grad_norm": 2.778956787238978, |
| "learning_rate": 6.000944746018596e-07, |
| "loss": 0.4604, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.6424751718869366, |
| "grad_norm": 3.7374003607417228, |
| "learning_rate": 5.989606940770469e-07, |
| "loss": 0.4772, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 2.7171406940786635, |
| "learning_rate": 5.97827527542136e-07, |
| "loss": 0.4831, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6432391138273491, |
| "grad_norm": 8.127558067294217, |
| "learning_rate": 5.966949767320004e-07, |
| "loss": 0.5173, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6436210847975554, |
| "grad_norm": 3.480736121288153, |
| "learning_rate": 5.955630433805726e-07, |
| "loss": 0.4912, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6440030557677616, |
| "grad_norm": 3.8094185592179697, |
| "learning_rate": 5.944317292208389e-07, |
| "loss": 0.5787, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6443850267379679, |
| "grad_norm": 2.405101421994293, |
| "learning_rate": 5.933010359848374e-07, |
| "loss": 0.4991, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.6447669977081741, |
| "grad_norm": 2.7304298777313787, |
| "learning_rate": 5.921709654036556e-07, |
| "loss": 0.5103, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.6451489686783805, |
| "grad_norm": 4.805287774289694, |
| "learning_rate": 5.910415192074288e-07, |
| "loss": 0.5227, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6455309396485867, |
| "grad_norm": 4.1700339428573745, |
| "learning_rate": 5.899126991253347e-07, |
| "loss": 0.5112, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.645912910618793, |
| "grad_norm": 10.044750000664395, |
| "learning_rate": 5.887845068855939e-07, |
| "loss": 0.5563, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6462948815889993, |
| "grad_norm": 3.469159628822302, |
| "learning_rate": 5.876569442154644e-07, |
| "loss": 0.4598, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6466768525592055, |
| "grad_norm": 10.365553823123852, |
| "learning_rate": 5.865300128412415e-07, |
| "loss": 0.4509, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 4.032808280921916, |
| "learning_rate": 5.85403714488254e-07, |
| "loss": 0.4741, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.647440794499618, |
| "grad_norm": 5.11652523829025, |
| "learning_rate": 5.8427805088086e-07, |
| "loss": 0.4334, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6478227654698243, |
| "grad_norm": 4.621337953148375, |
| "learning_rate": 5.831530237424477e-07, |
| "loss": 0.4143, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6482047364400305, |
| "grad_norm": 3.557162790697937, |
| "learning_rate": 5.820286347954302e-07, |
| "loss": 0.4195, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6485867074102368, |
| "grad_norm": 4.624049852239171, |
| "learning_rate": 5.809048857612427e-07, |
| "loss": 0.4501, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.648968678380443, |
| "grad_norm": 3.929897503465146, |
| "learning_rate": 5.797817783603418e-07, |
| "loss": 0.505, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "grad_norm": 3.1054123771508833, |
| "learning_rate": 5.786593143122016e-07, |
| "loss": 0.4928, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6497326203208557, |
| "grad_norm": 3.6791158790205816, |
| "learning_rate": 5.775374953353105e-07, |
| "loss": 0.4264, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6501145912910619, |
| "grad_norm": 3.7861539692813144, |
| "learning_rate": 5.764163231471704e-07, |
| "loss": 0.5176, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.6504965622612682, |
| "grad_norm": 3.1921624515477176, |
| "learning_rate": 5.752957994642915e-07, |
| "loss": 0.4663, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.6508785332314744, |
| "grad_norm": 2.7243804580628055, |
| "learning_rate": 5.741759260021925e-07, |
| "loss": 0.4736, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6512605042016807, |
| "grad_norm": 2.054999205122801, |
| "learning_rate": 5.730567044753964e-07, |
| "loss": 0.4609, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6516424751718869, |
| "grad_norm": 4.172156644093354, |
| "learning_rate": 5.719381365974272e-07, |
| "loss": 0.4597, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.6520244461420932, |
| "grad_norm": 2.973154086619092, |
| "learning_rate": 5.708202240808088e-07, |
| "loss": 0.5187, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.6524064171122995, |
| "grad_norm": 29.985463499899154, |
| "learning_rate": 5.697029686370625e-07, |
| "loss": 0.4641, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.6527883880825057, |
| "grad_norm": 4.452631031963895, |
| "learning_rate": 5.685863719767019e-07, |
| "loss": 0.54, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.653170359052712, |
| "grad_norm": 70.46307864789232, |
| "learning_rate": 5.674704358092331e-07, |
| "loss": 0.5315, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6535523300229182, |
| "grad_norm": 3.2095941246382793, |
| "learning_rate": 5.663551618431516e-07, |
| "loss": 0.4697, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6539343009931246, |
| "grad_norm": 2.7202164393697963, |
| "learning_rate": 5.652405517859372e-07, |
| "loss": 0.5036, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6543162719633308, |
| "grad_norm": 6.483848952905334, |
| "learning_rate": 5.641266073440553e-07, |
| "loss": 0.4534, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.6546982429335371, |
| "grad_norm": 3.8732081112384886, |
| "learning_rate": 5.630133302229505e-07, |
| "loss": 0.4985, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.6550802139037433, |
| "grad_norm": 3.4979767733986313, |
| "learning_rate": 5.619007221270468e-07, |
| "loss": 0.5346, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6554621848739496, |
| "grad_norm": 4.709621876577418, |
| "learning_rate": 5.607887847597443e-07, |
| "loss": 0.4209, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.6558441558441559, |
| "grad_norm": 5.503269999420802, |
| "learning_rate": 5.596775198234145e-07, |
| "loss": 0.5286, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6562261268143621, |
| "grad_norm": 7.195878519228084, |
| "learning_rate": 5.585669290194009e-07, |
| "loss": 0.4672, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.6566080977845684, |
| "grad_norm": 2.365938759646021, |
| "learning_rate": 5.574570140480151e-07, |
| "loss": 0.4023, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.6569900687547746, |
| "grad_norm": 6.8990648285289655, |
| "learning_rate": 5.563477766085325e-07, |
| "loss": 0.4698, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6573720397249809, |
| "grad_norm": 3.2125838369759263, |
| "learning_rate": 5.552392183991928e-07, |
| "loss": 0.4959, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6577540106951871, |
| "grad_norm": 5.146019162532558, |
| "learning_rate": 5.541313411171944e-07, |
| "loss": 0.4607, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6581359816653934, |
| "grad_norm": 3.421511033601826, |
| "learning_rate": 5.530241464586944e-07, |
| "loss": 0.4918, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.6585179526355996, |
| "grad_norm": 2.058183092172318, |
| "learning_rate": 5.519176361188043e-07, |
| "loss": 0.4409, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.658899923605806, |
| "grad_norm": 2.885911311024911, |
| "learning_rate": 5.508118117915874e-07, |
| "loss": 0.5268, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6592818945760123, |
| "grad_norm": 2.7223463912778105, |
| "learning_rate": 5.497066751700577e-07, |
| "loss": 0.3902, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6596638655462185, |
| "grad_norm": 4.6121095889796635, |
| "learning_rate": 5.486022279461762e-07, |
| "loss": 0.5185, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.6600458365164248, |
| "grad_norm": 5.173898079109986, |
| "learning_rate": 5.474984718108471e-07, |
| "loss": 0.4709, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.660427807486631, |
| "grad_norm": 2.7477603466877434, |
| "learning_rate": 5.463954084539181e-07, |
| "loss": 0.4943, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.6608097784568373, |
| "grad_norm": 4.093441308970811, |
| "learning_rate": 5.45293039564176e-07, |
| "loss": 0.5402, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6611917494270435, |
| "grad_norm": 3.9339067289199883, |
| "learning_rate": 5.441913668293434e-07, |
| "loss": 0.4398, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.6615737203972498, |
| "grad_norm": 10.141199390696602, |
| "learning_rate": 5.430903919360783e-07, |
| "loss": 0.5275, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.661955691367456, |
| "grad_norm": 2.5647842976467383, |
| "learning_rate": 5.419901165699693e-07, |
| "loss": 0.4711, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6623376623376623, |
| "grad_norm": 2.449797827042685, |
| "learning_rate": 5.408905424155345e-07, |
| "loss": 0.4447, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6627196333078686, |
| "grad_norm": 2.885197674622287, |
| "learning_rate": 5.397916711562194e-07, |
| "loss": 0.4709, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6631016042780749, |
| "grad_norm": 3.632198309075157, |
| "learning_rate": 5.38693504474391e-07, |
| "loss": 0.4712, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.6634835752482812, |
| "grad_norm": 3.786664182131556, |
| "learning_rate": 5.375960440513396e-07, |
| "loss": 0.5153, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6638655462184874, |
| "grad_norm": 2.7266680685160916, |
| "learning_rate": 5.364992915672741e-07, |
| "loss": 0.5478, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6642475171886937, |
| "grad_norm": 5.075336631040644, |
| "learning_rate": 5.354032487013182e-07, |
| "loss": 0.5136, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6646294881588999, |
| "grad_norm": 4.790177542890202, |
| "learning_rate": 5.343079171315106e-07, |
| "loss": 0.4897, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6650114591291062, |
| "grad_norm": 4.956713166422229, |
| "learning_rate": 5.332132985348006e-07, |
| "loss": 0.5102, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6653934300993124, |
| "grad_norm": 3.7351581764257396, |
| "learning_rate": 5.32119394587045e-07, |
| "loss": 0.4859, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6657754010695187, |
| "grad_norm": 2.3918201666827517, |
| "learning_rate": 5.310262069630083e-07, |
| "loss": 0.4508, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.666157372039725, |
| "grad_norm": 2.618140032137314, |
| "learning_rate": 5.299337373363563e-07, |
| "loss": 0.4534, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6665393430099312, |
| "grad_norm": 9.82913689806301, |
| "learning_rate": 5.28841987379657e-07, |
| "loss": 0.4735, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6669213139801375, |
| "grad_norm": 3.2431944892498827, |
| "learning_rate": 5.277509587643764e-07, |
| "loss": 0.4929, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6673032849503437, |
| "grad_norm": 6.875034105299384, |
| "learning_rate": 5.266606531608752e-07, |
| "loss": 0.4888, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6676852559205501, |
| "grad_norm": 2.84407410286601, |
| "learning_rate": 5.255710722384084e-07, |
| "loss": 0.4616, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.6680672268907563, |
| "grad_norm": 8.091849851885527, |
| "learning_rate": 5.244822176651203e-07, |
| "loss": 0.5232, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6684491978609626, |
| "grad_norm": 4.373080856545653, |
| "learning_rate": 5.233940911080442e-07, |
| "loss": 0.4334, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6688311688311688, |
| "grad_norm": 3.210487779087646, |
| "learning_rate": 5.223066942330987e-07, |
| "loss": 0.571, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6692131398013751, |
| "grad_norm": 2.9220362455277082, |
| "learning_rate": 5.212200287050841e-07, |
| "loss": 0.4912, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6695951107715814, |
| "grad_norm": 4.521842055951422, |
| "learning_rate": 5.201340961876828e-07, |
| "loss": 0.551, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.6699770817417876, |
| "grad_norm": 4.503953851845752, |
| "learning_rate": 5.190488983434532e-07, |
| "loss": 0.477, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6703590527119939, |
| "grad_norm": 2.6664984094438418, |
| "learning_rate": 5.179644368338305e-07, |
| "loss": 0.5084, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.6707410236822001, |
| "grad_norm": 3.8997783146483314, |
| "learning_rate": 5.16880713319121e-07, |
| "loss": 0.5086, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6711229946524064, |
| "grad_norm": 3.3464111874272717, |
| "learning_rate": 5.157977294585026e-07, |
| "loss": 0.5331, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6715049656226126, |
| "grad_norm": 2.6820956918396814, |
| "learning_rate": 5.147154869100201e-07, |
| "loss": 0.4768, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.671886936592819, |
| "grad_norm": 42.88709117437861, |
| "learning_rate": 5.136339873305831e-07, |
| "loss": 0.4841, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 5.276124406268703, |
| "learning_rate": 5.125532323759643e-07, |
| "loss": 0.4874, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6726508785332315, |
| "grad_norm": 3.229214292636012, |
| "learning_rate": 5.114732237007957e-07, |
| "loss": 0.4697, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6730328495034378, |
| "grad_norm": 2.4631829837493293, |
| "learning_rate": 5.103939629585674e-07, |
| "loss": 0.4387, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.673414820473644, |
| "grad_norm": 3.009261835474479, |
| "learning_rate": 5.093154518016245e-07, |
| "loss": 0.4609, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6737967914438503, |
| "grad_norm": 4.0578299577285915, |
| "learning_rate": 5.082376918811635e-07, |
| "loss": 0.4755, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6741787624140565, |
| "grad_norm": 41.392065884095885, |
| "learning_rate": 5.071606848472321e-07, |
| "loss": 0.5212, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6745607333842628, |
| "grad_norm": 8.192046614990442, |
| "learning_rate": 5.060844323487238e-07, |
| "loss": 0.4903, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.674942704354469, |
| "grad_norm": 4.99344291784436, |
| "learning_rate": 5.050089360333782e-07, |
| "loss": 0.5053, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6753246753246753, |
| "grad_norm": 9.403045030732164, |
| "learning_rate": 5.039341975477773e-07, |
| "loss": 0.5111, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.6757066462948816, |
| "grad_norm": 3.869313946071402, |
| "learning_rate": 5.028602185373413e-07, |
| "loss": 0.5385, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6760886172650878, |
| "grad_norm": 3.2039531425470464, |
| "learning_rate": 5.017870006463292e-07, |
| "loss": 0.4524, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 3.348277780697525, |
| "learning_rate": 5.007145455178343e-07, |
| "loss": 0.5328, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6768525592055004, |
| "grad_norm": 3.535283899258063, |
| "learning_rate": 4.996428547937814e-07, |
| "loss": 0.4704, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6772345301757067, |
| "grad_norm": 2.9033639422271107, |
| "learning_rate": 4.985719301149261e-07, |
| "loss": 0.5007, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6776165011459129, |
| "grad_norm": 2.718338904911866, |
| "learning_rate": 4.975017731208508e-07, |
| "loss": 0.4864, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6779984721161192, |
| "grad_norm": 2.5266371179234235, |
| "learning_rate": 4.964323854499619e-07, |
| "loss": 0.4287, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6783804430863254, |
| "grad_norm": 2.9392030294015625, |
| "learning_rate": 4.953637687394891e-07, |
| "loss": 0.5271, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6787624140565317, |
| "grad_norm": 5.303562101025649, |
| "learning_rate": 4.942959246254807e-07, |
| "loss": 0.4197, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.679144385026738, |
| "grad_norm": 3.775507561159994, |
| "learning_rate": 4.932288547428026e-07, |
| "loss": 0.4597, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6795263559969442, |
| "grad_norm": 3.6320980633601905, |
| "learning_rate": 4.921625607251362e-07, |
| "loss": 0.4709, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6799083269671505, |
| "grad_norm": 2.104047258371886, |
| "learning_rate": 4.910970442049732e-07, |
| "loss": 0.4822, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6802902979373567, |
| "grad_norm": 3.5998342666828607, |
| "learning_rate": 4.900323068136165e-07, |
| "loss": 0.5084, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.680672268907563, |
| "grad_norm": 3.882898421757521, |
| "learning_rate": 4.889683501811761e-07, |
| "loss": 0.4657, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.6810542398777693, |
| "grad_norm": 2.623050039734026, |
| "learning_rate": 4.879051759365653e-07, |
| "loss": 0.4837, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6814362108479756, |
| "grad_norm": 3.2165127332094583, |
| "learning_rate": 4.868427857075013e-07, |
| "loss": 0.4393, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6818181818181818, |
| "grad_norm": 2.7033619904930237, |
| "learning_rate": 4.857811811204996e-07, |
| "loss": 0.4995, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6822001527883881, |
| "grad_norm": 4.353397989819022, |
| "learning_rate": 4.847203638008735e-07, |
| "loss": 0.5109, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.6825821237585944, |
| "grad_norm": 2.745760901165819, |
| "learning_rate": 4.836603353727316e-07, |
| "loss": 0.4754, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.6829640947288006, |
| "grad_norm": 7.346443776679196, |
| "learning_rate": 4.826010974589731e-07, |
| "loss": 0.4906, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6833460656990069, |
| "grad_norm": 2.3502944826903223, |
| "learning_rate": 4.815426516812883e-07, |
| "loss": 0.4692, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6837280366692131, |
| "grad_norm": 3.211455722844151, |
| "learning_rate": 4.804849996601547e-07, |
| "loss": 0.4798, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6841100076394194, |
| "grad_norm": 2.3266255130897493, |
| "learning_rate": 4.794281430148336e-07, |
| "loss": 0.4615, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6844919786096256, |
| "grad_norm": 3.901880544682035, |
| "learning_rate": 4.783720833633692e-07, |
| "loss": 0.4488, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.6848739495798319, |
| "grad_norm": 11.162179353153887, |
| "learning_rate": 4.773168223225861e-07, |
| "loss": 0.4631, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6852559205500381, |
| "grad_norm": 4.62060167380066, |
| "learning_rate": 4.7626236150808487e-07, |
| "loss": 0.4421, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6856378915202445, |
| "grad_norm": 2.661341939327642, |
| "learning_rate": 4.752087025342422e-07, |
| "loss": 0.4958, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6860198624904508, |
| "grad_norm": 3.355248941702287, |
| "learning_rate": 4.741558470142061e-07, |
| "loss": 0.4309, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.686401833460657, |
| "grad_norm": 2.581711751984442, |
| "learning_rate": 4.731037965598952e-07, |
| "loss": 0.4278, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6867838044308633, |
| "grad_norm": 2.9895776914154153, |
| "learning_rate": 4.7205255278199584e-07, |
| "loss": 0.492, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.6871657754010695, |
| "grad_norm": 7.478761437708342, |
| "learning_rate": 4.710021172899582e-07, |
| "loss": 0.4672, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6875477463712758, |
| "grad_norm": 3.3459084056641393, |
| "learning_rate": 4.6995249169199604e-07, |
| "loss": 0.4796, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.687929717341482, |
| "grad_norm": 3.2181859858157433, |
| "learning_rate": 4.689036775950832e-07, |
| "loss": 0.4503, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.6883116883116883, |
| "grad_norm": 4.442300903603122, |
| "learning_rate": 4.6785567660494987e-07, |
| "loss": 0.6058, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6886936592818945, |
| "grad_norm": 4.49731712760092, |
| "learning_rate": 4.668084903260827e-07, |
| "loss": 0.5145, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.6890756302521008, |
| "grad_norm": 3.0556267963416066, |
| "learning_rate": 4.657621203617209e-07, |
| "loss": 0.4599, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.6894576012223071, |
| "grad_norm": 4.244715633338391, |
| "learning_rate": 4.6471656831385285e-07, |
| "loss": 0.462, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6898395721925134, |
| "grad_norm": 4.128236164165927, |
| "learning_rate": 4.636718357832161e-07, |
| "loss": 0.5331, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6902215431627197, |
| "grad_norm": 4.0861345040191255, |
| "learning_rate": 4.626279243692922e-07, |
| "loss": 0.4439, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.6906035141329259, |
| "grad_norm": 5.750611707089236, |
| "learning_rate": 4.6158483567030635e-07, |
| "loss": 0.4304, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.6909854851031322, |
| "grad_norm": 7.196354964764559, |
| "learning_rate": 4.605425712832246e-07, |
| "loss": 0.4915, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.6913674560733384, |
| "grad_norm": 5.987041811737305, |
| "learning_rate": 4.595011328037496e-07, |
| "loss": 0.5501, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6917494270435447, |
| "grad_norm": 4.934071242682319, |
| "learning_rate": 4.584605218263207e-07, |
| "loss": 0.58, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.692131398013751, |
| "grad_norm": 3.3954147958369374, |
| "learning_rate": 4.5742073994411045e-07, |
| "loss": 0.5062, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.6925133689839572, |
| "grad_norm": 2.2667132893887794, |
| "learning_rate": 4.563817887490207e-07, |
| "loss": 0.4208, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.6928953399541635, |
| "grad_norm": 3.929797491140602, |
| "learning_rate": 4.5534366983168293e-07, |
| "loss": 0.4252, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.6932773109243697, |
| "grad_norm": 3.022047260374716, |
| "learning_rate": 4.5430638478145434e-07, |
| "loss": 0.5903, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.693659281894576, |
| "grad_norm": 6.51145142891472, |
| "learning_rate": 4.532699351864141e-07, |
| "loss": 0.4997, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6940412528647822, |
| "grad_norm": 2.4956649196707836, |
| "learning_rate": 4.5223432263336404e-07, |
| "loss": 0.4496, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.6944232238349886, |
| "grad_norm": 116.55753178803175, |
| "learning_rate": 4.5119954870782305e-07, |
| "loss": 0.4883, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6948051948051948, |
| "grad_norm": 3.5845588926175225, |
| "learning_rate": 4.5016561499402703e-07, |
| "loss": 0.4719, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6951871657754011, |
| "grad_norm": 4.438982485708564, |
| "learning_rate": 4.4913252307492556e-07, |
| "loss": 0.4869, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6955691367456074, |
| "grad_norm": 3.5818159150843094, |
| "learning_rate": 4.4810027453217834e-07, |
| "loss": 0.4822, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6959511077158136, |
| "grad_norm": 2.593057013601486, |
| "learning_rate": 4.47068870946155e-07, |
| "loss": 0.4946, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.6963330786860199, |
| "grad_norm": 7.773289890571107, |
| "learning_rate": 4.460383138959315e-07, |
| "loss": 0.5188, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.6967150496562261, |
| "grad_norm": 4.718899974607956, |
| "learning_rate": 4.4500860495928663e-07, |
| "loss": 0.4838, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6970970206264324, |
| "grad_norm": 3.8665984652103607, |
| "learning_rate": 4.439797457127019e-07, |
| "loss": 0.4639, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.6974789915966386, |
| "grad_norm": 4.622011857988872, |
| "learning_rate": 4.42951737731358e-07, |
| "loss": 0.456, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.6978609625668449, |
| "grad_norm": 6.110123882313781, |
| "learning_rate": 4.4192458258913103e-07, |
| "loss": 0.5425, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6982429335370511, |
| "grad_norm": 28.584952836114386, |
| "learning_rate": 4.408982818585929e-07, |
| "loss": 0.4163, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.6986249045072574, |
| "grad_norm": 3.081629631190153, |
| "learning_rate": 4.398728371110063e-07, |
| "loss": 0.4653, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6990068754774638, |
| "grad_norm": 3.8023416610110368, |
| "learning_rate": 4.38848249916324e-07, |
| "loss": 0.5274, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.69938884644767, |
| "grad_norm": 13.68679754109722, |
| "learning_rate": 4.378245218431862e-07, |
| "loss": 0.4907, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.6997708174178763, |
| "grad_norm": 2.989676329168659, |
| "learning_rate": 4.368016544589166e-07, |
| "loss": 0.5026, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.7001527883880825, |
| "grad_norm": 2.456891595424161, |
| "learning_rate": 4.357796493295222e-07, |
| "loss": 0.4148, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.7005347593582888, |
| "grad_norm": 3.01597433449375, |
| "learning_rate": 4.3475850801969007e-07, |
| "loss": 0.4704, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.700916730328495, |
| "grad_norm": 2.6339512449755786, |
| "learning_rate": 4.3373823209278336e-07, |
| "loss": 0.5132, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.7012987012987013, |
| "grad_norm": 2.9970556970320157, |
| "learning_rate": 4.32718823110842e-07, |
| "loss": 0.5798, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.7016806722689075, |
| "grad_norm": 2.2711103632944103, |
| "learning_rate": 4.31700282634578e-07, |
| "loss": 0.4762, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.7020626432391138, |
| "grad_norm": 3.0551038729760784, |
| "learning_rate": 4.306826122233729e-07, |
| "loss": 0.5223, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.7024446142093201, |
| "grad_norm": 6.796489619512551, |
| "learning_rate": 4.2966581343527765e-07, |
| "loss": 0.5409, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.7028265851795263, |
| "grad_norm": 4.240579878349909, |
| "learning_rate": 4.2864988782700716e-07, |
| "loss": 0.3891, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7032085561497327, |
| "grad_norm": 5.563576383242488, |
| "learning_rate": 4.276348369539408e-07, |
| "loss": 0.4891, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.7035905271199389, |
| "grad_norm": 2.566057792322101, |
| "learning_rate": 4.266206623701183e-07, |
| "loss": 0.4866, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.7039724980901452, |
| "grad_norm": 3.7630909631063134, |
| "learning_rate": 4.256073656282373e-07, |
| "loss": 0.4461, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.7043544690603514, |
| "grad_norm": 3.0635270757473827, |
| "learning_rate": 4.2459494827965213e-07, |
| "loss": 0.4605, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.7047364400305577, |
| "grad_norm": 4.764068162210865, |
| "learning_rate": 4.2358341187437085e-07, |
| "loss": 0.4986, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.7051184110007639, |
| "grad_norm": 2.8256747931679063, |
| "learning_rate": 4.2257275796105184e-07, |
| "loss": 0.4251, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.7055003819709702, |
| "grad_norm": 3.9070473949332203, |
| "learning_rate": 4.2156298808700374e-07, |
| "loss": 0.5431, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 3.836471191964947, |
| "learning_rate": 4.205541037981802e-07, |
| "loss": 0.4255, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.7062643239113827, |
| "grad_norm": 6.280578101022665, |
| "learning_rate": 4.1954610663918046e-07, |
| "loss": 0.4583, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.706646294881589, |
| "grad_norm": 5.874313472902486, |
| "learning_rate": 4.18538998153245e-07, |
| "loss": 0.4825, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7070282658517952, |
| "grad_norm": 2.722705730108688, |
| "learning_rate": 4.175327798822531e-07, |
| "loss": 0.4786, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.7074102368220015, |
| "grad_norm": 4.555491048283832, |
| "learning_rate": 4.1652745336672224e-07, |
| "loss": 0.4718, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.7077922077922078, |
| "grad_norm": 2.684682727559695, |
| "learning_rate": 4.1552302014580433e-07, |
| "loss": 0.4009, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.7081741787624141, |
| "grad_norm": 2.820889863379384, |
| "learning_rate": 4.1451948175728267e-07, |
| "loss": 0.4717, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.7085561497326203, |
| "grad_norm": 2.6265538016576713, |
| "learning_rate": 4.135168397375718e-07, |
| "loss": 0.429, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.7089381207028266, |
| "grad_norm": 3.7741584067315914, |
| "learning_rate": 4.125150956217138e-07, |
| "loss": 0.5093, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.7093200916730329, |
| "grad_norm": 5.30058124522937, |
| "learning_rate": 4.1151425094337513e-07, |
| "loss": 0.499, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.7097020626432391, |
| "grad_norm": 5.4902094944883055, |
| "learning_rate": 4.1051430723484623e-07, |
| "loss": 0.4655, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.7100840336134454, |
| "grad_norm": 6.162461259611622, |
| "learning_rate": 4.0951526602703735e-07, |
| "loss": 0.4572, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.7104660045836516, |
| "grad_norm": 3.500319460088784, |
| "learning_rate": 4.085171288494774e-07, |
| "loss": 0.5129, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7108479755538579, |
| "grad_norm": 2.7923411056600984, |
| "learning_rate": 4.07519897230312e-07, |
| "loss": 0.5116, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.7112299465240641, |
| "grad_norm": 3.094231599936001, |
| "learning_rate": 4.0652357269629857e-07, |
| "loss": 0.4578, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.7116119174942704, |
| "grad_norm": 4.952507106482637, |
| "learning_rate": 4.055281567728076e-07, |
| "loss": 0.4675, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.7119938884644768, |
| "grad_norm": 3.158969980474859, |
| "learning_rate": 4.0453365098381695e-07, |
| "loss": 0.4729, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.712375859434683, |
| "grad_norm": 4.0128858021274425, |
| "learning_rate": 4.035400568519122e-07, |
| "loss": 0.5799, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.7127578304048893, |
| "grad_norm": 5.726579023569035, |
| "learning_rate": 4.0254737589828323e-07, |
| "loss": 0.5642, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.7131398013750955, |
| "grad_norm": 4.8738224391570135, |
| "learning_rate": 4.015556096427206e-07, |
| "loss": 0.4978, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.7135217723453018, |
| "grad_norm": 1.9393008560746685, |
| "learning_rate": 4.0056475960361615e-07, |
| "loss": 0.4016, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.713903743315508, |
| "grad_norm": 11.109760924089572, |
| "learning_rate": 3.9957482729795735e-07, |
| "loss": 0.5351, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 3.5030857014942542, |
| "learning_rate": 3.9858581424132766e-07, |
| "loss": 0.5276, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7146676852559205, |
| "grad_norm": 18.94949727094923, |
| "learning_rate": 3.975977219479033e-07, |
| "loss": 0.4927, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.7150496562261268, |
| "grad_norm": 2.985867695957091, |
| "learning_rate": 3.966105519304499e-07, |
| "loss": 0.4805, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.7154316271963331, |
| "grad_norm": 11.239709251982658, |
| "learning_rate": 3.956243057003222e-07, |
| "loss": 0.4813, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.7158135981665393, |
| "grad_norm": 2.3920035236201294, |
| "learning_rate": 3.946389847674594e-07, |
| "loss": 0.4808, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.7161955691367456, |
| "grad_norm": 3.606251827873003, |
| "learning_rate": 3.936545906403853e-07, |
| "loss": 0.5318, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7165775401069518, |
| "grad_norm": 3.0718763403473455, |
| "learning_rate": 3.9267112482620344e-07, |
| "loss": 0.4164, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.7169595110771582, |
| "grad_norm": 7.259618461181073, |
| "learning_rate": 3.9168858883059743e-07, |
| "loss": 0.444, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.7173414820473644, |
| "grad_norm": 12.93592963046385, |
| "learning_rate": 3.90706984157827e-07, |
| "loss": 0.5208, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.7177234530175707, |
| "grad_norm": 2.4143723426987047, |
| "learning_rate": 3.8972631231072493e-07, |
| "loss": 0.4594, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.7181054239877769, |
| "grad_norm": 3.2086077522932284, |
| "learning_rate": 3.8874657479069763e-07, |
| "loss": 0.5383, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7184873949579832, |
| "grad_norm": 10.33731462373807, |
| "learning_rate": 3.8776777309771934e-07, |
| "loss": 0.4971, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.7188693659281895, |
| "grad_norm": 3.9050481213749215, |
| "learning_rate": 3.867899087303326e-07, |
| "loss": 0.5348, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.7192513368983957, |
| "grad_norm": 28.55017424878132, |
| "learning_rate": 3.85812983185645e-07, |
| "loss": 0.4986, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.719633307868602, |
| "grad_norm": 8.08468107153252, |
| "learning_rate": 3.848369979593259e-07, |
| "loss": 0.4773, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.7200152788388082, |
| "grad_norm": 3.407078286876332, |
| "learning_rate": 3.838619545456059e-07, |
| "loss": 0.4837, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.7203972498090145, |
| "grad_norm": 3.383788660732898, |
| "learning_rate": 3.8288785443727357e-07, |
| "loss": 0.5097, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.7207792207792207, |
| "grad_norm": 2.5435503955321086, |
| "learning_rate": 3.8191469912567243e-07, |
| "loss": 0.4634, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.721161191749427, |
| "grad_norm": 3.0284976642181256, |
| "learning_rate": 3.8094249010070047e-07, |
| "loss": 0.4529, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.7215431627196333, |
| "grad_norm": 2.422343762556238, |
| "learning_rate": 3.799712288508071e-07, |
| "loss": 0.5196, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.7219251336898396, |
| "grad_norm": 7.258011180896353, |
| "learning_rate": 3.790009168629895e-07, |
| "loss": 0.4744, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7223071046600459, |
| "grad_norm": 3.660883946533744, |
| "learning_rate": 3.7803155562279276e-07, |
| "loss": 0.4803, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.7226890756302521, |
| "grad_norm": 2.473442719558735, |
| "learning_rate": 3.770631466143054e-07, |
| "loss": 0.4593, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.7230710466004584, |
| "grad_norm": 5.950510676950638, |
| "learning_rate": 3.7609569132015863e-07, |
| "loss": 0.494, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.7234530175706646, |
| "grad_norm": 2.237081011958015, |
| "learning_rate": 3.7512919122152397e-07, |
| "loss": 0.4394, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.7238349885408709, |
| "grad_norm": 2.7709843570858714, |
| "learning_rate": 3.741636477981092e-07, |
| "loss": 0.5006, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.7242169595110771, |
| "grad_norm": 2.6252764983751344, |
| "learning_rate": 3.7319906252815857e-07, |
| "loss": 0.4693, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.7245989304812834, |
| "grad_norm": 5.617419755723076, |
| "learning_rate": 3.722354368884495e-07, |
| "loss": 0.4652, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.7249809014514896, |
| "grad_norm": 3.5361263292557354, |
| "learning_rate": 3.71272772354289e-07, |
| "loss": 0.5213, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.725362872421696, |
| "grad_norm": 2.5949928089139176, |
| "learning_rate": 3.703110703995137e-07, |
| "loss": 0.5325, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.7257448433919023, |
| "grad_norm": 19.664846071662925, |
| "learning_rate": 3.693503324964867e-07, |
| "loss": 0.5319, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7261268143621085, |
| "grad_norm": 4.070155434112447, |
| "learning_rate": 3.68390560116094e-07, |
| "loss": 0.4306, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.7265087853323148, |
| "grad_norm": 13.811969565605493, |
| "learning_rate": 3.674317547277447e-07, |
| "loss": 0.5101, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.726890756302521, |
| "grad_norm": 14.984568645747874, |
| "learning_rate": 3.66473917799366e-07, |
| "loss": 0.4773, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 2.608868304564764, |
| "learning_rate": 3.655170507974037e-07, |
| "loss": 0.4502, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.7276546982429335, |
| "grad_norm": 3.160544844419424, |
| "learning_rate": 3.645611551868184e-07, |
| "loss": 0.5006, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7280366692131398, |
| "grad_norm": 4.278119381822793, |
| "learning_rate": 3.636062324310826e-07, |
| "loss": 0.4901, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.728418640183346, |
| "grad_norm": 4.951157730675554, |
| "learning_rate": 3.626522839921803e-07, |
| "loss": 0.4416, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.7288006111535523, |
| "grad_norm": 4.634081370664527, |
| "learning_rate": 3.6169931133060385e-07, |
| "loss": 0.3973, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.7291825821237586, |
| "grad_norm": 3.7413225541810857, |
| "learning_rate": 3.607473159053507e-07, |
| "loss": 0.4926, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.7295645530939648, |
| "grad_norm": 3.690256429087379, |
| "learning_rate": 3.597962991739235e-07, |
| "loss": 0.4893, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7299465240641712, |
| "grad_norm": 2.7298792210542415, |
| "learning_rate": 3.588462625923252e-07, |
| "loss": 0.4719, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.7303284950343774, |
| "grad_norm": 9.765728465247214, |
| "learning_rate": 3.5789720761505913e-07, |
| "loss": 0.5138, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.7307104660045837, |
| "grad_norm": 2.328736820943766, |
| "learning_rate": 3.5694913569512565e-07, |
| "loss": 0.475, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.7310924369747899, |
| "grad_norm": 2.562188291710903, |
| "learning_rate": 3.560020482840194e-07, |
| "loss": 0.5071, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.7314744079449962, |
| "grad_norm": 3.682707847406333, |
| "learning_rate": 3.5505594683172824e-07, |
| "loss": 0.478, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.7318563789152025, |
| "grad_norm": 16.26499522607614, |
| "learning_rate": 3.54110832786731e-07, |
| "loss": 0.5525, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.7322383498854087, |
| "grad_norm": 6.995416001335222, |
| "learning_rate": 3.5316670759599366e-07, |
| "loss": 0.4483, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.732620320855615, |
| "grad_norm": 27.820129981944024, |
| "learning_rate": 3.5222357270496906e-07, |
| "loss": 0.4273, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.7330022918258212, |
| "grad_norm": 3.6945885753820793, |
| "learning_rate": 3.512814295575942e-07, |
| "loss": 0.5197, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.7333842627960275, |
| "grad_norm": 2.9068072278616364, |
| "learning_rate": 3.5034027959628653e-07, |
| "loss": 0.5252, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7337662337662337, |
| "grad_norm": 5.906982103662624, |
| "learning_rate": 3.494001242619442e-07, |
| "loss": 0.469, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.73414820473644, |
| "grad_norm": 4.279419872340727, |
| "learning_rate": 3.484609649939415e-07, |
| "loss": 0.466, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.7345301757066462, |
| "grad_norm": 9.103811203425218, |
| "learning_rate": 3.475228032301286e-07, |
| "loss": 0.542, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.7349121466768526, |
| "grad_norm": 2.407127382987992, |
| "learning_rate": 3.465856404068285e-07, |
| "loss": 0.4808, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 7.60242894892141, |
| "learning_rate": 3.456494779588337e-07, |
| "loss": 0.5326, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7356760886172651, |
| "grad_norm": 2.572693667995342, |
| "learning_rate": 3.447143173194065e-07, |
| "loss": 0.4897, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.7360580595874714, |
| "grad_norm": 3.622211923018903, |
| "learning_rate": 3.43780159920275e-07, |
| "loss": 0.5363, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.7364400305576776, |
| "grad_norm": 3.0667032122854327, |
| "learning_rate": 3.4284700719163064e-07, |
| "loss": 0.4805, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.7368220015278839, |
| "grad_norm": 3.450568284892199, |
| "learning_rate": 3.419148605621276e-07, |
| "loss": 0.5734, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.7372039724980901, |
| "grad_norm": 2.5215067484094273, |
| "learning_rate": 3.4098372145887975e-07, |
| "loss": 0.478, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7375859434682964, |
| "grad_norm": 2.739279615526753, |
| "learning_rate": 3.400535913074574e-07, |
| "loss": 0.595, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.7379679144385026, |
| "grad_norm": 2.549133107179413, |
| "learning_rate": 3.391244715318875e-07, |
| "loss": 0.4223, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.7383498854087089, |
| "grad_norm": 3.3906537717385126, |
| "learning_rate": 3.3819636355464875e-07, |
| "loss": 0.4583, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.7387318563789153, |
| "grad_norm": 3.1669669812165693, |
| "learning_rate": 3.3726926879667207e-07, |
| "loss": 0.4301, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.7391138273491215, |
| "grad_norm": 3.609455837206172, |
| "learning_rate": 3.363431886773367e-07, |
| "loss": 0.4509, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7394957983193278, |
| "grad_norm": 3.2263686183138103, |
| "learning_rate": 3.354181246144677e-07, |
| "loss": 0.47, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.739877769289534, |
| "grad_norm": 7.8689314970635325, |
| "learning_rate": 3.3449407802433573e-07, |
| "loss": 0.4766, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.7402597402597403, |
| "grad_norm": 12.297816660666351, |
| "learning_rate": 3.3357105032165323e-07, |
| "loss": 0.4759, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.7406417112299465, |
| "grad_norm": 3.278150809156143, |
| "learning_rate": 3.326490429195723e-07, |
| "loss": 0.5303, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.7410236822001528, |
| "grad_norm": 2.5949186267966082, |
| "learning_rate": 3.317280572296834e-07, |
| "loss": 0.527, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.741405653170359, |
| "grad_norm": 2.1671563182970455, |
| "learning_rate": 3.308080946620133e-07, |
| "loss": 0.4465, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.7417876241405653, |
| "grad_norm": 4.0924980474366155, |
| "learning_rate": 3.298891566250209e-07, |
| "loss": 0.4535, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.7421695951107716, |
| "grad_norm": 3.707669815731349, |
| "learning_rate": 3.289712445255981e-07, |
| "loss": 0.508, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.7425515660809778, |
| "grad_norm": 3.200538361880574, |
| "learning_rate": 3.280543597690648e-07, |
| "loss": 0.4451, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.7429335370511841, |
| "grad_norm": 5.354158493938534, |
| "learning_rate": 3.27138503759169e-07, |
| "loss": 0.48, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7433155080213903, |
| "grad_norm": 4.059813880081117, |
| "learning_rate": 3.262236778980836e-07, |
| "loss": 0.5653, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.7436974789915967, |
| "grad_norm": 10.018457891659642, |
| "learning_rate": 3.2530988358640334e-07, |
| "loss": 0.4526, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.7440794499618029, |
| "grad_norm": 6.077309375315518, |
| "learning_rate": 3.2439712222314496e-07, |
| "loss": 0.5287, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7444614209320092, |
| "grad_norm": 7.0200332373569, |
| "learning_rate": 3.2348539520574337e-07, |
| "loss": 0.5588, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.7448433919022154, |
| "grad_norm": 3.051416350497911, |
| "learning_rate": 3.2257470393004903e-07, |
| "loss": 0.5365, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7452253628724217, |
| "grad_norm": 3.286316707945846, |
| "learning_rate": 3.2166504979032794e-07, |
| "loss": 0.4837, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.745607333842628, |
| "grad_norm": 8.761969558075574, |
| "learning_rate": 3.207564341792578e-07, |
| "loss": 0.5069, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.7459893048128342, |
| "grad_norm": 8.10687240149731, |
| "learning_rate": 3.1984885848792564e-07, |
| "loss": 0.517, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.7463712757830405, |
| "grad_norm": 2.5829528950320664, |
| "learning_rate": 3.1894232410582754e-07, |
| "loss": 0.4683, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.7467532467532467, |
| "grad_norm": 3.2280729471211895, |
| "learning_rate": 3.180368324208643e-07, |
| "loss": 0.4826, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.747135217723453, |
| "grad_norm": 19.541963030290834, |
| "learning_rate": 3.17132384819341e-07, |
| "loss": 0.4297, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7475171886936592, |
| "grad_norm": 3.293546129592615, |
| "learning_rate": 3.162289826859643e-07, |
| "loss": 0.5001, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7478991596638656, |
| "grad_norm": 3.293069144037459, |
| "learning_rate": 3.153266274038395e-07, |
| "loss": 0.4919, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.7482811306340718, |
| "grad_norm": 4.082598515187843, |
| "learning_rate": 3.144253203544699e-07, |
| "loss": 0.5036, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "grad_norm": 3.542666640119538, |
| "learning_rate": 3.1352506291775396e-07, |
| "loss": 0.4648, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7490450725744844, |
| "grad_norm": 6.080114343806355, |
| "learning_rate": 3.126258564719825e-07, |
| "loss": 0.4833, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7494270435446906, |
| "grad_norm": 6.1238966887725805, |
| "learning_rate": 3.1172770239383795e-07, |
| "loss": 0.4712, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.7498090145148969, |
| "grad_norm": 3.0678578727874464, |
| "learning_rate": 3.1083060205839164e-07, |
| "loss": 0.4988, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.7501909854851031, |
| "grad_norm": 7.899514482543266, |
| "learning_rate": 3.0993455683910073e-07, |
| "loss": 0.4609, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.7505729564553094, |
| "grad_norm": 9.342075315504848, |
| "learning_rate": 3.0903956810780817e-07, |
| "loss": 0.4689, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7509549274255156, |
| "grad_norm": 2.56279037396652, |
| "learning_rate": 3.081456372347384e-07, |
| "loss": 0.4273, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.7513368983957219, |
| "grad_norm": 2.618297306043059, |
| "learning_rate": 3.0725276558849687e-07, |
| "loss": 0.4186, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7517188693659282, |
| "grad_norm": 4.148327631141519, |
| "learning_rate": 3.063609545360676e-07, |
| "loss": 0.5607, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7521008403361344, |
| "grad_norm": 4.97633562270342, |
| "learning_rate": 3.0547020544280987e-07, |
| "loss": 0.4812, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7524828113063408, |
| "grad_norm": 9.0842543595599, |
| "learning_rate": 3.045805196724581e-07, |
| "loss": 0.5206, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.752864782276547, |
| "grad_norm": 9.27585635939645, |
| "learning_rate": 3.036918985871185e-07, |
| "loss": 0.4541, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.7532467532467533, |
| "grad_norm": 6.796792142983438, |
| "learning_rate": 3.028043435472667e-07, |
| "loss": 0.486, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.7536287242169595, |
| "grad_norm": 2.1624545717546257, |
| "learning_rate": 3.019178559117471e-07, |
| "loss": 0.4359, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.7540106951871658, |
| "grad_norm": 9.147224778529468, |
| "learning_rate": 3.010324370377689e-07, |
| "loss": 0.4562, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.754392666157372, |
| "grad_norm": 6.290289585365308, |
| "learning_rate": 3.001480882809059e-07, |
| "loss": 0.457, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7547746371275783, |
| "grad_norm": 2.2374087017338513, |
| "learning_rate": 2.992648109950935e-07, |
| "loss": 0.4598, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7551566080977846, |
| "grad_norm": 6.827094524701705, |
| "learning_rate": 2.9838260653262584e-07, |
| "loss": 0.5682, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.7555385790679908, |
| "grad_norm": 4.82841612141328, |
| "learning_rate": 2.975014762441558e-07, |
| "loss": 0.5364, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.7559205500381971, |
| "grad_norm": 4.100694903188989, |
| "learning_rate": 2.966214214786903e-07, |
| "loss": 0.5031, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.7563025210084033, |
| "grad_norm": 5.721139123760344, |
| "learning_rate": 2.9574244358359066e-07, |
| "loss": 0.4956, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7566844919786097, |
| "grad_norm": 3.0348483174093523, |
| "learning_rate": 2.9486454390456983e-07, |
| "loss": 0.458, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7570664629488159, |
| "grad_norm": 2.071255659160732, |
| "learning_rate": 2.939877237856886e-07, |
| "loss": 0.4417, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.7574484339190222, |
| "grad_norm": 2.5377244096701643, |
| "learning_rate": 2.931119845693565e-07, |
| "loss": 0.4835, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.7578304048892284, |
| "grad_norm": 2.491320364019066, |
| "learning_rate": 2.9223732759632667e-07, |
| "loss": 0.5483, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7582123758594347, |
| "grad_norm": 3.524489564802955, |
| "learning_rate": 2.913637542056967e-07, |
| "loss": 0.4658, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.758594346829641, |
| "grad_norm": 5.875291912515906, |
| "learning_rate": 2.904912657349049e-07, |
| "loss": 0.5105, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.7589763177998472, |
| "grad_norm": 5.744010687005276, |
| "learning_rate": 2.8961986351972767e-07, |
| "loss": 0.3959, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7593582887700535, |
| "grad_norm": 3.1503978340744427, |
| "learning_rate": 2.887495488942796e-07, |
| "loss": 0.49, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.7597402597402597, |
| "grad_norm": 4.9894448806439495, |
| "learning_rate": 2.8788032319100917e-07, |
| "loss": 0.4775, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.760122230710466, |
| "grad_norm": 2.886379728803835, |
| "learning_rate": 2.8701218774069836e-07, |
| "loss": 0.5524, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7605042016806722, |
| "grad_norm": 2.894689146151311, |
| "learning_rate": 2.8614514387246015e-07, |
| "loss": 0.4567, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.7608861726508785, |
| "grad_norm": 4.866779262098827, |
| "learning_rate": 2.8527919291373526e-07, |
| "loss": 0.5063, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.7612681436210847, |
| "grad_norm": 3.4928860508117783, |
| "learning_rate": 2.844143361902924e-07, |
| "loss": 0.4714, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.7616501145912911, |
| "grad_norm": 4.847558397239765, |
| "learning_rate": 2.8355057502622413e-07, |
| "loss": 0.4903, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.7620320855614974, |
| "grad_norm": 1.8542567356578314, |
| "learning_rate": 2.826879107439464e-07, |
| "loss": 0.4106, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7624140565317036, |
| "grad_norm": 3.959019907877066, |
| "learning_rate": 2.8182634466419485e-07, |
| "loss": 0.4876, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7627960275019099, |
| "grad_norm": 3.897130491646812, |
| "learning_rate": 2.809658781060249e-07, |
| "loss": 0.4747, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.7631779984721161, |
| "grad_norm": 4.19546163163991, |
| "learning_rate": 2.801065123868083e-07, |
| "loss": 0.5102, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.7635599694423224, |
| "grad_norm": 13.123256650379632, |
| "learning_rate": 2.792482488222306e-07, |
| "loss": 0.5024, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7639419404125286, |
| "grad_norm": 8.484215470641198, |
| "learning_rate": 2.78391088726291e-07, |
| "loss": 0.4743, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7643239113827349, |
| "grad_norm": 2.5750063915205663, |
| "learning_rate": 2.7753503341129915e-07, |
| "loss": 0.4796, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 3.2179233263308844, |
| "learning_rate": 2.766800841878723e-07, |
| "loss": 0.5152, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7650878533231474, |
| "grad_norm": 2.818601799258001, |
| "learning_rate": 2.7582624236493533e-07, |
| "loss": 0.467, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.7654698242933538, |
| "grad_norm": 3.4044326671523257, |
| "learning_rate": 2.7497350924971786e-07, |
| "loss": 0.4662, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.76585179526356, |
| "grad_norm": 17.145403912805843, |
| "learning_rate": 2.741218861477507e-07, |
| "loss": 0.4662, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7662337662337663, |
| "grad_norm": 3.3533992472377405, |
| "learning_rate": 2.7327137436286687e-07, |
| "loss": 0.549, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7666157372039725, |
| "grad_norm": 20.158085491971196, |
| "learning_rate": 2.7242197519719654e-07, |
| "loss": 0.5545, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.7669977081741788, |
| "grad_norm": 3.1064468642848824, |
| "learning_rate": 2.7157368995116737e-07, |
| "loss": 0.4057, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.767379679144385, |
| "grad_norm": 4.373884424352218, |
| "learning_rate": 2.7072651992350193e-07, |
| "loss": 0.5852, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.7677616501145913, |
| "grad_norm": 4.214662447583189, |
| "learning_rate": 2.698804664112139e-07, |
| "loss": 0.5052, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7681436210847975, |
| "grad_norm": 2.3640521615152594, |
| "learning_rate": 2.6903553070960907e-07, |
| "loss": 0.4282, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7685255920550038, |
| "grad_norm": 2.821646391524377, |
| "learning_rate": 2.6819171411228146e-07, |
| "loss": 0.5568, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.7689075630252101, |
| "grad_norm": 2.8535141415958574, |
| "learning_rate": 2.673490179111111e-07, |
| "loss": 0.4406, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.7692895339954163, |
| "grad_norm": 8.09229993906319, |
| "learning_rate": 2.665074433962634e-07, |
| "loss": 0.4413, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7696715049656226, |
| "grad_norm": 2.8451998217945587, |
| "learning_rate": 2.656669918561866e-07, |
| "loss": 0.5313, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7700534759358288, |
| "grad_norm": 5.017701230366891, |
| "learning_rate": 2.6482766457760883e-07, |
| "loss": 0.4855, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7704354469060352, |
| "grad_norm": 3.0709257455045815, |
| "learning_rate": 2.639894628455379e-07, |
| "loss": 0.5046, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.7708174178762414, |
| "grad_norm": 2.9626905006442636, |
| "learning_rate": 2.631523879432576e-07, |
| "loss": 0.5317, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7711993888464477, |
| "grad_norm": 7.163042586264671, |
| "learning_rate": 2.62316441152327e-07, |
| "loss": 0.5906, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.771581359816654, |
| "grad_norm": 5.0311784903125, |
| "learning_rate": 2.6148162375257855e-07, |
| "loss": 0.5227, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7719633307868602, |
| "grad_norm": 4.102688596161408, |
| "learning_rate": 2.606479370221142e-07, |
| "loss": 0.4829, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7723453017570665, |
| "grad_norm": 3.418560369710601, |
| "learning_rate": 2.5981538223730616e-07, |
| "loss": 0.4852, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7727272727272727, |
| "grad_norm": 7.237199469607544, |
| "learning_rate": 2.5898396067279327e-07, |
| "loss": 0.4842, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.773109243697479, |
| "grad_norm": 5.8815534297234215, |
| "learning_rate": 2.581536736014789e-07, |
| "loss": 0.4698, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7734912146676852, |
| "grad_norm": 5.286618171866562, |
| "learning_rate": 2.5732452229453005e-07, |
| "loss": 0.5295, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.7738731856378915, |
| "grad_norm": 2.64018006585823, |
| "learning_rate": 2.5649650802137513e-07, |
| "loss": 0.5107, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7742551566080977, |
| "grad_norm": 5.39635291634368, |
| "learning_rate": 2.556696320497007e-07, |
| "loss": 0.5189, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.774637127578304, |
| "grad_norm": 4.327051391697355, |
| "learning_rate": 2.5484389564545194e-07, |
| "loss": 0.4771, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7750190985485104, |
| "grad_norm": 3.6198992330991437, |
| "learning_rate": 2.54019300072828e-07, |
| "loss": 0.4664, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.7754010695187166, |
| "grad_norm": 3.4612319276877876, |
| "learning_rate": 2.5319584659428227e-07, |
| "loss": 0.4549, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7757830404889229, |
| "grad_norm": 9.185387121029617, |
| "learning_rate": 2.5237353647051986e-07, |
| "loss": 0.4215, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7761650114591291, |
| "grad_norm": 3.0793205141834337, |
| "learning_rate": 2.515523709604943e-07, |
| "loss": 0.4781, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7765469824293354, |
| "grad_norm": 2.415691831688847, |
| "learning_rate": 2.507323513214077e-07, |
| "loss": 0.4111, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.7769289533995416, |
| "grad_norm": 2.9556289683823596, |
| "learning_rate": 2.4991347880870773e-07, |
| "loss": 0.4735, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.7773109243697479, |
| "grad_norm": 5.721939413184182, |
| "learning_rate": 2.490957546760851e-07, |
| "loss": 0.5053, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7776928953399541, |
| "grad_norm": 2.8081377018686142, |
| "learning_rate": 2.4827918017547345e-07, |
| "loss": 0.4053, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.7780748663101604, |
| "grad_norm": 3.827787653477836, |
| "learning_rate": 2.474637565570451e-07, |
| "loss": 0.5339, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.7784568372803667, |
| "grad_norm": 2.4848945346733946, |
| "learning_rate": 2.4664948506921126e-07, |
| "loss": 0.48, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.778838808250573, |
| "grad_norm": 2.9962827306471205, |
| "learning_rate": 2.458363669586194e-07, |
| "loss": 0.4947, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 2.347916499410763, |
| "learning_rate": 2.450244034701501e-07, |
| "loss": 0.4814, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7796027501909855, |
| "grad_norm": 2.5839724639312465, |
| "learning_rate": 2.442135958469171e-07, |
| "loss": 0.4388, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.7799847211611918, |
| "grad_norm": 6.96172510132702, |
| "learning_rate": 2.4340394533026486e-07, |
| "loss": 0.5038, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.780366692131398, |
| "grad_norm": 2.1074636866882495, |
| "learning_rate": 2.425954531597649e-07, |
| "loss": 0.4702, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.7807486631016043, |
| "grad_norm": 3.6538983777379612, |
| "learning_rate": 2.4178812057321653e-07, |
| "loss": 0.4599, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.7811306340718105, |
| "grad_norm": 2.7110070952816017, |
| "learning_rate": 2.4098194880664356e-07, |
| "loss": 0.4598, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7815126050420168, |
| "grad_norm": 4.346035006886647, |
| "learning_rate": 2.4017693909429206e-07, |
| "loss": 0.5548, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.7818945760122231, |
| "grad_norm": 3.963055626489219, |
| "learning_rate": 2.393730926686297e-07, |
| "loss": 0.5198, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.7822765469824293, |
| "grad_norm": 8.215642411040255, |
| "learning_rate": 2.3857041076034236e-07, |
| "loss": 0.478, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.7826585179526356, |
| "grad_norm": 6.317693663913666, |
| "learning_rate": 2.3776889459833373e-07, |
| "loss": 0.4781, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.7830404889228418, |
| "grad_norm": 11.445087980882196, |
| "learning_rate": 2.3696854540972267e-07, |
| "loss": 0.4477, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7834224598930482, |
| "grad_norm": 2.485869771208182, |
| "learning_rate": 2.361693644198408e-07, |
| "loss": 0.4807, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.7838044308632544, |
| "grad_norm": 3.104809165870729, |
| "learning_rate": 2.3537135285223199e-07, |
| "loss": 0.5194, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.7841864018334607, |
| "grad_norm": 6.563255132353687, |
| "learning_rate": 2.3457451192864962e-07, |
| "loss": 0.3995, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.7845683728036669, |
| "grad_norm": 4.457138302228237, |
| "learning_rate": 2.3377884286905414e-07, |
| "loss": 0.4693, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.7849503437738732, |
| "grad_norm": 4.312405424580879, |
| "learning_rate": 2.329843468916124e-07, |
| "loss": 0.4626, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7853323147440795, |
| "grad_norm": 2.4779569058573796, |
| "learning_rate": 2.3219102521269575e-07, |
| "loss": 0.4662, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 3.203547257496715, |
| "learning_rate": 2.313988790468765e-07, |
| "loss": 0.5714, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.786096256684492, |
| "grad_norm": 3.6702308504670587, |
| "learning_rate": 2.3060790960692845e-07, |
| "loss": 0.4362, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.7864782276546982, |
| "grad_norm": 2.528143424070905, |
| "learning_rate": 2.2981811810382269e-07, |
| "loss": 0.4191, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.7868601986249045, |
| "grad_norm": 8.621735007155152, |
| "learning_rate": 2.2902950574672798e-07, |
| "loss": 0.4654, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7872421695951107, |
| "grad_norm": 4.121940405291025, |
| "learning_rate": 2.282420737430073e-07, |
| "loss": 0.566, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.787624140565317, |
| "grad_norm": 2.8941965091486344, |
| "learning_rate": 2.2745582329821623e-07, |
| "loss": 0.5333, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.7880061115355232, |
| "grad_norm": 8.905160944846878, |
| "learning_rate": 2.2667075561610195e-07, |
| "loss": 0.4494, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.7883880825057296, |
| "grad_norm": 3.245421182359227, |
| "learning_rate": 2.258868718986008e-07, |
| "loss": 0.4727, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.7887700534759359, |
| "grad_norm": 3.994712407307103, |
| "learning_rate": 2.2510417334583566e-07, |
| "loss": 0.5717, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.7891520244461421, |
| "grad_norm": 3.1308390080703536, |
| "learning_rate": 2.2432266115611588e-07, |
| "loss": 0.5019, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.7895339954163484, |
| "grad_norm": 9.174614866376384, |
| "learning_rate": 2.2354233652593436e-07, |
| "loss": 0.4767, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.7899159663865546, |
| "grad_norm": 3.332817881843333, |
| "learning_rate": 2.2276320064996513e-07, |
| "loss": 0.5531, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.7902979373567609, |
| "grad_norm": 3.1221313508461694, |
| "learning_rate": 2.2198525472106322e-07, |
| "loss": 0.4997, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.7906799083269671, |
| "grad_norm": 6.25659098508917, |
| "learning_rate": 2.212084999302609e-07, |
| "loss": 0.5339, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7910618792971734, |
| "grad_norm": 2.378343334180352, |
| "learning_rate": 2.204329374667675e-07, |
| "loss": 0.4792, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.7914438502673797, |
| "grad_norm": 5.407938710117125, |
| "learning_rate": 2.1965856851796704e-07, |
| "loss": 0.4508, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7918258212375859, |
| "grad_norm": 8.650676035914138, |
| "learning_rate": 2.1888539426941534e-07, |
| "loss": 0.5357, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.7922077922077922, |
| "grad_norm": 3.320964515680207, |
| "learning_rate": 2.181134159048399e-07, |
| "loss": 0.54, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.7925897631779985, |
| "grad_norm": 4.3796081323275855, |
| "learning_rate": 2.1734263460613745e-07, |
| "loss": 0.4396, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.7929717341482048, |
| "grad_norm": 4.6250797467380185, |
| "learning_rate": 2.1657305155337114e-07, |
| "loss": 0.515, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.793353705118411, |
| "grad_norm": 3.2669468591765125, |
| "learning_rate": 2.158046679247706e-07, |
| "loss": 0.4935, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.7937356760886173, |
| "grad_norm": 3.32173143735186, |
| "learning_rate": 2.150374848967288e-07, |
| "loss": 0.3964, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 4.002395870563238, |
| "learning_rate": 2.142715036438001e-07, |
| "loss": 0.4846, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.7944996180290298, |
| "grad_norm": 3.262119711105562, |
| "learning_rate": 2.1350672533869985e-07, |
| "loss": 0.4709, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7948815889992361, |
| "grad_norm": 3.897308700584841, |
| "learning_rate": 2.1274315115230069e-07, |
| "loss": 0.5574, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.7952635599694423, |
| "grad_norm": 2.788895366333766, |
| "learning_rate": 2.1198078225363248e-07, |
| "loss": 0.4567, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.7956455309396486, |
| "grad_norm": 4.428889893647171, |
| "learning_rate": 2.1121961980987991e-07, |
| "loss": 0.4818, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.7960275019098548, |
| "grad_norm": 3.4304173580488047, |
| "learning_rate": 2.1045966498637968e-07, |
| "loss": 0.4832, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.7964094728800611, |
| "grad_norm": 2.7090643157430634, |
| "learning_rate": 2.0970091894662046e-07, |
| "loss": 0.4742, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.7967914438502673, |
| "grad_norm": 3.498421884744891, |
| "learning_rate": 2.0894338285224032e-07, |
| "loss": 0.4831, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.7971734148204737, |
| "grad_norm": 2.554999201557372, |
| "learning_rate": 2.0818705786302414e-07, |
| "loss": 0.4128, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.7975553857906799, |
| "grad_norm": 5.764550803661119, |
| "learning_rate": 2.0743194513690354e-07, |
| "loss": 0.4518, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.7979373567608862, |
| "grad_norm": 4.392621968809952, |
| "learning_rate": 2.066780458299532e-07, |
| "loss": 0.5366, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.7983193277310925, |
| "grad_norm": 27.23762782223962, |
| "learning_rate": 2.059253610963908e-07, |
| "loss": 0.5096, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7987012987012987, |
| "grad_norm": 4.917678268346897, |
| "learning_rate": 2.051738920885745e-07, |
| "loss": 0.4983, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.799083269671505, |
| "grad_norm": 2.4843286909212985, |
| "learning_rate": 2.0442363995700053e-07, |
| "loss": 0.4707, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.7994652406417112, |
| "grad_norm": 3.1580430450112864, |
| "learning_rate": 2.0367460585030294e-07, |
| "loss": 0.3834, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7998472116119175, |
| "grad_norm": 4.74167956034697, |
| "learning_rate": 2.0292679091525e-07, |
| "loss": 0.5594, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.8002291825821237, |
| "grad_norm": 5.6498499573703365, |
| "learning_rate": 2.0218019629674444e-07, |
| "loss": 0.5196, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.80061115355233, |
| "grad_norm": 4.527159164195184, |
| "learning_rate": 2.0143482313782046e-07, |
| "loss": 0.5576, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.8009931245225362, |
| "grad_norm": 5.130664753102986, |
| "learning_rate": 2.0069067257964133e-07, |
| "loss": 0.5389, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.8013750954927426, |
| "grad_norm": 2.3508514319389784, |
| "learning_rate": 1.9994774576149986e-07, |
| "loss": 0.4141, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.8017570664629489, |
| "grad_norm": 4.036901095606672, |
| "learning_rate": 1.9920604382081396e-07, |
| "loss": 0.4771, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.8021390374331551, |
| "grad_norm": 3.8505341704800937, |
| "learning_rate": 1.984655678931274e-07, |
| "loss": 0.5119, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8025210084033614, |
| "grad_norm": 4.315212828257233, |
| "learning_rate": 1.9772631911210658e-07, |
| "loss": 0.5262, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.8029029793735676, |
| "grad_norm": 4.179122202495466, |
| "learning_rate": 1.9698829860953869e-07, |
| "loss": 0.4772, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.8032849503437739, |
| "grad_norm": 3.510354812278512, |
| "learning_rate": 1.9625150751533105e-07, |
| "loss": 0.4846, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.8036669213139801, |
| "grad_norm": 5.192771915402893, |
| "learning_rate": 1.9551594695750807e-07, |
| "loss": 0.5724, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.8040488922841864, |
| "grad_norm": 2.8914671355912898, |
| "learning_rate": 1.9478161806221094e-07, |
| "loss": 0.5198, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.8044308632543926, |
| "grad_norm": 3.013881239807992, |
| "learning_rate": 1.9404852195369515e-07, |
| "loss": 0.4438, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.8048128342245989, |
| "grad_norm": 3.4213101296477197, |
| "learning_rate": 1.9331665975432775e-07, |
| "loss": 0.4714, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.8051948051948052, |
| "grad_norm": 2.855202248672021, |
| "learning_rate": 1.9258603258458827e-07, |
| "loss": 0.4218, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.8055767761650114, |
| "grad_norm": 4.939004677299009, |
| "learning_rate": 1.918566415630638e-07, |
| "loss": 0.4966, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.8059587471352178, |
| "grad_norm": 4.876125661218717, |
| "learning_rate": 1.9112848780645018e-07, |
| "loss": 0.5304, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.806340718105424, |
| "grad_norm": 5.349902870683962, |
| "learning_rate": 1.9040157242954856e-07, |
| "loss": 0.4977, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.8067226890756303, |
| "grad_norm": 6.427472086996999, |
| "learning_rate": 1.8967589654526362e-07, |
| "loss": 0.5665, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.8071046600458365, |
| "grad_norm": 3.9644914565815865, |
| "learning_rate": 1.8895146126460337e-07, |
| "loss": 0.5475, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.8074866310160428, |
| "grad_norm": 2.6214119049961675, |
| "learning_rate": 1.8822826769667533e-07, |
| "loss": 0.4595, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.807868601986249, |
| "grad_norm": 3.4275301139405765, |
| "learning_rate": 1.875063169486869e-07, |
| "loss": 0.4387, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.8082505729564553, |
| "grad_norm": 2.957960275521017, |
| "learning_rate": 1.8678561012594253e-07, |
| "loss": 0.5228, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.8086325439266616, |
| "grad_norm": 4.5705638000310715, |
| "learning_rate": 1.8606614833184165e-07, |
| "loss": 0.5427, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.8090145148968678, |
| "grad_norm": 2.8072984602726283, |
| "learning_rate": 1.853479326678783e-07, |
| "loss": 0.4979, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.8093964858670741, |
| "grad_norm": 3.268001934700151, |
| "learning_rate": 1.8463096423363843e-07, |
| "loss": 0.5171, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.8097784568372803, |
| "grad_norm": 2.32727482347449, |
| "learning_rate": 1.8391524412679805e-07, |
| "loss": 0.4387, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8101604278074866, |
| "grad_norm": 2.3928853103257683, |
| "learning_rate": 1.832007734431229e-07, |
| "loss": 0.3816, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.8105423987776929, |
| "grad_norm": 3.522836164648356, |
| "learning_rate": 1.824875532764647e-07, |
| "loss": 0.4504, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.8109243697478992, |
| "grad_norm": 3.8543287249884086, |
| "learning_rate": 1.8177558471876164e-07, |
| "loss": 0.5192, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.8113063407181055, |
| "grad_norm": 2.82058060067478, |
| "learning_rate": 1.8106486886003547e-07, |
| "loss": 0.4373, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.8116883116883117, |
| "grad_norm": 5.33102966199271, |
| "learning_rate": 1.8035540678838946e-07, |
| "loss": 0.4829, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.812070282658518, |
| "grad_norm": 11.516447534177683, |
| "learning_rate": 1.7964719959000808e-07, |
| "loss": 0.5049, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.8124522536287242, |
| "grad_norm": 4.790046217132607, |
| "learning_rate": 1.7894024834915443e-07, |
| "loss": 0.4592, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.8128342245989305, |
| "grad_norm": 7.356390091789118, |
| "learning_rate": 1.7823455414816812e-07, |
| "loss": 0.4138, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.8132161955691367, |
| "grad_norm": 2.2861859979657995, |
| "learning_rate": 1.775301180674651e-07, |
| "loss": 0.4342, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.813598166539343, |
| "grad_norm": 7.537853643424543, |
| "learning_rate": 1.76826941185535e-07, |
| "loss": 0.5356, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8139801375095492, |
| "grad_norm": 3.444331655734697, |
| "learning_rate": 1.7612502457893874e-07, |
| "loss": 0.4874, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.8143621084797555, |
| "grad_norm": 2.696003558115096, |
| "learning_rate": 1.7542436932230897e-07, |
| "loss": 0.4492, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.8147440794499619, |
| "grad_norm": 2.7325781405086365, |
| "learning_rate": 1.7472497648834627e-07, |
| "loss": 0.4839, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.8151260504201681, |
| "grad_norm": 2.4953850542257876, |
| "learning_rate": 1.74026847147819e-07, |
| "loss": 0.4183, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.8155080213903744, |
| "grad_norm": 2.698104289850833, |
| "learning_rate": 1.733299823695612e-07, |
| "loss": 0.4834, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.8158899923605806, |
| "grad_norm": 5.766516311048943, |
| "learning_rate": 1.726343832204702e-07, |
| "loss": 0.5258, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.8162719633307869, |
| "grad_norm": 4.263091493052819, |
| "learning_rate": 1.7194005076550633e-07, |
| "loss": 0.4654, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.8166539343009931, |
| "grad_norm": 3.6493299151046643, |
| "learning_rate": 1.712469860676905e-07, |
| "loss": 0.5148, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.8170359052711994, |
| "grad_norm": 3.118503435110606, |
| "learning_rate": 1.7055519018810215e-07, |
| "loss": 0.5313, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.8174178762414056, |
| "grad_norm": 2.856259832257614, |
| "learning_rate": 1.6986466418587875e-07, |
| "loss": 0.4324, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8177998472116119, |
| "grad_norm": 2.6322054967660637, |
| "learning_rate": 1.6917540911821383e-07, |
| "loss": 0.4694, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 4.1708143673350095, |
| "learning_rate": 1.6848742604035405e-07, |
| "loss": 0.5139, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.8185637891520244, |
| "grad_norm": 2.6858118054559075, |
| "learning_rate": 1.6780071600559985e-07, |
| "loss": 0.4807, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.8189457601222307, |
| "grad_norm": 3.452723265801165, |
| "learning_rate": 1.6711528006530162e-07, |
| "loss": 0.485, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.819327731092437, |
| "grad_norm": 3.348365409723057, |
| "learning_rate": 1.6643111926885988e-07, |
| "loss": 0.5307, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.8197097020626433, |
| "grad_norm": 4.175205747051993, |
| "learning_rate": 1.657482346637229e-07, |
| "loss": 0.4897, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.8200916730328495, |
| "grad_norm": 2.627351449164976, |
| "learning_rate": 1.6506662729538424e-07, |
| "loss": 0.4675, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.8204736440030558, |
| "grad_norm": 4.364209282487767, |
| "learning_rate": 1.643862982073828e-07, |
| "loss": 0.4748, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.820855614973262, |
| "grad_norm": 3.8648444535189705, |
| "learning_rate": 1.637072484413008e-07, |
| "loss": 0.4898, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.8212375859434683, |
| "grad_norm": 2.4741715164345157, |
| "learning_rate": 1.6302947903676045e-07, |
| "loss": 0.5016, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8216195569136746, |
| "grad_norm": 8.096634033181392, |
| "learning_rate": 1.6235299103142507e-07, |
| "loss": 0.53, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.8220015278838808, |
| "grad_norm": 10.094345497672295, |
| "learning_rate": 1.6167778546099563e-07, |
| "loss": 0.5067, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.8223834988540871, |
| "grad_norm": 3.0033457948105493, |
| "learning_rate": 1.6100386335920945e-07, |
| "loss": 0.4624, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.8227654698242933, |
| "grad_norm": 3.6237989363735474, |
| "learning_rate": 1.6033122575783943e-07, |
| "loss": 0.4402, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.8231474407944996, |
| "grad_norm": 27.114977190810855, |
| "learning_rate": 1.596598736866912e-07, |
| "loss": 0.5047, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 8.097836516523152, |
| "learning_rate": 1.5898980817360296e-07, |
| "loss": 0.5523, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.8239113827349122, |
| "grad_norm": 6.138753598509105, |
| "learning_rate": 1.58321030244443e-07, |
| "loss": 0.5264, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.8242933537051184, |
| "grad_norm": 5.54119380250345, |
| "learning_rate": 1.5765354092310767e-07, |
| "loss": 0.5002, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.8246753246753247, |
| "grad_norm": 12.383001202449798, |
| "learning_rate": 1.5698734123152147e-07, |
| "loss": 0.4764, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.825057295645531, |
| "grad_norm": 6.763350972657245, |
| "learning_rate": 1.5632243218963405e-07, |
| "loss": 0.5253, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8254392666157372, |
| "grad_norm": 7.899539027451731, |
| "learning_rate": 1.5565881481541855e-07, |
| "loss": 0.5055, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.8258212375859435, |
| "grad_norm": 5.708250468763933, |
| "learning_rate": 1.5499649012487158e-07, |
| "loss": 0.5095, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.8262032085561497, |
| "grad_norm": 12.871641057671368, |
| "learning_rate": 1.5433545913200975e-07, |
| "loss": 0.4852, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.826585179526356, |
| "grad_norm": 3.844853650524388, |
| "learning_rate": 1.5367572284886966e-07, |
| "loss": 0.4783, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.8269671504965622, |
| "grad_norm": 3.5393461249782376, |
| "learning_rate": 1.5301728228550547e-07, |
| "loss": 0.4546, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.8273491214667685, |
| "grad_norm": 3.9628215025790277, |
| "learning_rate": 1.523601384499873e-07, |
| "loss": 0.4962, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.8277310924369747, |
| "grad_norm": 3.320017741891724, |
| "learning_rate": 1.5170429234840042e-07, |
| "loss": 0.5417, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.828113063407181, |
| "grad_norm": 16.613667129121023, |
| "learning_rate": 1.5104974498484345e-07, |
| "loss": 0.5198, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.8284950343773874, |
| "grad_norm": 2.97697112468617, |
| "learning_rate": 1.5039649736142578e-07, |
| "loss": 0.54, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.8288770053475936, |
| "grad_norm": 3.968470149842816, |
| "learning_rate": 1.4974455047826784e-07, |
| "loss": 0.4708, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8292589763177999, |
| "grad_norm": 13.993826599883949, |
| "learning_rate": 1.490939053334982e-07, |
| "loss": 0.5484, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.8296409472880061, |
| "grad_norm": 4.0595859874577975, |
| "learning_rate": 1.4844456292325212e-07, |
| "loss": 0.5645, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.8300229182582124, |
| "grad_norm": 2.9777732873389056, |
| "learning_rate": 1.4779652424167131e-07, |
| "loss": 0.5123, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.8304048892284186, |
| "grad_norm": 5.623326166041766, |
| "learning_rate": 1.4714979028090058e-07, |
| "loss": 0.4902, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.8307868601986249, |
| "grad_norm": 3.638460408651059, |
| "learning_rate": 1.465043620310875e-07, |
| "loss": 0.5674, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.8311688311688312, |
| "grad_norm": 16.00436964062797, |
| "learning_rate": 1.4586024048038103e-07, |
| "loss": 0.6007, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.8315508021390374, |
| "grad_norm": 9.547989664149393, |
| "learning_rate": 1.4521742661492885e-07, |
| "loss": 0.4768, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.8319327731092437, |
| "grad_norm": 3.880657659745083, |
| "learning_rate": 1.4457592141887708e-07, |
| "loss": 0.5351, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.8323147440794499, |
| "grad_norm": 2.504159388877166, |
| "learning_rate": 1.4393572587436843e-07, |
| "loss": 0.4805, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.8326967150496563, |
| "grad_norm": 2.413992644051076, |
| "learning_rate": 1.4329684096153972e-07, |
| "loss": 0.4288, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8330786860198625, |
| "grad_norm": 2.805884193807672, |
| "learning_rate": 1.4265926765852187e-07, |
| "loss": 0.4767, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.8334606569900688, |
| "grad_norm": 2.6599816498305864, |
| "learning_rate": 1.42023006941438e-07, |
| "loss": 0.4093, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.833842627960275, |
| "grad_norm": 2.268768752495317, |
| "learning_rate": 1.413880597844007e-07, |
| "loss": 0.426, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.8342245989304813, |
| "grad_norm": 3.1786372741174933, |
| "learning_rate": 1.4075442715951246e-07, |
| "loss": 0.4236, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.8346065699006876, |
| "grad_norm": 4.764909929435903, |
| "learning_rate": 1.4012211003686236e-07, |
| "loss": 0.5414, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.8349885408708938, |
| "grad_norm": 10.893544216950978, |
| "learning_rate": 1.3949110938452613e-07, |
| "loss": 0.5049, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.8353705118411001, |
| "grad_norm": 7.152163635201567, |
| "learning_rate": 1.388614261685641e-07, |
| "loss": 0.4803, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.8357524828113063, |
| "grad_norm": 3.5881330377029164, |
| "learning_rate": 1.3823306135301872e-07, |
| "loss": 0.5535, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.8361344537815126, |
| "grad_norm": 2.51608097849988, |
| "learning_rate": 1.3760601589991472e-07, |
| "loss": 0.4877, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.8365164247517188, |
| "grad_norm": 6.759430257728035, |
| "learning_rate": 1.36980290769257e-07, |
| "loss": 0.5013, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8368983957219251, |
| "grad_norm": 5.091071275038226, |
| "learning_rate": 1.3635588691902822e-07, |
| "loss": 0.4787, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.8372803666921314, |
| "grad_norm": 2.7856178151388478, |
| "learning_rate": 1.3573280530518893e-07, |
| "loss": 0.5048, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.8376623376623377, |
| "grad_norm": 2.4746294744651887, |
| "learning_rate": 1.351110468816754e-07, |
| "loss": 0.5198, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.838044308632544, |
| "grad_norm": 33.71405414753952, |
| "learning_rate": 1.344906126003973e-07, |
| "loss": 0.468, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.8384262796027502, |
| "grad_norm": 7.024288440007473, |
| "learning_rate": 1.3387150341123798e-07, |
| "loss": 0.5526, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.8388082505729565, |
| "grad_norm": 6.049475591050513, |
| "learning_rate": 1.3325372026205116e-07, |
| "loss": 0.4301, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.8391902215431627, |
| "grad_norm": 10.611540554361598, |
| "learning_rate": 1.3263726409866116e-07, |
| "loss": 0.4779, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.839572192513369, |
| "grad_norm": 3.0621823518575115, |
| "learning_rate": 1.3202213586486056e-07, |
| "loss": 0.5016, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.8399541634835752, |
| "grad_norm": 3.275246324988664, |
| "learning_rate": 1.3140833650240834e-07, |
| "loss": 0.5119, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 3.3609199868920148, |
| "learning_rate": 1.3079586695102963e-07, |
| "loss": 0.5135, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8407181054239877, |
| "grad_norm": 3.3231780812489666, |
| "learning_rate": 1.3018472814841342e-07, |
| "loss": 0.4663, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.841100076394194, |
| "grad_norm": 4.4956394515998515, |
| "learning_rate": 1.2957492103021107e-07, |
| "loss": 0.4903, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.8414820473644004, |
| "grad_norm": 3.852529019099927, |
| "learning_rate": 1.2896644653003552e-07, |
| "loss": 0.4153, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.8418640183346066, |
| "grad_norm": 4.721021829716811, |
| "learning_rate": 1.2835930557945906e-07, |
| "loss": 0.4437, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.8422459893048129, |
| "grad_norm": 4.45443689471161, |
| "learning_rate": 1.277534991080128e-07, |
| "loss": 0.4884, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8426279602750191, |
| "grad_norm": 2.8545799117331105, |
| "learning_rate": 1.271490280431845e-07, |
| "loss": 0.5027, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.8430099312452254, |
| "grad_norm": 3.1618830266696367, |
| "learning_rate": 1.265458933104172e-07, |
| "loss": 0.5227, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.8433919022154316, |
| "grad_norm": 5.868702228977397, |
| "learning_rate": 1.259440958331086e-07, |
| "loss": 0.5185, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.8437738731856379, |
| "grad_norm": 11.923531533986598, |
| "learning_rate": 1.2534363653260838e-07, |
| "loss": 0.4696, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.8441558441558441, |
| "grad_norm": 4.096587385166496, |
| "learning_rate": 1.2474451632821792e-07, |
| "loss": 0.496, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8445378151260504, |
| "grad_norm": 3.9900336904878166, |
| "learning_rate": 1.2414673613718863e-07, |
| "loss": 0.5002, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.8449197860962567, |
| "grad_norm": 3.21690791572575, |
| "learning_rate": 1.2355029687471963e-07, |
| "loss": 0.517, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.8453017570664629, |
| "grad_norm": 2.998421983038558, |
| "learning_rate": 1.2295519945395806e-07, |
| "loss": 0.4889, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.8456837280366692, |
| "grad_norm": 4.306846266548668, |
| "learning_rate": 1.2236144478599553e-07, |
| "loss": 0.4833, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.8460656990068754, |
| "grad_norm": 3.816892740943075, |
| "learning_rate": 1.2176903377986903e-07, |
| "loss": 0.5117, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8464476699770818, |
| "grad_norm": 3.27557051439516, |
| "learning_rate": 1.2117796734255793e-07, |
| "loss": 0.5633, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.846829640947288, |
| "grad_norm": 2.809194476163277, |
| "learning_rate": 1.2058824637898267e-07, |
| "loss": 0.4319, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.8472116119174943, |
| "grad_norm": 2.565263632594187, |
| "learning_rate": 1.1999987179200466e-07, |
| "loss": 0.4182, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.8475935828877005, |
| "grad_norm": 4.393436093843852, |
| "learning_rate": 1.1941284448242306e-07, |
| "loss": 0.5012, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.8479755538579068, |
| "grad_norm": 3.784435251005329, |
| "learning_rate": 1.1882716534897507e-07, |
| "loss": 0.496, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8483575248281131, |
| "grad_norm": 8.747122275021068, |
| "learning_rate": 1.1824283528833379e-07, |
| "loss": 0.5423, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.8487394957983193, |
| "grad_norm": 4.644657697010408, |
| "learning_rate": 1.1765985519510624e-07, |
| "loss": 0.5356, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.8491214667685256, |
| "grad_norm": 2.098684965570757, |
| "learning_rate": 1.1707822596183337e-07, |
| "loss": 0.4655, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.8495034377387318, |
| "grad_norm": 3.0204783803502404, |
| "learning_rate": 1.1649794847898754e-07, |
| "loss": 0.4898, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.8498854087089381, |
| "grad_norm": 14.77076289760896, |
| "learning_rate": 1.1591902363497175e-07, |
| "loss": 0.5915, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8502673796791443, |
| "grad_norm": 2.2247545000489946, |
| "learning_rate": 1.1534145231611836e-07, |
| "loss": 0.4969, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.8506493506493507, |
| "grad_norm": 4.198150069950216, |
| "learning_rate": 1.147652354066867e-07, |
| "loss": 0.5177, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.851031321619557, |
| "grad_norm": 4.011138143417836, |
| "learning_rate": 1.1419037378886364e-07, |
| "loss": 0.4546, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.8514132925897632, |
| "grad_norm": 4.1242405856658, |
| "learning_rate": 1.1361686834275997e-07, |
| "loss": 0.5162, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.8517952635599695, |
| "grad_norm": 3.33567545055266, |
| "learning_rate": 1.1304471994641085e-07, |
| "loss": 0.5543, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8521772345301757, |
| "grad_norm": 3.3591483538343927, |
| "learning_rate": 1.1247392947577395e-07, |
| "loss": 0.4238, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.852559205500382, |
| "grad_norm": 3.2108515377110924, |
| "learning_rate": 1.1190449780472722e-07, |
| "loss": 0.4729, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 11.40142858082818, |
| "learning_rate": 1.1133642580506886e-07, |
| "loss": 0.4736, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.8533231474407945, |
| "grad_norm": 3.1885378611447446, |
| "learning_rate": 1.1076971434651571e-07, |
| "loss": 0.4514, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.8537051184110007, |
| "grad_norm": 6.454722532760631, |
| "learning_rate": 1.1020436429670066e-07, |
| "loss": 0.463, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.854087089381207, |
| "grad_norm": 6.051133144528017, |
| "learning_rate": 1.096403765211732e-07, |
| "loss": 0.4592, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.8544690603514133, |
| "grad_norm": 9.702170781782433, |
| "learning_rate": 1.0907775188339652e-07, |
| "loss": 0.4529, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.8548510313216195, |
| "grad_norm": 6.04570371091639, |
| "learning_rate": 1.0851649124474727e-07, |
| "loss": 0.4939, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.8552330022918259, |
| "grad_norm": 5.28690485826026, |
| "learning_rate": 1.0795659546451397e-07, |
| "loss": 0.5272, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "grad_norm": 9.375052196894691, |
| "learning_rate": 1.0739806539989482e-07, |
| "loss": 0.5103, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8559969442322384, |
| "grad_norm": 7.951196817714195, |
| "learning_rate": 1.0684090190599782e-07, |
| "loss": 0.5101, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.8563789152024446, |
| "grad_norm": 7.003244801403864, |
| "learning_rate": 1.0628510583583861e-07, |
| "loss": 0.5662, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.8567608861726509, |
| "grad_norm": 11.099276113974952, |
| "learning_rate": 1.0573067804033897e-07, |
| "loss": 0.5318, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 2.2709048820976707, |
| "learning_rate": 1.0517761936832615e-07, |
| "loss": 0.4368, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.8575248281130634, |
| "grad_norm": 7.724425718241674, |
| "learning_rate": 1.0462593066653159e-07, |
| "loss": 0.4265, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8579067990832697, |
| "grad_norm": 8.808875882128211, |
| "learning_rate": 1.0407561277958831e-07, |
| "loss": 0.5229, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.8582887700534759, |
| "grad_norm": 4.23873315916548, |
| "learning_rate": 1.0352666655003173e-07, |
| "loss": 0.516, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.8586707410236822, |
| "grad_norm": 4.203252767377129, |
| "learning_rate": 1.0297909281829642e-07, |
| "loss": 0.4807, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.8590527119938884, |
| "grad_norm": 4.602668002547525, |
| "learning_rate": 1.0243289242271625e-07, |
| "loss": 0.476, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.8594346829640948, |
| "grad_norm": 2.7565613852101833, |
| "learning_rate": 1.0188806619952239e-07, |
| "loss": 0.4661, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.859816653934301, |
| "grad_norm": 2.9586330341043285, |
| "learning_rate": 1.0134461498284175e-07, |
| "loss": 0.4072, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.8601986249045073, |
| "grad_norm": 3.921729259810394, |
| "learning_rate": 1.0080253960469653e-07, |
| "loss": 0.5747, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.8605805958747135, |
| "grad_norm": 24.577549995932177, |
| "learning_rate": 1.0026184089500266e-07, |
| "loss": 0.5181, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.8609625668449198, |
| "grad_norm": 2.887900020199325, |
| "learning_rate": 9.972251968156775e-08, |
| "loss": 0.4497, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.8613445378151261, |
| "grad_norm": 4.525304080726074, |
| "learning_rate": 9.918457679009095e-08, |
| "loss": 0.5317, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8617265087853323, |
| "grad_norm": 3.461849105116977, |
| "learning_rate": 9.864801304416159e-08, |
| "loss": 0.5421, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.8621084797555386, |
| "grad_norm": 2.8242327267596985, |
| "learning_rate": 9.811282926525632e-08, |
| "loss": 0.4628, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.8624904507257448, |
| "grad_norm": 9.00792987972709, |
| "learning_rate": 9.757902627274039e-08, |
| "loss": 0.505, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.8628724216959511, |
| "grad_norm": 5.880509030524464, |
| "learning_rate": 9.704660488386418e-08, |
| "loss": 0.4353, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.8632543926661573, |
| "grad_norm": 4.774488398492021, |
| "learning_rate": 9.651556591376309e-08, |
| "loss": 0.4097, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8636363636363636, |
| "grad_norm": 3.6326302819898615, |
| "learning_rate": 9.598591017545643e-08, |
| "loss": 0.4939, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.8640183346065698, |
| "grad_norm": 2.4374436794116012, |
| "learning_rate": 9.545763847984512e-08, |
| "loss": 0.5212, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.8644003055767762, |
| "grad_norm": 2.223777079210593, |
| "learning_rate": 9.493075163571152e-08, |
| "loss": 0.4479, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.8647822765469825, |
| "grad_norm": 3.53955584612186, |
| "learning_rate": 9.440525044971793e-08, |
| "loss": 0.4763, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.8651642475171887, |
| "grad_norm": 2.9697819295899017, |
| "learning_rate": 9.388113572640454e-08, |
| "loss": 0.4184, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.865546218487395, |
| "grad_norm": 4.924061622038229, |
| "learning_rate": 9.335840826818975e-08, |
| "loss": 0.5132, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.8659281894576012, |
| "grad_norm": 3.511432459698299, |
| "learning_rate": 9.283706887536769e-08, |
| "loss": 0.4636, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.8663101604278075, |
| "grad_norm": 2.5156321800382826, |
| "learning_rate": 9.23171183461069e-08, |
| "loss": 0.4564, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.8666921313980137, |
| "grad_norm": 8.531918185130868, |
| "learning_rate": 9.179855747645027e-08, |
| "loss": 0.462, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.86707410236822, |
| "grad_norm": 2.6380802463987316, |
| "learning_rate": 9.128138706031274e-08, |
| "loss": 0.4548, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8674560733384262, |
| "grad_norm": 4.445704232933789, |
| "learning_rate": 9.07656078894805e-08, |
| "loss": 0.5486, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.8678380443086325, |
| "grad_norm": 2.1578780349871587, |
| "learning_rate": 9.025122075361013e-08, |
| "loss": 0.4434, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.8682200152788389, |
| "grad_norm": 2.6670098332020467, |
| "learning_rate": 8.973822644022632e-08, |
| "loss": 0.4345, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.868601986249045, |
| "grad_norm": 7.2891934149937345, |
| "learning_rate": 8.922662573472195e-08, |
| "loss": 0.4511, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.8689839572192514, |
| "grad_norm": 10.840324811059782, |
| "learning_rate": 8.871641942035612e-08, |
| "loss": 0.5316, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.8693659281894576, |
| "grad_norm": 4.289385323121571, |
| "learning_rate": 8.820760827825292e-08, |
| "loss": 0.5083, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.8697478991596639, |
| "grad_norm": 3.1582635618624133, |
| "learning_rate": 8.770019308740051e-08, |
| "loss": 0.51, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.8701298701298701, |
| "grad_norm": 1.8803307145675334, |
| "learning_rate": 8.719417462465039e-08, |
| "loss": 0.4284, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.8705118411000764, |
| "grad_norm": 3.5424445332825427, |
| "learning_rate": 8.668955366471465e-08, |
| "loss": 0.458, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.8708938120702827, |
| "grad_norm": 3.201396165183148, |
| "learning_rate": 8.618633098016681e-08, |
| "loss": 0.4927, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8712757830404889, |
| "grad_norm": 3.991953266601079, |
| "learning_rate": 8.568450734143873e-08, |
| "loss": 0.4885, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.8716577540106952, |
| "grad_norm": 3.4953614222608214, |
| "learning_rate": 8.518408351682127e-08, |
| "loss": 0.5084, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.8720397249809014, |
| "grad_norm": 2.695167087812339, |
| "learning_rate": 8.468506027246158e-08, |
| "loss": 0.4777, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.8724216959511077, |
| "grad_norm": 5.30059850738619, |
| "learning_rate": 8.418743837236242e-08, |
| "loss": 0.5029, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.872803666921314, |
| "grad_norm": 2.629519582190331, |
| "learning_rate": 8.369121857838157e-08, |
| "loss": 0.4694, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.8731856378915203, |
| "grad_norm": 4.258188402714665, |
| "learning_rate": 8.319640165023012e-08, |
| "loss": 0.4264, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.8735676088617265, |
| "grad_norm": 2.4821274197746366, |
| "learning_rate": 8.270298834547085e-08, |
| "loss": 0.4497, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.8739495798319328, |
| "grad_norm": 6.887109932801548, |
| "learning_rate": 8.221097941951816e-08, |
| "loss": 0.4585, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.8743315508021391, |
| "grad_norm": 3.5491839840353103, |
| "learning_rate": 8.172037562563605e-08, |
| "loss": 0.4904, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.8747135217723453, |
| "grad_norm": 3.3348486996500757, |
| "learning_rate": 8.123117771493737e-08, |
| "loss": 0.4956, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8750954927425516, |
| "grad_norm": 5.276977593463025, |
| "learning_rate": 8.074338643638279e-08, |
| "loss": 0.4255, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.8754774637127578, |
| "grad_norm": 5.252023258479506, |
| "learning_rate": 8.025700253677892e-08, |
| "loss": 0.5391, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.8758594346829641, |
| "grad_norm": 2.187421377334062, |
| "learning_rate": 7.977202676077799e-08, |
| "loss": 0.4243, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.8762414056531703, |
| "grad_norm": 3.491067074766581, |
| "learning_rate": 7.928845985087662e-08, |
| "loss": 0.5148, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.8766233766233766, |
| "grad_norm": 4.314321061348468, |
| "learning_rate": 7.880630254741394e-08, |
| "loss": 0.5677, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.8770053475935828, |
| "grad_norm": 2.5728758575647257, |
| "learning_rate": 7.832555558857135e-08, |
| "loss": 0.4648, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.8773873185637892, |
| "grad_norm": 17.279851999637817, |
| "learning_rate": 7.784621971037108e-08, |
| "loss": 0.5449, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.8777692895339955, |
| "grad_norm": 10.228330033165268, |
| "learning_rate": 7.736829564667447e-08, |
| "loss": 0.4463, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.8781512605042017, |
| "grad_norm": 4.060117251246413, |
| "learning_rate": 7.689178412918218e-08, |
| "loss": 0.4704, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.878533231474408, |
| "grad_norm": 5.799277070801896, |
| "learning_rate": 7.641668588743133e-08, |
| "loss": 0.4838, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8789152024446142, |
| "grad_norm": 3.719374887698917, |
| "learning_rate": 7.594300164879619e-08, |
| "loss": 0.5071, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.8792971734148205, |
| "grad_norm": 4.264362033842296, |
| "learning_rate": 7.547073213848577e-08, |
| "loss": 0.4403, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.8796791443850267, |
| "grad_norm": 3.788827425478075, |
| "learning_rate": 7.4999878079543e-08, |
| "loss": 0.5339, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.880061115355233, |
| "grad_norm": 2.5289529401638777, |
| "learning_rate": 7.453044019284405e-08, |
| "loss": 0.4423, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.8804430863254392, |
| "grad_norm": 7.002007214800676, |
| "learning_rate": 7.40624191970971e-08, |
| "loss": 0.4629, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.8808250572956455, |
| "grad_norm": 5.266495507291039, |
| "learning_rate": 7.359581580884033e-08, |
| "loss": 0.5676, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.8812070282658518, |
| "grad_norm": 3.8260975120477334, |
| "learning_rate": 7.31306307424423e-08, |
| "loss": 0.5122, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.881588999236058, |
| "grad_norm": 2.790345430405287, |
| "learning_rate": 7.266686471009997e-08, |
| "loss": 0.4735, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.8819709702062644, |
| "grad_norm": 3.684760773280003, |
| "learning_rate": 7.220451842183739e-08, |
| "loss": 0.5038, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 4.563314359100101, |
| "learning_rate": 7.174359258550556e-08, |
| "loss": 0.5461, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.8827349121466769, |
| "grad_norm": 2.5504725475804335, |
| "learning_rate": 7.12840879067802e-08, |
| "loss": 0.4883, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.8831168831168831, |
| "grad_norm": 5.159420179728155, |
| "learning_rate": 7.082600508916159e-08, |
| "loss": 0.5941, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.8834988540870894, |
| "grad_norm": 11.219695075561999, |
| "learning_rate": 7.036934483397317e-08, |
| "loss": 0.476, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.8838808250572956, |
| "grad_norm": 2.978056435882148, |
| "learning_rate": 6.991410784036022e-08, |
| "loss": 0.477, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.8842627960275019, |
| "grad_norm": 3.7975254482400365, |
| "learning_rate": 6.946029480528903e-08, |
| "loss": 0.4614, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.8846447669977082, |
| "grad_norm": 3.4134180584954406, |
| "learning_rate": 6.900790642354637e-08, |
| "loss": 0.4966, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.8850267379679144, |
| "grad_norm": 4.643651136566239, |
| "learning_rate": 6.855694338773688e-08, |
| "loss": 0.5225, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.8854087089381207, |
| "grad_norm": 9.136809850536642, |
| "learning_rate": 6.810740638828383e-08, |
| "loss": 0.4469, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.8857906799083269, |
| "grad_norm": 3.1500664562294083, |
| "learning_rate": 6.76592961134268e-08, |
| "loss": 0.5334, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.8861726508785333, |
| "grad_norm": 2.409148369848989, |
| "learning_rate": 6.721261324922112e-08, |
| "loss": 0.3789, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8865546218487395, |
| "grad_norm": 2.8271574701763527, |
| "learning_rate": 6.67673584795373e-08, |
| "loss": 0.5126, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.8869365928189458, |
| "grad_norm": 2.8112882228861777, |
| "learning_rate": 6.632353248605837e-08, |
| "loss": 0.5374, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.887318563789152, |
| "grad_norm": 2.1807020495214937, |
| "learning_rate": 6.588113594828093e-08, |
| "loss": 0.4594, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.8877005347593583, |
| "grad_norm": 4.0306723914894675, |
| "learning_rate": 6.544016954351239e-08, |
| "loss": 0.431, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.8880825057295646, |
| "grad_norm": 3.8868049027748692, |
| "learning_rate": 6.500063394687106e-08, |
| "loss": 0.4137, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.8884644766997708, |
| "grad_norm": 6.682039265354064, |
| "learning_rate": 6.456252983128474e-08, |
| "loss": 0.4696, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.8888464476699771, |
| "grad_norm": 3.4499785260092697, |
| "learning_rate": 6.412585786748903e-08, |
| "loss": 0.5297, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.8892284186401833, |
| "grad_norm": 2.429085558496027, |
| "learning_rate": 6.369061872402759e-08, |
| "loss": 0.513, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.8896103896103896, |
| "grad_norm": 3.9376933833626175, |
| "learning_rate": 6.325681306725005e-08, |
| "loss": 0.4788, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.8899923605805958, |
| "grad_norm": 3.2597699671375877, |
| "learning_rate": 6.282444156131151e-08, |
| "loss": 0.4658, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.8903743315508021, |
| "grad_norm": 4.26811354289429, |
| "learning_rate": 6.239350486817152e-08, |
| "loss": 0.5007, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.8907563025210085, |
| "grad_norm": 4.2206368649504755, |
| "learning_rate": 6.196400364759247e-08, |
| "loss": 0.4902, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.8911382734912147, |
| "grad_norm": 2.4907328780717592, |
| "learning_rate": 6.153593855713968e-08, |
| "loss": 0.4511, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.891520244461421, |
| "grad_norm": 4.959565850081995, |
| "learning_rate": 6.110931025217925e-08, |
| "loss": 0.5045, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.8919022154316272, |
| "grad_norm": 3.7572745391689524, |
| "learning_rate": 6.068411938587781e-08, |
| "loss": 0.5677, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.8922841864018335, |
| "grad_norm": 2.379380511225663, |
| "learning_rate": 6.02603666092013e-08, |
| "loss": 0.4285, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.8926661573720397, |
| "grad_norm": 3.754647796988103, |
| "learning_rate": 5.983805257091368e-08, |
| "loss": 0.5302, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.893048128342246, |
| "grad_norm": 2.848286049389822, |
| "learning_rate": 5.941717791757672e-08, |
| "loss": 0.4725, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.8934300993124522, |
| "grad_norm": 3.033769690010149, |
| "learning_rate": 5.899774329354779e-08, |
| "loss": 0.5222, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.8938120702826585, |
| "grad_norm": 2.822997726176448, |
| "learning_rate": 5.857974934098009e-08, |
| "loss": 0.4834, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8941940412528648, |
| "grad_norm": 3.6218212785099846, |
| "learning_rate": 5.816319669982128e-08, |
| "loss": 0.4322, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.894576012223071, |
| "grad_norm": 4.100612733001243, |
| "learning_rate": 5.774808600781189e-08, |
| "loss": 0.4262, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.8949579831932774, |
| "grad_norm": 2.564049036298038, |
| "learning_rate": 5.733441790048521e-08, |
| "loss": 0.4038, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.8953399541634836, |
| "grad_norm": 3.9446403466513287, |
| "learning_rate": 5.692219301116552e-08, |
| "loss": 0.4825, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.8957219251336899, |
| "grad_norm": 6.785105325773704, |
| "learning_rate": 5.651141197096798e-08, |
| "loss": 0.5957, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.8961038961038961, |
| "grad_norm": 2.694455153342699, |
| "learning_rate": 5.61020754087973e-08, |
| "loss": 0.476, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.8964858670741024, |
| "grad_norm": 3.815755349635915, |
| "learning_rate": 5.5694183951346065e-08, |
| "loss": 0.4931, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.8968678380443086, |
| "grad_norm": 3.8630788349567897, |
| "learning_rate": 5.528773822309496e-08, |
| "loss": 0.5034, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.8972498090145149, |
| "grad_norm": 2.5044630949597626, |
| "learning_rate": 5.488273884631123e-08, |
| "loss": 0.5415, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.8976317799847212, |
| "grad_norm": 2.867664534297458, |
| "learning_rate": 5.447918644104743e-08, |
| "loss": 0.4707, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.8980137509549274, |
| "grad_norm": 8.48950132856148, |
| "learning_rate": 5.407708162514113e-08, |
| "loss": 0.498, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.8983957219251337, |
| "grad_norm": 2.34402164869529, |
| "learning_rate": 5.3676425014213435e-08, |
| "loss": 0.4552, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.8987776928953399, |
| "grad_norm": 4.1371745573301615, |
| "learning_rate": 5.327721722166834e-08, |
| "loss": 0.5665, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.8991596638655462, |
| "grad_norm": 6.504312039213924, |
| "learning_rate": 5.287945885869194e-08, |
| "loss": 0.4734, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.8995416348357524, |
| "grad_norm": 2.9021662899236684, |
| "learning_rate": 5.248315053425056e-08, |
| "loss": 0.4604, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.8999236058059588, |
| "grad_norm": 3.403797598625929, |
| "learning_rate": 5.208829285509142e-08, |
| "loss": 0.5411, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.900305576776165, |
| "grad_norm": 4.783477891165306, |
| "learning_rate": 5.169488642574027e-08, |
| "loss": 0.4629, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.9006875477463713, |
| "grad_norm": 15.68911867533447, |
| "learning_rate": 5.130293184850099e-08, |
| "loss": 0.5054, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.9010695187165776, |
| "grad_norm": 2.5332595127468616, |
| "learning_rate": 5.091242972345478e-08, |
| "loss": 0.5008, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.9014514896867838, |
| "grad_norm": 8.588767382940686, |
| "learning_rate": 5.052338064845929e-08, |
| "loss": 0.4966, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9018334606569901, |
| "grad_norm": 4.865329868153571, |
| "learning_rate": 5.0135785219147296e-08, |
| "loss": 0.466, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.9022154316271963, |
| "grad_norm": 3.3066273732746083, |
| "learning_rate": 4.974964402892634e-08, |
| "loss": 0.4862, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.9025974025974026, |
| "grad_norm": 3.3019950317215727, |
| "learning_rate": 4.936495766897708e-08, |
| "loss": 0.4352, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.9029793735676088, |
| "grad_norm": 4.445642650292908, |
| "learning_rate": 4.898172672825318e-08, |
| "loss": 0.4258, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.9033613445378151, |
| "grad_norm": 2.6308787434531125, |
| "learning_rate": 4.859995179348031e-08, |
| "loss": 0.4671, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.9037433155080213, |
| "grad_norm": 4.461380943240074, |
| "learning_rate": 4.821963344915425e-08, |
| "loss": 0.45, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.9041252864782277, |
| "grad_norm": 2.569926207230875, |
| "learning_rate": 4.7840772277541355e-08, |
| "loss": 0.5161, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.904507257448434, |
| "grad_norm": 2.310180671077372, |
| "learning_rate": 4.746336885867708e-08, |
| "loss": 0.413, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.9048892284186402, |
| "grad_norm": 8.008831838428597, |
| "learning_rate": 4.708742377036445e-08, |
| "loss": 0.4887, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.9052711993888465, |
| "grad_norm": 2.417115548070838, |
| "learning_rate": 4.6712937588174516e-08, |
| "loss": 0.4353, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9056531703590527, |
| "grad_norm": 4.394918894147626, |
| "learning_rate": 4.633991088544431e-08, |
| "loss": 0.4839, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.906035141329259, |
| "grad_norm": 8.02100161280262, |
| "learning_rate": 4.5968344233276555e-08, |
| "loss": 0.4704, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.9064171122994652, |
| "grad_norm": 3.9031007453137305, |
| "learning_rate": 4.5598238200538656e-08, |
| "loss": 0.4826, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.9067990832696715, |
| "grad_norm": 2.4618887290175016, |
| "learning_rate": 4.522959335386156e-08, |
| "loss": 0.4246, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.9071810542398777, |
| "grad_norm": 3.934186892254672, |
| "learning_rate": 4.4862410257639596e-08, |
| "loss": 0.4209, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.907563025210084, |
| "grad_norm": 2.1542872213974675, |
| "learning_rate": 4.449668947402896e-08, |
| "loss": 0.3929, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.9079449961802903, |
| "grad_norm": 18.797820507369316, |
| "learning_rate": 4.413243156294666e-08, |
| "loss": 0.4449, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.9083269671504965, |
| "grad_norm": 2.8407513122567387, |
| "learning_rate": 4.376963708207071e-08, |
| "loss": 0.4631, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.9087089381207029, |
| "grad_norm": 4.291609007601337, |
| "learning_rate": 4.340830658683825e-08, |
| "loss": 0.52, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 4.264413087780691, |
| "learning_rate": 4.304844063044499e-08, |
| "loss": 0.577, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9094728800611154, |
| "grad_norm": 3.5747033080813173, |
| "learning_rate": 4.2690039763844667e-08, |
| "loss": 0.5238, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.9098548510313216, |
| "grad_norm": 3.5116154000371353, |
| "learning_rate": 4.233310453574801e-08, |
| "loss": 0.5185, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.9102368220015279, |
| "grad_norm": 3.3241750674383623, |
| "learning_rate": 4.197763549262146e-08, |
| "loss": 0.5087, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.9106187929717342, |
| "grad_norm": 3.5558252293960857, |
| "learning_rate": 4.1623633178687114e-08, |
| "loss": 0.5488, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.9110007639419404, |
| "grad_norm": 3.4242856695089157, |
| "learning_rate": 4.1271098135921336e-08, |
| "loss": 0.5749, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.9113827349121467, |
| "grad_norm": 2.8263395122975896, |
| "learning_rate": 4.092003090405416e-08, |
| "loss": 0.428, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.9117647058823529, |
| "grad_norm": 4.024724825063178, |
| "learning_rate": 4.0570432020568644e-08, |
| "loss": 0.4923, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.9121466768525592, |
| "grad_norm": 2.4570053679729487, |
| "learning_rate": 4.0222302020699094e-08, |
| "loss": 0.4676, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.9125286478227654, |
| "grad_norm": 4.360719142970035, |
| "learning_rate": 3.987564143743172e-08, |
| "loss": 0.5316, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.9129106187929718, |
| "grad_norm": 3.028029807023603, |
| "learning_rate": 3.953045080150297e-08, |
| "loss": 0.4971, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.913292589763178, |
| "grad_norm": 2.3050412394820627, |
| "learning_rate": 3.9186730641398215e-08, |
| "loss": 0.4644, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.9136745607333843, |
| "grad_norm": 2.8659318912563894, |
| "learning_rate": 3.8844481483352064e-08, |
| "loss": 0.4844, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.9140565317035906, |
| "grad_norm": 6.253440099952343, |
| "learning_rate": 3.8503703851347045e-08, |
| "loss": 0.5149, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.9144385026737968, |
| "grad_norm": 5.154526787048037, |
| "learning_rate": 3.8164398267112374e-08, |
| "loss": 0.5132, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.9148204736440031, |
| "grad_norm": 2.1481475788873787, |
| "learning_rate": 3.782656525012407e-08, |
| "loss": 0.4361, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.9152024446142093, |
| "grad_norm": 2.763346783105214, |
| "learning_rate": 3.7490205317603166e-08, |
| "loss": 0.5156, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.9155844155844156, |
| "grad_norm": 2.8926375420505344, |
| "learning_rate": 3.715531898451574e-08, |
| "loss": 0.4157, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.9159663865546218, |
| "grad_norm": 3.4496840499009234, |
| "learning_rate": 3.6821906763572e-08, |
| "loss": 0.4903, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.9163483575248281, |
| "grad_norm": 3.4267602997255486, |
| "learning_rate": 3.648996916522451e-08, |
| "loss": 0.4465, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.9167303284950343, |
| "grad_norm": 5.860122366156432, |
| "learning_rate": 3.6159506697668873e-08, |
| "loss": 0.5308, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9171122994652406, |
| "grad_norm": 5.488387961195745, |
| "learning_rate": 3.5830519866842157e-08, |
| "loss": 0.5484, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.917494270435447, |
| "grad_norm": 6.794836685398453, |
| "learning_rate": 3.550300917642213e-08, |
| "loss": 0.4806, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.9178762414056532, |
| "grad_norm": 2.6093841180604693, |
| "learning_rate": 3.517697512782658e-08, |
| "loss": 0.4599, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.9182582123758595, |
| "grad_norm": 3.4504804735195282, |
| "learning_rate": 3.4852418220212566e-08, |
| "loss": 0.5402, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.9186401833460657, |
| "grad_norm": 4.185650752063594, |
| "learning_rate": 3.4529338950475714e-08, |
| "loss": 0.4815, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.919022154316272, |
| "grad_norm": 3.9855108336709373, |
| "learning_rate": 3.420773781324937e-08, |
| "loss": 0.4591, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.9194041252864782, |
| "grad_norm": 4.145701142888078, |
| "learning_rate": 3.388761530090378e-08, |
| "loss": 0.5677, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.9197860962566845, |
| "grad_norm": 6.238351276020026, |
| "learning_rate": 3.356897190354557e-08, |
| "loss": 0.488, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.9201680672268907, |
| "grad_norm": 4.5878782856218, |
| "learning_rate": 3.3251808109016956e-08, |
| "loss": 0.599, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.920550038197097, |
| "grad_norm": 2.9796613949389337, |
| "learning_rate": 3.293612440289428e-08, |
| "loss": 0.468, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9209320091673033, |
| "grad_norm": 38.85951380119334, |
| "learning_rate": 3.2621921268488504e-08, |
| "loss": 0.5484, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.9213139801375095, |
| "grad_norm": 2.4367627848014224, |
| "learning_rate": 3.230919918684372e-08, |
| "loss": 0.4052, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.9216959511077158, |
| "grad_norm": 4.128488958335345, |
| "learning_rate": 3.1997958636736265e-08, |
| "loss": 0.5357, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.922077922077922, |
| "grad_norm": 3.62015265467357, |
| "learning_rate": 3.1688200094674656e-08, |
| "loss": 0.5516, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.9224598930481284, |
| "grad_norm": 5.609850730563149, |
| "learning_rate": 3.1379924034897866e-08, |
| "loss": 0.4584, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.9228418640183346, |
| "grad_norm": 4.925225282573639, |
| "learning_rate": 3.107313092937591e-08, |
| "loss": 0.4784, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.9232238349885409, |
| "grad_norm": 3.614076871714596, |
| "learning_rate": 3.0767821247807966e-08, |
| "loss": 0.5702, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.9236058059587471, |
| "grad_norm": 2.3233578959478702, |
| "learning_rate": 3.0463995457622125e-08, |
| "loss": 0.4334, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.9239877769289534, |
| "grad_norm": 2.735136462427685, |
| "learning_rate": 3.016165402397475e-08, |
| "loss": 0.4939, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.9243697478991597, |
| "grad_norm": 14.909691725186427, |
| "learning_rate": 2.986079740974967e-08, |
| "loss": 0.5128, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9247517188693659, |
| "grad_norm": 3.937721610666776, |
| "learning_rate": 2.9561426075557115e-08, |
| "loss": 0.5306, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.9251336898395722, |
| "grad_norm": 3.9909072151582774, |
| "learning_rate": 2.9263540479734006e-08, |
| "loss": 0.5413, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.9255156608097784, |
| "grad_norm": 2.098663773977231, |
| "learning_rate": 2.8967141078342196e-08, |
| "loss": 0.4502, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.9258976317799847, |
| "grad_norm": 2.7573703745421296, |
| "learning_rate": 2.8672228325167912e-08, |
| "loss": 0.4884, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.926279602750191, |
| "grad_norm": 6.424686498232581, |
| "learning_rate": 2.8378802671722103e-08, |
| "loss": 0.5137, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9266615737203973, |
| "grad_norm": 7.560061974499517, |
| "learning_rate": 2.8086864567238078e-08, |
| "loss": 0.4843, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.9270435446906035, |
| "grad_norm": 6.3387544092100185, |
| "learning_rate": 2.7796414458672314e-08, |
| "loss": 0.5097, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.9274255156608098, |
| "grad_norm": 3.6039281338462255, |
| "learning_rate": 2.7507452790703213e-08, |
| "loss": 0.4869, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.9278074866310161, |
| "grad_norm": 5.806052774033399, |
| "learning_rate": 2.7219980005729783e-08, |
| "loss": 0.4672, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.9281894576012223, |
| "grad_norm": 7.564785636649657, |
| "learning_rate": 2.6933996543872183e-08, |
| "loss": 0.4386, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 2.9170232799721747, |
| "learning_rate": 2.6649502842970296e-08, |
| "loss": 0.4851, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.9289533995416348, |
| "grad_norm": 9.392568327180424, |
| "learning_rate": 2.6366499338582815e-08, |
| "loss": 0.5296, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.9293353705118411, |
| "grad_norm": 9.70506303380789, |
| "learning_rate": 2.6084986463987268e-08, |
| "loss": 0.4244, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.9297173414820473, |
| "grad_norm": 13.748575755941072, |
| "learning_rate": 2.5804964650179006e-08, |
| "loss": 0.5472, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.9300993124522536, |
| "grad_norm": 6.1287489440569605, |
| "learning_rate": 2.5526434325870538e-08, |
| "loss": 0.5373, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.93048128342246, |
| "grad_norm": 2.167781902789066, |
| "learning_rate": 2.524939591749109e-08, |
| "loss": 0.4751, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.9308632543926662, |
| "grad_norm": 7.266627876871788, |
| "learning_rate": 2.497384984918516e-08, |
| "loss": 0.5423, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.9312452253628725, |
| "grad_norm": 3.263820326616446, |
| "learning_rate": 2.469979654281329e-08, |
| "loss": 0.4727, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.9316271963330787, |
| "grad_norm": 3.5820381946347752, |
| "learning_rate": 2.4427236417949972e-08, |
| "loss": 0.5531, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.932009167303285, |
| "grad_norm": 8.255211815573727, |
| "learning_rate": 2.4156169891884072e-08, |
| "loss": 0.4857, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9323911382734912, |
| "grad_norm": 3.131970430778959, |
| "learning_rate": 2.3886597379617513e-08, |
| "loss": 0.45, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.9327731092436975, |
| "grad_norm": 5.276546664852998, |
| "learning_rate": 2.3618519293864823e-08, |
| "loss": 0.4804, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.9331550802139037, |
| "grad_norm": 7.611226617433435, |
| "learning_rate": 2.3351936045052924e-08, |
| "loss": 0.4964, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.93353705118411, |
| "grad_norm": 2.066917669950662, |
| "learning_rate": 2.308684804131966e-08, |
| "loss": 0.4592, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.9339190221543163, |
| "grad_norm": 69.162277438671, |
| "learning_rate": 2.2823255688514176e-08, |
| "loss": 0.4307, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9343009931245225, |
| "grad_norm": 4.138543166981627, |
| "learning_rate": 2.256115939019543e-08, |
| "loss": 0.5291, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.9346829640947288, |
| "grad_norm": 2.53167006255338, |
| "learning_rate": 2.230055954763199e-08, |
| "loss": 0.4888, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.935064935064935, |
| "grad_norm": 4.663611449231072, |
| "learning_rate": 2.2041456559801496e-08, |
| "loss": 0.552, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.9354469060351414, |
| "grad_norm": 2.829414766827518, |
| "learning_rate": 2.1783850823389515e-08, |
| "loss": 0.5144, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.9358288770053476, |
| "grad_norm": 2.5354145568379356, |
| "learning_rate": 2.15277427327899e-08, |
| "loss": 0.5085, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9362108479755539, |
| "grad_norm": 2.3139431168002016, |
| "learning_rate": 2.1273132680103334e-08, |
| "loss": 0.4271, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.9365928189457601, |
| "grad_norm": 3.6244433465779653, |
| "learning_rate": 2.102002105513678e-08, |
| "loss": 0.5293, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.9369747899159664, |
| "grad_norm": 2.5781479249341968, |
| "learning_rate": 2.076840824540349e-08, |
| "loss": 0.4442, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.9373567608861727, |
| "grad_norm": 5.273646909202582, |
| "learning_rate": 2.0518294636121757e-08, |
| "loss": 0.5527, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.9377387318563789, |
| "grad_norm": 5.390575416829436, |
| "learning_rate": 2.0269680610214833e-08, |
| "loss": 0.4569, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9381207028265852, |
| "grad_norm": 2.6922649557728886, |
| "learning_rate": 2.0022566548310027e-08, |
| "loss": 0.4581, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.9385026737967914, |
| "grad_norm": 6.359944613369677, |
| "learning_rate": 1.9776952828737926e-08, |
| "loss": 0.4757, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.9388846447669977, |
| "grad_norm": 4.587885863519702, |
| "learning_rate": 1.9532839827532732e-08, |
| "loss": 0.485, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.9392666157372039, |
| "grad_norm": 9.028109372763168, |
| "learning_rate": 1.929022791843038e-08, |
| "loss": 0.5032, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.9396485867074102, |
| "grad_norm": 4.07780994271552, |
| "learning_rate": 1.904911747286908e-08, |
| "loss": 0.4723, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9400305576776165, |
| "grad_norm": 7.248527924705579, |
| "learning_rate": 1.880950885998811e-08, |
| "loss": 0.5223, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.9404125286478228, |
| "grad_norm": 21.084309021441634, |
| "learning_rate": 1.857140244662747e-08, |
| "loss": 0.4618, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.9407944996180291, |
| "grad_norm": 9.320765763275892, |
| "learning_rate": 1.833479859732734e-08, |
| "loss": 0.4479, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 2.95765530893394, |
| "learning_rate": 1.8099697674327508e-08, |
| "loss": 0.5336, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.9415584415584416, |
| "grad_norm": 5.30798966055467, |
| "learning_rate": 1.786610003756661e-08, |
| "loss": 0.473, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9419404125286478, |
| "grad_norm": 5.338861028871045, |
| "learning_rate": 1.7634006044682126e-08, |
| "loss": 0.5285, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.9423223834988541, |
| "grad_norm": 5.805591085748861, |
| "learning_rate": 1.7403416051008924e-08, |
| "loss": 0.5057, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.9427043544690603, |
| "grad_norm": 2.9144284769885775, |
| "learning_rate": 1.717433040957972e-08, |
| "loss": 0.4936, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.9430863254392666, |
| "grad_norm": 24.451455797558253, |
| "learning_rate": 1.6946749471123956e-08, |
| "loss": 0.4861, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.9434682964094728, |
| "grad_norm": 12.971234620764871, |
| "learning_rate": 1.6720673584067148e-08, |
| "loss": 0.4059, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9438502673796791, |
| "grad_norm": 18.01743851371368, |
| "learning_rate": 1.6496103094530757e-08, |
| "loss": 0.5127, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.9442322383498855, |
| "grad_norm": 2.9056555653541305, |
| "learning_rate": 1.627303834633187e-08, |
| "loss": 0.446, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.9446142093200917, |
| "grad_norm": 2.551777757341228, |
| "learning_rate": 1.6051479680981415e-08, |
| "loss": 0.4613, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.944996180290298, |
| "grad_norm": 3.003276545289939, |
| "learning_rate": 1.5831427437685173e-08, |
| "loss": 0.5248, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.9453781512605042, |
| "grad_norm": 5.323459704857999, |
| "learning_rate": 1.561288195334265e-08, |
| "loss": 0.532, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9457601222307105, |
| "grad_norm": 2.365400516468899, |
| "learning_rate": 1.5395843562545974e-08, |
| "loss": 0.4618, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.9461420932009167, |
| "grad_norm": 2.755227125455112, |
| "learning_rate": 1.5180312597580458e-08, |
| "loss": 0.3999, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.946524064171123, |
| "grad_norm": 3.2326623623248794, |
| "learning_rate": 1.4966289388423147e-08, |
| "loss": 0.4357, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.9469060351413292, |
| "grad_norm": 2.361174127926857, |
| "learning_rate": 1.4753774262743046e-08, |
| "loss": 0.4554, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.9472880061115355, |
| "grad_norm": 4.378556170360985, |
| "learning_rate": 1.4542767545900003e-08, |
| "loss": 0.4378, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9476699770817418, |
| "grad_norm": 2.2364556283777164, |
| "learning_rate": 1.4333269560944717e-08, |
| "loss": 0.4661, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.948051948051948, |
| "grad_norm": 2.563590121932103, |
| "learning_rate": 1.4125280628617952e-08, |
| "loss": 0.4635, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.9484339190221543, |
| "grad_norm": 3.3298514105520134, |
| "learning_rate": 1.3918801067349995e-08, |
| "loss": 0.4859, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.9488158899923606, |
| "grad_norm": 5.8126286577642015, |
| "learning_rate": 1.3713831193260528e-08, |
| "loss": 0.5104, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.9491978609625669, |
| "grad_norm": 2.400524311988587, |
| "learning_rate": 1.3510371320157643e-08, |
| "loss": 0.4765, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.9495798319327731, |
| "grad_norm": 2.1295617001631304, |
| "learning_rate": 1.3308421759537836e-08, |
| "loss": 0.4082, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.9499618029029794, |
| "grad_norm": 13.243672852391542, |
| "learning_rate": 1.3107982820585228e-08, |
| "loss": 0.4391, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.9503437738731857, |
| "grad_norm": 3.0429698587661154, |
| "learning_rate": 1.2909054810171239e-08, |
| "loss": 0.4903, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.9507257448433919, |
| "grad_norm": 4.626317767425915, |
| "learning_rate": 1.2711638032854021e-08, |
| "loss": 0.4923, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.9511077158135982, |
| "grad_norm": 6.637464390802047, |
| "learning_rate": 1.251573279087792e-08, |
| "loss": 0.4712, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9514896867838044, |
| "grad_norm": 3.5719726736167736, |
| "learning_rate": 1.2321339384173345e-08, |
| "loss": 0.5081, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.9518716577540107, |
| "grad_norm": 4.561259347019035, |
| "learning_rate": 1.21284581103559e-08, |
| "loss": 0.5352, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.9522536287242169, |
| "grad_norm": 2.8215706918666403, |
| "learning_rate": 1.1937089264726253e-08, |
| "loss": 0.519, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.9526355996944232, |
| "grad_norm": 3.378391343951543, |
| "learning_rate": 1.1747233140269596e-08, |
| "loss": 0.4295, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.9530175706646294, |
| "grad_norm": 8.946458315475008, |
| "learning_rate": 1.1558890027654866e-08, |
| "loss": 0.4447, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.9533995416348358, |
| "grad_norm": 7.700140610935818, |
| "learning_rate": 1.1372060215234847e-08, |
| "loss": 0.4444, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.9537815126050421, |
| "grad_norm": 6.912722171712339, |
| "learning_rate": 1.118674398904551e-08, |
| "loss": 0.4639, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.9541634835752483, |
| "grad_norm": 4.024897980877189, |
| "learning_rate": 1.100294163280513e-08, |
| "loss": 0.492, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.9545454545454546, |
| "grad_norm": 3.0992206704953342, |
| "learning_rate": 1.0820653427914828e-08, |
| "loss": 0.522, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.9549274255156608, |
| "grad_norm": 2.855578844605935, |
| "learning_rate": 1.063987965345703e-08, |
| "loss": 0.4911, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9553093964858671, |
| "grad_norm": 3.906976426151052, |
| "learning_rate": 1.04606205861959e-08, |
| "loss": 0.4569, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.9556913674560733, |
| "grad_norm": 12.941475800622458, |
| "learning_rate": 1.0282876500576688e-08, |
| "loss": 0.4352, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.9560733384262796, |
| "grad_norm": 4.644900257875716, |
| "learning_rate": 1.01066476687246e-08, |
| "loss": 0.4578, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.9564553093964858, |
| "grad_norm": 2.668015599505941, |
| "learning_rate": 9.931934360445814e-09, |
| "loss": 0.4355, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.9568372803666921, |
| "grad_norm": 2.806844667156648, |
| "learning_rate": 9.758736843225701e-09, |
| "loss": 0.4629, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.9572192513368984, |
| "grad_norm": 2.660779902021113, |
| "learning_rate": 9.587055382229037e-09, |
| "loss": 0.5203, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.9576012223071046, |
| "grad_norm": 3.800974420517706, |
| "learning_rate": 9.41689024029968e-09, |
| "loss": 0.4719, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.957983193277311, |
| "grad_norm": 3.0800978990708643, |
| "learning_rate": 9.248241677960011e-09, |
| "loss": 0.4703, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.9583651642475172, |
| "grad_norm": 2.978333812158537, |
| "learning_rate": 9.08110995341016e-09, |
| "loss": 0.4924, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.9587471352177235, |
| "grad_norm": 2.790216095881215, |
| "learning_rate": 8.915495322528555e-09, |
| "loss": 0.4961, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9591291061879297, |
| "grad_norm": 3.960282013008847, |
| "learning_rate": 8.751398038870484e-09, |
| "loss": 0.5025, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.959511077158136, |
| "grad_norm": 3.166325201504291, |
| "learning_rate": 8.588818353668537e-09, |
| "loss": 0.453, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.9598930481283422, |
| "grad_norm": 19.411603373034804, |
| "learning_rate": 8.427756515831497e-09, |
| "loss": 0.4555, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.9602750190985485, |
| "grad_norm": 2.721687001904413, |
| "learning_rate": 8.26821277194467e-09, |
| "loss": 0.4848, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.9606569900687548, |
| "grad_norm": 2.762646240823191, |
| "learning_rate": 8.110187366268894e-09, |
| "loss": 0.5042, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.961038961038961, |
| "grad_norm": 2.817165214716127, |
| "learning_rate": 7.953680540740748e-09, |
| "loss": 0.4613, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.9614209320091673, |
| "grad_norm": 7.43002937648401, |
| "learning_rate": 7.798692534971451e-09, |
| "loss": 0.4693, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.9618029029793735, |
| "grad_norm": 3.180631330964851, |
| "learning_rate": 7.645223586247418e-09, |
| "loss": 0.5238, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.9621848739495799, |
| "grad_norm": 2.856222981196358, |
| "learning_rate": 7.493273929528921e-09, |
| "loss": 0.4452, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 3.3447643860752265, |
| "learning_rate": 7.3428437974504265e-09, |
| "loss": 0.5306, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9629488158899924, |
| "grad_norm": 2.3267905770269666, |
| "learning_rate": 7.193933420320042e-09, |
| "loss": 0.4516, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.9633307868601986, |
| "grad_norm": 2.7302756172093074, |
| "learning_rate": 7.046543026118956e-09, |
| "loss": 0.4803, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.9637127578304049, |
| "grad_norm": 4.923203717473156, |
| "learning_rate": 6.900672840501554e-09, |
| "loss": 0.6031, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.9640947288006112, |
| "grad_norm": 3.8328762563622543, |
| "learning_rate": 6.7563230867946354e-09, |
| "loss": 0.4466, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.9644766997708174, |
| "grad_norm": 4.211498973825368, |
| "learning_rate": 6.613493985997088e-09, |
| "loss": 0.4968, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.9648586707410237, |
| "grad_norm": 5.64224813345612, |
| "learning_rate": 6.47218575677988e-09, |
| "loss": 0.4485, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.9652406417112299, |
| "grad_norm": 3.6256739132444364, |
| "learning_rate": 6.332398615485512e-09, |
| "loss": 0.4546, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.9656226126814362, |
| "grad_norm": 3.114141972804922, |
| "learning_rate": 6.194132776127458e-09, |
| "loss": 0.4622, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.9660045836516424, |
| "grad_norm": 4.335451632477308, |
| "learning_rate": 6.057388450390499e-09, |
| "loss": 0.4512, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.9663865546218487, |
| "grad_norm": 3.6689875192180774, |
| "learning_rate": 5.922165847629612e-09, |
| "loss": 0.4927, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.966768525592055, |
| "grad_norm": 2.7010211520935528, |
| "learning_rate": 5.788465174870194e-09, |
| "loss": 0.4484, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.9671504965622613, |
| "grad_norm": 3.6118256042660954, |
| "learning_rate": 5.656286636807728e-09, |
| "loss": 0.4991, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.9675324675324676, |
| "grad_norm": 4.550237561723368, |
| "learning_rate": 5.525630435806894e-09, |
| "loss": 0.5044, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.9679144385026738, |
| "grad_norm": 3.809819785626597, |
| "learning_rate": 5.396496771902015e-09, |
| "loss": 0.5116, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.9682964094728801, |
| "grad_norm": 2.018702332806629, |
| "learning_rate": 5.268885842796389e-09, |
| "loss": 0.4236, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.9686783804430863, |
| "grad_norm": 3.0965862496642207, |
| "learning_rate": 5.142797843861846e-09, |
| "loss": 0.47, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.9690603514132926, |
| "grad_norm": 5.721837411629174, |
| "learning_rate": 5.018232968138747e-09, |
| "loss": 0.6053, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.9694423223834988, |
| "grad_norm": 4.538074720461138, |
| "learning_rate": 4.895191406335542e-09, |
| "loss": 0.4949, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.9698242933537051, |
| "grad_norm": 3.284799260650926, |
| "learning_rate": 4.773673346828322e-09, |
| "loss": 0.496, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.9702062643239114, |
| "grad_norm": 2.30954834543498, |
| "learning_rate": 4.653678975661157e-09, |
| "loss": 0.4054, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 5.735682125808379, |
| "learning_rate": 4.535208476544761e-09, |
| "loss": 0.5455, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.970970206264324, |
| "grad_norm": 3.7993614694320303, |
| "learning_rate": 4.418262030857156e-09, |
| "loss": 0.4997, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.9713521772345302, |
| "grad_norm": 2.7825335207587245, |
| "learning_rate": 4.302839817643122e-09, |
| "loss": 0.4377, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.9717341482047365, |
| "grad_norm": 2.7985021160529144, |
| "learning_rate": 4.188942013613417e-09, |
| "loss": 0.5475, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.9721161191749427, |
| "grad_norm": 6.054199223017417, |
| "learning_rate": 4.076568793145552e-09, |
| "loss": 0.4502, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.972498090145149, |
| "grad_norm": 5.005797345035652, |
| "learning_rate": 3.9657203282823514e-09, |
| "loss": 0.4847, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.9728800611153552, |
| "grad_norm": 3.542144976610882, |
| "learning_rate": 3.856396788732508e-09, |
| "loss": 0.4225, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.9732620320855615, |
| "grad_norm": 3.9331468206663573, |
| "learning_rate": 3.748598341870135e-09, |
| "loss": 0.4686, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.9736440030557678, |
| "grad_norm": 11.047515167994217, |
| "learning_rate": 3.6423251527341048e-09, |
| "loss": 0.4378, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.974025974025974, |
| "grad_norm": 5.641008244986063, |
| "learning_rate": 3.5375773840284895e-09, |
| "loss": 0.4458, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9744079449961803, |
| "grad_norm": 3.1768114309293574, |
| "learning_rate": 3.434355196121674e-09, |
| "loss": 0.4294, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.9747899159663865, |
| "grad_norm": 3.326041714498732, |
| "learning_rate": 3.3326587470465793e-09, |
| "loss": 0.4936, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.9751718869365928, |
| "grad_norm": 3.192592176693493, |
| "learning_rate": 3.2324881925001045e-09, |
| "loss": 0.5045, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.975553857906799, |
| "grad_norm": 3.6882386753493313, |
| "learning_rate": 3.1338436858431293e-09, |
| "loss": 0.4618, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.9759358288770054, |
| "grad_norm": 3.1630091731444105, |
| "learning_rate": 3.036725378099958e-09, |
| "loss": 0.5647, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.9763177998472116, |
| "grad_norm": 6.080592480427539, |
| "learning_rate": 2.941133417958541e-09, |
| "loss": 0.4768, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.9766997708174179, |
| "grad_norm": 3.32881963748884, |
| "learning_rate": 2.8470679517696995e-09, |
| "loss": 0.4456, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.9770817417876242, |
| "grad_norm": 2.825770502568057, |
| "learning_rate": 2.7545291235475665e-09, |
| "loss": 0.4687, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.9774637127578304, |
| "grad_norm": 2.8682663422456565, |
| "learning_rate": 2.663517074968591e-09, |
| "loss": 0.4346, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.9778456837280367, |
| "grad_norm": 20.475308532466034, |
| "learning_rate": 2.5740319453720906e-09, |
| "loss": 0.5423, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9782276546982429, |
| "grad_norm": 3.427236872657874, |
| "learning_rate": 2.4860738717593643e-09, |
| "loss": 0.4925, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.9786096256684492, |
| "grad_norm": 3.4960090273367253, |
| "learning_rate": 2.399642988794137e-09, |
| "loss": 0.4906, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.9789915966386554, |
| "grad_norm": 5.310310660843343, |
| "learning_rate": 2.314739428801671e-09, |
| "loss": 0.5329, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.9793735676088617, |
| "grad_norm": 2.1641671491311025, |
| "learning_rate": 2.2313633217689865e-09, |
| "loss": 0.4817, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.9797555385790679, |
| "grad_norm": 7.750415967420114, |
| "learning_rate": 2.1495147953448643e-09, |
| "loss": 0.5322, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.9801375095492743, |
| "grad_norm": 2.076980851759081, |
| "learning_rate": 2.0691939748389566e-09, |
| "loss": 0.4361, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.9805194805194806, |
| "grad_norm": 4.796938970992404, |
| "learning_rate": 1.990400983222229e-09, |
| "loss": 0.4756, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.9809014514896868, |
| "grad_norm": 6.541046842142587, |
| "learning_rate": 1.9131359411265203e-09, |
| "loss": 0.4655, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.9812834224598931, |
| "grad_norm": 5.938506466582482, |
| "learning_rate": 1.8373989668443168e-09, |
| "loss": 0.5141, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.9816653934300993, |
| "grad_norm": 2.8652369163931986, |
| "learning_rate": 1.7631901763287549e-09, |
| "loss": 0.5437, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.9820473644003056, |
| "grad_norm": 2.910245040907176, |
| "learning_rate": 1.6905096831931753e-09, |
| "loss": 0.4538, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.9824293353705118, |
| "grad_norm": 303.4846717609501, |
| "learning_rate": 1.6193575987112352e-09, |
| "loss": 0.4642, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.9828113063407181, |
| "grad_norm": 3.269168545125339, |
| "learning_rate": 1.5497340318165742e-09, |
| "loss": 0.4603, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.9831932773109243, |
| "grad_norm": 2.640937522483087, |
| "learning_rate": 1.4816390891025931e-09, |
| "loss": 0.4574, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.9835752482811306, |
| "grad_norm": 3.57904681889891, |
| "learning_rate": 1.4150728748224538e-09, |
| "loss": 0.5048, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.983957219251337, |
| "grad_norm": 8.684873939885174, |
| "learning_rate": 1.3500354908888566e-09, |
| "loss": 0.5121, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.9843391902215431, |
| "grad_norm": 2.807742880348149, |
| "learning_rate": 1.286527036873819e-09, |
| "loss": 0.4514, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.9847211611917495, |
| "grad_norm": 2.6363709403478466, |
| "learning_rate": 1.224547610008453e-09, |
| "loss": 0.4982, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.9851031321619557, |
| "grad_norm": 3.1684012952039726, |
| "learning_rate": 1.164097305183298e-09, |
| "loss": 0.4583, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.985485103132162, |
| "grad_norm": 3.590684132000082, |
| "learning_rate": 1.1051762149473232e-09, |
| "loss": 0.4964, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9858670741023682, |
| "grad_norm": 5.004510927683761, |
| "learning_rate": 1.0477844295087024e-09, |
| "loss": 0.4452, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.9862490450725745, |
| "grad_norm": 7.36797219813988, |
| "learning_rate": 9.919220367340387e-10, |
| "loss": 0.5761, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.9866310160427807, |
| "grad_norm": 3.418088354979299, |
| "learning_rate": 9.375891221484743e-10, |
| "loss": 0.4862, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.987012987012987, |
| "grad_norm": 2.8783008805563166, |
| "learning_rate": 8.847857689355809e-10, |
| "loss": 0.4573, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.9873949579831933, |
| "grad_norm": 13.812348241958448, |
| "learning_rate": 8.335120579370247e-10, |
| "loss": 0.4834, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.9877769289533995, |
| "grad_norm": 3.035692731524948, |
| "learning_rate": 7.837680676526792e-10, |
| "loss": 0.4293, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.9881588999236058, |
| "grad_norm": 2.311185049015733, |
| "learning_rate": 7.355538742406242e-10, |
| "loss": 0.4276, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.988540870893812, |
| "grad_norm": 2.3430547457905457, |
| "learning_rate": 6.88869551516369e-10, |
| "loss": 0.4485, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.9889228418640184, |
| "grad_norm": 4.04539151835123, |
| "learning_rate": 6.437151709536292e-10, |
| "loss": 0.5413, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.9893048128342246, |
| "grad_norm": 3.344931739067561, |
| "learning_rate": 6.000908016836614e-10, |
| "loss": 0.4889, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.9896867838044309, |
| "grad_norm": 5.715271667567572, |
| "learning_rate": 5.579965104951511e-10, |
| "loss": 0.4916, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.9900687547746372, |
| "grad_norm": 4.474119824184043, |
| "learning_rate": 5.174323618343246e-10, |
| "loss": 0.4628, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.9904507257448434, |
| "grad_norm": 4.58984054764715, |
| "learning_rate": 4.783984178047263e-10, |
| "loss": 0.4496, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.9908326967150497, |
| "grad_norm": 3.537106032864748, |
| "learning_rate": 4.40894738167219e-10, |
| "loss": 0.5855, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.9912146676852559, |
| "grad_norm": 3.2765152812486344, |
| "learning_rate": 4.0492138033998424e-10, |
| "loss": 0.4051, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.9915966386554622, |
| "grad_norm": 4.1733591811192685, |
| "learning_rate": 3.7047839939785553e-10, |
| "loss": 0.5353, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.9919786096256684, |
| "grad_norm": 4.696537622938952, |
| "learning_rate": 3.3756584807309587e-10, |
| "loss": 0.4364, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.9923605805958747, |
| "grad_norm": 3.76697874842984, |
| "learning_rate": 3.061837767547315e-10, |
| "loss": 0.5534, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.9927425515660809, |
| "grad_norm": 8.74608643405226, |
| "learning_rate": 2.7633223348844106e-10, |
| "loss": 0.4994, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.9931245225362872, |
| "grad_norm": 4.006165578795337, |
| "learning_rate": 2.480112639769993e-10, |
| "loss": 0.5504, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9935064935064936, |
| "grad_norm": 3.9404386333333754, |
| "learning_rate": 2.2122091157972256e-10, |
| "loss": 0.5143, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.9938884644766998, |
| "grad_norm": 10.682839455672944, |
| "learning_rate": 1.9596121731257908e-10, |
| "loss": 0.4989, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.9942704354469061, |
| "grad_norm": 2.76581270014255, |
| "learning_rate": 1.7223221984785651e-10, |
| "loss": 0.4919, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.9946524064171123, |
| "grad_norm": 2.909695663450389, |
| "learning_rate": 1.500339555148278e-10, |
| "loss": 0.4611, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.9950343773873186, |
| "grad_norm": 4.261689227706372, |
| "learning_rate": 1.2936645829886294e-10, |
| "loss": 0.4687, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.9954163483575248, |
| "grad_norm": 2.7347150907423297, |
| "learning_rate": 1.1022975984176231e-10, |
| "loss": 0.4518, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.9957983193277311, |
| "grad_norm": 5.4011893230672, |
| "learning_rate": 9.262388944186739e-11, |
| "loss": 0.5702, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.9961802902979373, |
| "grad_norm": 3.4713061472027165, |
| "learning_rate": 7.65488740537279e-11, |
| "loss": 0.4385, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.9965622612681436, |
| "grad_norm": 3.823133193566733, |
| "learning_rate": 6.200473828810171e-11, |
| "loss": 0.4967, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.9969442322383499, |
| "grad_norm": 2.8124484286478255, |
| "learning_rate": 4.8991504411843856e-11, |
| "loss": 0.448, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.9973262032085561, |
| "grad_norm": 8.56790054183414, |
| "learning_rate": 3.7509192348461614e-11, |
| "loss": 0.4716, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.9977081741787625, |
| "grad_norm": 3.489183065122739, |
| "learning_rate": 2.7557819677115345e-11, |
| "loss": 0.4173, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.9980901451489687, |
| "grad_norm": 3.064351005201259, |
| "learning_rate": 1.9137401633506635e-11, |
| "loss": 0.4869, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.998472116119175, |
| "grad_norm": 3.0529889006171773, |
| "learning_rate": 1.2247951109101151e-11, |
| "loss": 0.4295, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.9988540870893812, |
| "grad_norm": 11.94148717007962, |
| "learning_rate": 6.889478651794789e-12, |
| "loss": 0.4622, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.9992360580595875, |
| "grad_norm": 13.14822182317399, |
| "learning_rate": 3.0619924651364982e-12, |
| "loss": 0.5448, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.9996180290297937, |
| "grad_norm": 6.143156160558923, |
| "learning_rate": 7.654984093274919e-13, |
| "loss": 0.4924, |
| "step": 2617 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.676839745191836, |
| "learning_rate": 0.0, |
| "loss": 0.5148, |
| "step": 2618 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 2618, |
| "total_flos": 1946642581422080.0, |
| "train_loss": 0.5203552902473394, |
| "train_runtime": 30094.9628, |
| "train_samples_per_second": 22.269, |
| "train_steps_per_second": 0.087 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2618, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1946642581422080.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |