{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 1141,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006134969325153374,
      "grad_norm": 0.14804252982139587,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.5673,
      "step": 1
    },
    {
      "epoch": 0.012269938650306749,
      "grad_norm": 0.1421855241060257,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.5703,
      "step": 2
    },
    {
      "epoch": 0.018404907975460124,
      "grad_norm": 0.14651049673557281,
      "learning_rate": 2.5e-06,
      "loss": 0.5759,
      "step": 3
    },
    {
      "epoch": 0.024539877300613498,
      "grad_norm": 0.14899852871894836,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.6024,
      "step": 4
    },
    {
      "epoch": 0.03067484662576687,
      "grad_norm": 0.1509060114622116,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.5794,
      "step": 5
    },
    {
      "epoch": 0.03680981595092025,
      "grad_norm": 0.14855755865573883,
      "learning_rate": 5e-06,
      "loss": 0.5726,
      "step": 6
    },
    {
      "epoch": 0.04294478527607362,
      "grad_norm": 0.161569744348526,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.5992,
      "step": 7
    },
    {
      "epoch": 0.049079754601226995,
      "grad_norm": 0.1378004103899002,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5801,
      "step": 8
    },
    {
      "epoch": 0.05521472392638037,
      "grad_norm": 0.1505780816078186,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.5923,
      "step": 9
    },
    {
      "epoch": 0.06134969325153374,
      "grad_norm": 0.15210111439228058,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.5794,
      "step": 10
    },
    {
      "epoch": 0.06748466257668712,
      "grad_norm": 0.16127604246139526,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.5925,
      "step": 11
    },
    {
      "epoch": 0.0736196319018405,
      "grad_norm": 0.15838903188705444,
      "learning_rate": 1e-05,
      "loss": 0.5879,
      "step": 12
    },
    {
      "epoch": 0.07975460122699386,
      "grad_norm": 0.15082281827926636,
      "learning_rate": 9.999980642396502e-06,
      "loss": 0.6217,
      "step": 13
    },
    {
      "epoch": 0.08588957055214724,
      "grad_norm": 0.16154126822948456,
      "learning_rate": 9.999922569735891e-06,
      "loss": 0.5725,
      "step": 14
    },
    {
      "epoch": 0.09202453987730061,
      "grad_norm": 0.16344521939754486,
      "learning_rate": 9.999825782467827e-06,
      "loss": 0.6094,
      "step": 15
    },
    {
      "epoch": 0.09815950920245399,
      "grad_norm": 0.16911642253398895,
      "learning_rate": 9.99969028134174e-06,
      "loss": 0.5627,
      "step": 16
    },
    {
      "epoch": 0.10429447852760736,
      "grad_norm": 0.16826483607292175,
      "learning_rate": 9.999516067406818e-06,
      "loss": 0.5996,
      "step": 17
    },
    {
      "epoch": 0.11042944785276074,
      "grad_norm": 0.16453488171100616,
      "learning_rate": 9.999303142012008e-06,
      "loss": 0.5984,
      "step": 18
    },
    {
      "epoch": 0.1165644171779141,
      "grad_norm": 0.16882814466953278,
      "learning_rate": 9.999051506806e-06,
      "loss": 0.5684,
      "step": 19
    },
    {
      "epoch": 0.12269938650306748,
      "grad_norm": 0.15942800045013428,
      "learning_rate": 9.998761163737217e-06,
      "loss": 0.5879,
      "step": 20
    },
    {
      "epoch": 0.12883435582822086,
      "grad_norm": 0.17792896926403046,
      "learning_rate": 9.998432115053796e-06,
      "loss": 0.5572,
      "step": 21
    },
    {
      "epoch": 0.13496932515337423,
      "grad_norm": 0.1712748408317566,
      "learning_rate": 9.998064363303573e-06,
      "loss": 0.5693,
      "step": 22
    },
    {
      "epoch": 0.1411042944785276,
      "grad_norm": 0.1689537614583969,
      "learning_rate": 9.997657911334068e-06,
      "loss": 0.5584,
      "step": 23
    },
    {
      "epoch": 0.147239263803681,
      "grad_norm": 0.17206093668937683,
      "learning_rate": 9.997212762292453e-06,
      "loss": 0.5406,
      "step": 24
    },
    {
      "epoch": 0.15337423312883436,
      "grad_norm": 0.1795872151851654,
      "learning_rate": 9.996728919625538e-06,
      "loss": 0.5767,
      "step": 25
    },
    {
      "epoch": 0.15950920245398773,
      "grad_norm": 0.16666671633720398,
      "learning_rate": 9.996206387079736e-06,
      "loss": 0.5792,
      "step": 26
    },
    {
      "epoch": 0.1656441717791411,
      "grad_norm": 0.15964794158935547,
      "learning_rate": 9.995645168701038e-06,
      "loss": 0.5771,
      "step": 27
    },
    {
      "epoch": 0.17177914110429449,
      "grad_norm": 0.1661137342453003,
      "learning_rate": 9.995045268834979e-06,
      "loss": 0.5695,
      "step": 28
    },
    {
      "epoch": 0.17791411042944785,
      "grad_norm": 0.16640233993530273,
      "learning_rate": 9.99440669212661e-06,
      "loss": 0.6123,
      "step": 29
    },
    {
      "epoch": 0.18404907975460122,
      "grad_norm": 0.14831523597240448,
      "learning_rate": 9.99372944352046e-06,
      "loss": 0.5309,
      "step": 30
    },
    {
      "epoch": 0.1901840490797546,
      "grad_norm": 0.13072659075260162,
      "learning_rate": 9.993013528260486e-06,
      "loss": 0.5782,
      "step": 31
    },
    {
      "epoch": 0.19631901840490798,
      "grad_norm": 0.12631100416183472,
      "learning_rate": 9.992258951890057e-06,
      "loss": 0.5361,
      "step": 32
    },
    {
      "epoch": 0.20245398773006135,
      "grad_norm": 0.1304839849472046,
      "learning_rate": 9.991465720251885e-06,
      "loss": 0.5673,
      "step": 33
    },
    {
      "epoch": 0.2085889570552147,
      "grad_norm": 0.12783069908618927,
      "learning_rate": 9.990633839487997e-06,
      "loss": 0.5662,
      "step": 34
    },
    {
      "epoch": 0.2147239263803681,
      "grad_norm": 0.12577638030052185,
      "learning_rate": 9.989763316039678e-06,
      "loss": 0.5328,
      "step": 35
    },
    {
      "epoch": 0.22085889570552147,
      "grad_norm": 0.12679477035999298,
      "learning_rate": 9.988854156647428e-06,
      "loss": 0.5758,
      "step": 36
    },
    {
      "epoch": 0.22699386503067484,
      "grad_norm": 0.1201774999499321,
      "learning_rate": 9.987906368350908e-06,
      "loss": 0.5598,
      "step": 37
    },
    {
      "epoch": 0.2331288343558282,
      "grad_norm": 0.1210893765091896,
      "learning_rate": 9.98691995848888e-06,
      "loss": 0.5902,
      "step": 38
    },
    {
      "epoch": 0.2392638036809816,
      "grad_norm": 0.10759458690881729,
      "learning_rate": 9.985894934699154e-06,
      "loss": 0.524,
      "step": 39
    },
    {
      "epoch": 0.24539877300613497,
      "grad_norm": 0.10544677078723907,
      "learning_rate": 9.984831304918537e-06,
      "loss": 0.5535,
      "step": 40
    },
    {
      "epoch": 0.25153374233128833,
      "grad_norm": 0.1004803329706192,
      "learning_rate": 9.983729077382755e-06,
      "loss": 0.5204,
      "step": 41
    },
    {
      "epoch": 0.25766871165644173,
      "grad_norm": 0.10377446562051773,
      "learning_rate": 9.982588260626402e-06,
      "loss": 0.5319,
      "step": 42
    },
    {
      "epoch": 0.26380368098159507,
      "grad_norm": 0.09945333003997803,
      "learning_rate": 9.981408863482872e-06,
      "loss": 0.5473,
      "step": 43
    },
    {
      "epoch": 0.26993865030674846,
      "grad_norm": 0.09462588280439377,
      "learning_rate": 9.98019089508428e-06,
      "loss": 0.5455,
      "step": 44
    },
    {
      "epoch": 0.27607361963190186,
      "grad_norm": 0.09673507511615753,
      "learning_rate": 9.97893436486141e-06,
      "loss": 0.5175,
      "step": 45
    },
    {
      "epoch": 0.2822085889570552,
      "grad_norm": 0.09385982155799866,
      "learning_rate": 9.977639282543627e-06,
      "loss": 0.5519,
      "step": 46
    },
    {
      "epoch": 0.2883435582822086,
      "grad_norm": 0.10252804309129715,
      "learning_rate": 9.976305658158806e-06,
      "loss": 0.5197,
      "step": 47
    },
    {
      "epoch": 0.294478527607362,
      "grad_norm": 0.09260332584381104,
      "learning_rate": 9.97493350203326e-06,
      "loss": 0.5454,
      "step": 48
    },
    {
      "epoch": 0.3006134969325153,
      "grad_norm": 0.09458579868078232,
      "learning_rate": 9.973522824791643e-06,
      "loss": 0.5376,
      "step": 49
    },
    {
      "epoch": 0.3067484662576687,
      "grad_norm": 0.10349428653717041,
      "learning_rate": 9.972073637356894e-06,
      "loss": 0.5747,
      "step": 50
    },
    {
      "epoch": 0.3128834355828221,
      "grad_norm": 0.09267648309469223,
      "learning_rate": 9.970585950950129e-06,
      "loss": 0.525,
      "step": 51
    },
    {
      "epoch": 0.31901840490797545,
      "grad_norm": 0.08669942617416382,
      "learning_rate": 9.969059777090564e-06,
      "loss": 0.5121,
      "step": 52
    },
    {
      "epoch": 0.32515337423312884,
      "grad_norm": 0.09225864708423615,
      "learning_rate": 9.967495127595427e-06,
      "loss": 0.523,
      "step": 53
    },
    {
      "epoch": 0.3312883435582822,
      "grad_norm": 0.09258433431386948,
      "learning_rate": 9.965892014579867e-06,
      "loss": 0.5275,
      "step": 54
    },
    {
      "epoch": 0.3374233128834356,
      "grad_norm": 0.08944110572338104,
      "learning_rate": 9.96425045045685e-06,
      "loss": 0.5338,
      "step": 55
    },
    {
      "epoch": 0.34355828220858897,
      "grad_norm": 0.09073130786418915,
      "learning_rate": 9.962570447937077e-06,
      "loss": 0.5379,
      "step": 56
    },
    {
      "epoch": 0.3496932515337423,
      "grad_norm": 0.10125018656253815,
      "learning_rate": 9.960852020028877e-06,
      "loss": 0.5216,
      "step": 57
    },
    {
      "epoch": 0.3558282208588957,
      "grad_norm": 0.09612809121608734,
      "learning_rate": 9.95909518003811e-06,
      "loss": 0.5076,
      "step": 58
    },
    {
      "epoch": 0.3619631901840491,
      "grad_norm": 0.09233148396015167,
      "learning_rate": 9.957299941568058e-06,
      "loss": 0.5443,
      "step": 59
    },
    {
      "epoch": 0.36809815950920244,
      "grad_norm": 0.14108285307884216,
      "learning_rate": 9.955466318519327e-06,
      "loss": 0.5588,
      "step": 60
    },
    {
      "epoch": 0.37423312883435583,
      "grad_norm": 0.09588061273097992,
      "learning_rate": 9.953594325089738e-06,
      "loss": 0.506,
      "step": 61
    },
    {
      "epoch": 0.3803680981595092,
      "grad_norm": 0.08795749396085739,
      "learning_rate": 9.951683975774213e-06,
      "loss": 0.5158,
      "step": 62
    },
    {
      "epoch": 0.38650306748466257,
      "grad_norm": 0.0903969258069992,
      "learning_rate": 9.949735285364666e-06,
      "loss": 0.5337,
      "step": 63
    },
    {
      "epoch": 0.39263803680981596,
      "grad_norm": 0.09337311238050461,
      "learning_rate": 9.947748268949885e-06,
      "loss": 0.5313,
      "step": 64
    },
    {
      "epoch": 0.3987730061349693,
      "grad_norm": 0.08544723689556122,
      "learning_rate": 9.945722941915424e-06,
      "loss": 0.5398,
      "step": 65
    },
    {
      "epoch": 0.4049079754601227,
      "grad_norm": 0.08584097027778625,
      "learning_rate": 9.943659319943472e-06,
      "loss": 0.5116,
      "step": 66
    },
    {
      "epoch": 0.4110429447852761,
      "grad_norm": 0.08828586339950562,
      "learning_rate": 9.941557419012742e-06,
      "loss": 0.5004,
      "step": 67
    },
    {
      "epoch": 0.4171779141104294,
      "grad_norm": 0.0884728655219078,
      "learning_rate": 9.939417255398336e-06,
      "loss": 0.5463,
      "step": 68
    },
    {
      "epoch": 0.4233128834355828,
      "grad_norm": 0.08237382769584656,
      "learning_rate": 9.93723884567163e-06,
      "loss": 0.4999,
      "step": 69
    },
    {
      "epoch": 0.4294478527607362,
      "grad_norm": 0.07927916198968887,
      "learning_rate": 9.935022206700145e-06,
      "loss": 0.5056,
      "step": 70
    },
    {
      "epoch": 0.43558282208588955,
      "grad_norm": 0.08695581555366516,
      "learning_rate": 9.932767355647404e-06,
      "loss": 0.5327,
      "step": 71
    },
    {
      "epoch": 0.44171779141104295,
      "grad_norm": 0.0842549055814743,
      "learning_rate": 9.930474309972813e-06,
      "loss": 0.5371,
      "step": 72
    },
    {
      "epoch": 0.44785276073619634,
      "grad_norm": 0.09977614879608154,
      "learning_rate": 9.92814308743152e-06,
      "loss": 0.5404,
      "step": 73
    },
    {
      "epoch": 0.4539877300613497,
      "grad_norm": 0.08078821003437042,
      "learning_rate": 9.925773706074278e-06,
      "loss": 0.5223,
      "step": 74
    },
    {
      "epoch": 0.4601226993865031,
      "grad_norm": 0.08402088284492493,
      "learning_rate": 9.923366184247306e-06,
      "loss": 0.4808,
      "step": 75
    },
    {
      "epoch": 0.4662576687116564,
      "grad_norm": 0.08832000941038132,
      "learning_rate": 9.920920540592141e-06,
      "loss": 0.5187,
      "step": 76
    },
    {
      "epoch": 0.4723926380368098,
      "grad_norm": 0.0865439772605896,
      "learning_rate": 9.918436794045507e-06,
      "loss": 0.5013,
      "step": 77
    },
    {
      "epoch": 0.4785276073619632,
      "grad_norm": 0.0843573585152626,
      "learning_rate": 9.915914963839154e-06,
      "loss": 0.5203,
      "step": 78
    },
    {
      "epoch": 0.48466257668711654,
      "grad_norm": 0.08602918684482574,
      "learning_rate": 9.91335506949972e-06,
      "loss": 0.509,
      "step": 79
    },
    {
      "epoch": 0.49079754601226994,
      "grad_norm": 0.11076867580413818,
      "learning_rate": 9.910757130848571e-06,
      "loss": 0.5022,
      "step": 80
    },
    {
      "epoch": 0.49693251533742333,
      "grad_norm": 0.08907414972782135,
      "learning_rate": 9.908121168001657e-06,
      "loss": 0.524,
      "step": 81
    },
    {
      "epoch": 0.5030674846625767,
      "grad_norm": 0.09341124445199966,
      "learning_rate": 9.90544720136934e-06,
      "loss": 0.4973,
      "step": 82
    },
    {
      "epoch": 0.50920245398773,
      "grad_norm": 0.10275765508413315,
      "learning_rate": 9.902735251656263e-06,
      "loss": 0.5031,
      "step": 83
    },
    {
      "epoch": 0.5153374233128835,
      "grad_norm": 0.08170262724161148,
      "learning_rate": 9.89998533986116e-06,
      "loss": 0.4829,
      "step": 84
    },
    {
      "epoch": 0.5214723926380368,
      "grad_norm": 0.08169631659984589,
      "learning_rate": 9.897197487276712e-06,
      "loss": 0.4893,
      "step": 85
    },
    {
      "epoch": 0.5276073619631901,
      "grad_norm": 0.07817935198545456,
      "learning_rate": 9.894371715489376e-06,
      "loss": 0.5155,
      "step": 86
    },
    {
      "epoch": 0.5337423312883436,
      "grad_norm": 0.0947134792804718,
      "learning_rate": 9.891508046379225e-06,
      "loss": 0.5513,
      "step": 87
    },
    {
      "epoch": 0.5398773006134969,
      "grad_norm": 0.08149490505456924,
      "learning_rate": 9.888606502119763e-06,
      "loss": 0.4797,
      "step": 88
    },
    {
      "epoch": 0.5460122699386503,
      "grad_norm": 0.09330648183822632,
      "learning_rate": 9.885667105177769e-06,
      "loss": 0.4906,
      "step": 89
    },
    {
      "epoch": 0.5521472392638037,
      "grad_norm": 0.07798902690410614,
      "learning_rate": 9.882689878313114e-06,
      "loss": 0.4751,
      "step": 90
    },
    {
      "epoch": 0.558282208588957,
      "grad_norm": 0.08369090408086777,
      "learning_rate": 9.879674844578588e-06,
      "loss": 0.5378,
      "step": 91
    },
    {
      "epoch": 0.5644171779141104,
      "grad_norm": 0.09182888269424438,
      "learning_rate": 9.876622027319726e-06,
      "loss": 0.5062,
      "step": 92
    },
    {
      "epoch": 0.5705521472392638,
      "grad_norm": 0.07969052344560623,
      "learning_rate": 9.873531450174616e-06,
      "loss": 0.5108,
      "step": 93
    },
    {
      "epoch": 0.5766871165644172,
      "grad_norm": 0.1103825718164444,
      "learning_rate": 9.870403137073723e-06,
      "loss": 0.5127,
      "step": 94
    },
    {
      "epoch": 0.5828220858895705,
      "grad_norm": 0.0967809185385704,
      "learning_rate": 9.867237112239708e-06,
      "loss": 0.4884,
      "step": 95
    },
    {
      "epoch": 0.588957055214724,
      "grad_norm": 0.07554975152015686,
      "learning_rate": 9.86403340018723e-06,
      "loss": 0.4743,
      "step": 96
    },
    {
      "epoch": 0.5950920245398773,
      "grad_norm": 0.09686005860567093,
      "learning_rate": 9.860792025722768e-06,
      "loss": 0.5033,
      "step": 97
    },
    {
      "epoch": 0.6012269938650306,
      "grad_norm": 0.07932725548744202,
      "learning_rate": 9.857513013944413e-06,
      "loss": 0.5121,
      "step": 98
    },
    {
      "epoch": 0.6073619631901841,
      "grad_norm": 0.08431375026702881,
      "learning_rate": 9.854196390241691e-06,
      "loss": 0.5058,
      "step": 99
    },
    {
      "epoch": 0.6134969325153374,
      "grad_norm": 0.081431545317173,
      "learning_rate": 9.85084218029536e-06,
      "loss": 0.5138,
      "step": 100
    },
    {
      "epoch": 0.6196319018404908,
      "grad_norm": 0.08625893294811249,
      "learning_rate": 9.847450410077202e-06,
      "loss": 0.5052,
      "step": 101
    },
    {
      "epoch": 0.6257668711656442,
      "grad_norm": 0.08018495887517929,
      "learning_rate": 9.844021105849837e-06,
      "loss": 0.5018,
      "step": 102
    },
    {
      "epoch": 0.6319018404907976,
      "grad_norm": 0.07922230660915375,
      "learning_rate": 9.840554294166507e-06,
      "loss": 0.4949,
      "step": 103
    },
    {
      "epoch": 0.6380368098159509,
      "grad_norm": 0.07988546043634415,
      "learning_rate": 9.83705000187088e-06,
      "loss": 0.4774,
      "step": 104
    },
    {
      "epoch": 0.6441717791411042,
      "grad_norm": 0.08108440041542053,
      "learning_rate": 9.833508256096837e-06,
      "loss": 0.508,
      "step": 105
    },
    {
      "epoch": 0.6503067484662577,
      "grad_norm": 0.07891630381345749,
      "learning_rate": 9.829929084268262e-06,
      "loss": 0.4729,
      "step": 106
    },
    {
      "epoch": 0.656441717791411,
      "grad_norm": 0.07818285375833511,
      "learning_rate": 9.82631251409883e-06,
      "loss": 0.4535,
      "step": 107
    },
    {
      "epoch": 0.6625766871165644,
      "grad_norm": 0.07954879850149155,
      "learning_rate": 9.822658573591794e-06,
      "loss": 0.5095,
      "step": 108
    },
    {
      "epoch": 0.6687116564417178,
      "grad_norm": 0.08702922612428665,
      "learning_rate": 9.818967291039767e-06,
      "loss": 0.5049,
      "step": 109
    },
    {
      "epoch": 0.6748466257668712,
      "grad_norm": 0.07859531790018082,
      "learning_rate": 9.8152386950245e-06,
      "loss": 0.4565,
      "step": 110
    },
    {
      "epoch": 0.6809815950920245,
      "grad_norm": 0.09203047305345535,
      "learning_rate": 9.811472814416669e-06,
      "loss": 0.487,
      "step": 111
    },
    {
      "epoch": 0.6871165644171779,
      "grad_norm": 0.08309823274612427,
      "learning_rate": 9.807669678375643e-06,
      "loss": 0.4731,
      "step": 112
    },
    {
      "epoch": 0.6932515337423313,
      "grad_norm": 0.0878622755408287,
      "learning_rate": 9.803829316349262e-06,
      "loss": 0.5039,
      "step": 113
    },
    {
      "epoch": 0.6993865030674846,
      "grad_norm": 0.09177032113075256,
      "learning_rate": 9.799951758073607e-06,
      "loss": 0.4605,
      "step": 114
    },
    {
      "epoch": 0.7055214723926381,
      "grad_norm": 0.08374316245317459,
      "learning_rate": 9.796037033572771e-06,
      "loss": 0.4926,
      "step": 115
    },
    {
      "epoch": 0.7116564417177914,
      "grad_norm": 0.08500220626592636,
      "learning_rate": 9.792085173158633e-06,
      "loss": 0.5156,
      "step": 116
    },
    {
      "epoch": 0.7177914110429447,
      "grad_norm": 0.10756376385688782,
      "learning_rate": 9.788096207430608e-06,
      "loss": 0.4892,
      "step": 117
    },
    {
      "epoch": 0.7239263803680982,
      "grad_norm": 0.08716326951980591,
      "learning_rate": 9.784070167275422e-06,
      "loss": 0.488,
      "step": 118
    },
    {
      "epoch": 0.7300613496932515,
      "grad_norm": 0.08646374195814133,
      "learning_rate": 9.780007083866872e-06,
      "loss": 0.5176,
      "step": 119
    },
    {
      "epoch": 0.7361963190184049,
      "grad_norm": 0.08787130564451218,
      "learning_rate": 9.775906988665583e-06,
      "loss": 0.5049,
      "step": 120
    },
    {
      "epoch": 0.7423312883435583,
      "grad_norm": 0.11348209530115128,
      "learning_rate": 9.771769913418758e-06,
      "loss": 0.4736,
      "step": 121
    },
    {
      "epoch": 0.7484662576687117,
      "grad_norm": 0.09234916418790817,
      "learning_rate": 9.767595890159944e-06,
      "loss": 0.4943,
      "step": 122
    },
    {
      "epoch": 0.754601226993865,
      "grad_norm": 0.08790959417819977,
      "learning_rate": 9.763384951208776e-06,
      "loss": 0.5097,
      "step": 123
    },
    {
      "epoch": 0.7607361963190185,
      "grad_norm": 0.08863342553377151,
      "learning_rate": 9.759137129170728e-06,
      "loss": 0.5241,
      "step": 124
    },
    {
      "epoch": 0.7668711656441718,
      "grad_norm": 0.08124402910470963,
      "learning_rate": 9.754852456936862e-06,
      "loss": 0.4759,
      "step": 125
    },
    {
      "epoch": 0.7730061349693251,
      "grad_norm": 0.08492821455001831,
      "learning_rate": 9.750530967683573e-06,
      "loss": 0.478,
      "step": 126
    },
    {
      "epoch": 0.7791411042944786,
      "grad_norm": 0.09666764736175537,
      "learning_rate": 9.746172694872332e-06,
      "loss": 0.4842,
      "step": 127
    },
    {
      "epoch": 0.7852760736196319,
      "grad_norm": 0.08827357739210129,
      "learning_rate": 9.741777672249424e-06,
      "loss": 0.5038,
      "step": 128
    },
    {
      "epoch": 0.7914110429447853,
      "grad_norm": 0.09018061310052872,
      "learning_rate": 9.737345933845692e-06,
      "loss": 0.5357,
      "step": 129
    },
    {
      "epoch": 0.7975460122699386,
      "grad_norm": 0.09164313971996307,
      "learning_rate": 9.732877513976269e-06,
      "loss": 0.5167,
      "step": 130
    },
    {
      "epoch": 0.803680981595092,
      "grad_norm": 0.08618041127920151,
      "learning_rate": 9.728372447240315e-06,
      "loss": 0.4807,
      "step": 131
    },
    {
      "epoch": 0.8098159509202454,
      "grad_norm": 0.10933379828929901,
      "learning_rate": 9.72383076852075e-06,
      "loss": 0.5073,
      "step": 132
    },
    {
      "epoch": 0.8159509202453987,
      "grad_norm": 0.08988262712955475,
      "learning_rate": 9.71925251298398e-06,
      "loss": 0.5095,
      "step": 133
    },
    {
      "epoch": 0.8220858895705522,
      "grad_norm": 0.08735327422618866,
      "learning_rate": 9.714637716079627e-06,
      "loss": 0.4838,
      "step": 134
    },
    {
      "epoch": 0.8282208588957055,
      "grad_norm": 0.08958423137664795,
      "learning_rate": 9.709986413540254e-06,
      "loss": 0.4931,
      "step": 135
    },
    {
      "epoch": 0.8343558282208589,
      "grad_norm": 0.08985975384712219,
      "learning_rate": 9.705298641381089e-06,
      "loss": 0.4776,
      "step": 136
    },
    {
      "epoch": 0.8404907975460123,
      "grad_norm": 0.10067565739154816,
      "learning_rate": 9.700574435899745e-06,
      "loss": 0.4672,
      "step": 137
    },
    {
      "epoch": 0.8466257668711656,
      "grad_norm": 0.09510832279920578,
      "learning_rate": 9.695813833675943e-06,
      "loss": 0.4497,
      "step": 138
    },
    {
      "epoch": 0.852760736196319,
      "grad_norm": 0.09438912570476532,
      "learning_rate": 9.691016871571219e-06,
      "loss": 0.4935,
      "step": 139
    },
    {
      "epoch": 0.8588957055214724,
      "grad_norm": 0.0871388390660286,
      "learning_rate": 9.686183586728654e-06,
      "loss": 0.5168,
      "step": 140
    },
    {
      "epoch": 0.8650306748466258,
      "grad_norm": 0.08199909329414368,
      "learning_rate": 9.681314016572572e-06,
      "loss": 0.4852,
      "step": 141
    },
    {
      "epoch": 0.8711656441717791,
      "grad_norm": 0.08726981282234192,
      "learning_rate": 9.676408198808253e-06,
      "loss": 0.468,
      "step": 142
    },
    {
      "epoch": 0.8773006134969326,
      "grad_norm": 0.09949006140232086,
      "learning_rate": 9.671466171421651e-06,
      "loss": 0.4706,
      "step": 143
    },
    {
      "epoch": 0.8834355828220859,
      "grad_norm": 0.0843021422624588,
      "learning_rate": 9.666487972679085e-06,
      "loss": 0.4729,
      "step": 144
    },
    {
      "epoch": 0.8895705521472392,
      "grad_norm": 0.0966368094086647,
      "learning_rate": 9.661473641126954e-06,
      "loss": 0.4628,
      "step": 145
    },
    {
      "epoch": 0.8957055214723927,
      "grad_norm": 0.09247634559869766,
      "learning_rate": 9.65642321559144e-06,
      "loss": 0.4907,
      "step": 146
    },
    {
      "epoch": 0.901840490797546,
      "grad_norm": 0.09630908071994781,
      "learning_rate": 9.651336735178191e-06,
      "loss": 0.4878,
      "step": 147
    },
    {
      "epoch": 0.9079754601226994,
      "grad_norm": 0.09485550224781036,
      "learning_rate": 9.646214239272038e-06,
      "loss": 0.4735,
      "step": 148
    },
    {
      "epoch": 0.9141104294478528,
      "grad_norm": 0.09002482891082764,
      "learning_rate": 9.64105576753668e-06,
      "loss": 0.4755,
      "step": 149
    },
    {
      "epoch": 0.9202453987730062,
      "grad_norm": 0.09443383663892746,
      "learning_rate": 9.635861359914374e-06,
      "loss": 0.486,
      "step": 150
    },
    {
      "epoch": 0.9263803680981595,
      "grad_norm": 0.08977822959423065,
      "learning_rate": 9.630631056625635e-06,
      "loss": 0.4966,
      "step": 151
    },
    {
      "epoch": 0.9325153374233128,
      "grad_norm": 0.09370667487382889,
      "learning_rate": 9.62536489816892e-06,
      "loss": 0.4792,
      "step": 152
    },
    {
      "epoch": 0.9386503067484663,
      "grad_norm": 0.09303581714630127,
      "learning_rate": 9.620062925320309e-06,
      "loss": 0.4648,
      "step": 153
    },
    {
      "epoch": 0.9447852760736196,
      "grad_norm": 0.08958807587623596,
      "learning_rate": 9.614725179133197e-06,
      "loss": 0.4786,
      "step": 154
    },
    {
      "epoch": 0.950920245398773,
      "grad_norm": 0.10791140049695969,
      "learning_rate": 9.609351700937976e-06,
      "loss": 0.472,
      "step": 155
    },
    {
      "epoch": 0.9570552147239264,
      "grad_norm": 0.09399349987506866,
      "learning_rate": 9.60394253234171e-06,
      "loss": 0.4529,
      "step": 156
    },
    {
      "epoch": 0.9631901840490797,
      "grad_norm": 0.09015163034200668,
      "learning_rate": 9.598497715227815e-06,
      "loss": 0.4839,
      "step": 157
    },
    {
      "epoch": 0.9693251533742331,
      "grad_norm": 0.09602297842502594,
      "learning_rate": 9.593017291755733e-06,
      "loss": 0.4806,
      "step": 158
    },
    {
      "epoch": 0.9754601226993865,
      "grad_norm": 0.097842738032341,
      "learning_rate": 9.587501304360612e-06,
      "loss": 0.4518,
      "step": 159
    },
    {
      "epoch": 0.9815950920245399,
      "grad_norm": 0.11291999369859695,
      "learning_rate": 9.581949795752972e-06,
      "loss": 0.4375,
      "step": 160
    },
    {
      "epoch": 0.9877300613496932,
      "grad_norm": 0.0913916602730751,
      "learning_rate": 9.576362808918368e-06,
      "loss": 0.4771,
      "step": 161
    },
    {
      "epoch": 0.9938650306748467,
      "grad_norm": 0.10012573003768921,
      "learning_rate": 9.570740387117078e-06,
      "loss": 0.4775,
      "step": 162
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.09484586119651794,
      "learning_rate": 9.565082573883745e-06,
      "loss": 0.4859,
      "step": 163
    },
    {
      "epoch": 1.0061349693251533,
      "grad_norm": 0.09999288618564606,
      "learning_rate": 9.559389413027048e-06,
      "loss": 0.4723,
      "step": 164
    },
    {
      "epoch": 1.0122699386503067,
      "grad_norm": 0.09577009081840515,
      "learning_rate": 9.553660948629369e-06,
      "loss": 0.492,
      "step": 165
    },
    {
      "epoch": 1.01840490797546,
      "grad_norm": 0.1010693684220314,
      "learning_rate": 9.547897225046445e-06,
      "loss": 0.489,
      "step": 166
    },
    {
      "epoch": 1.0245398773006136,
      "grad_norm": 0.10461648553609848,
      "learning_rate": 9.542098286907024e-06,
      "loss": 0.4795,
      "step": 167
    },
    {
      "epoch": 1.030674846625767,
      "grad_norm": 0.10396389663219452,
      "learning_rate": 9.536264179112529e-06,
      "loss": 0.464,
      "step": 168
    },
    {
      "epoch": 1.0368098159509203,
      "grad_norm": 0.09160111844539642,
      "learning_rate": 9.530394946836694e-06,
      "loss": 0.4997,
      "step": 169
    },
    {
      "epoch": 1.0429447852760736,
      "grad_norm": 0.09714648127555847,
      "learning_rate": 9.524490635525228e-06,
      "loss": 0.4724,
      "step": 170
    },
    {
      "epoch": 1.049079754601227,
      "grad_norm": 0.10137296468019485,
      "learning_rate": 9.51855129089546e-06,
      "loss": 0.4802,
      "step": 171
    },
    {
      "epoch": 1.0552147239263803,
      "grad_norm": 0.09933356940746307,
      "learning_rate": 9.51257695893598e-06,
      "loss": 0.4478,
      "step": 172
    },
    {
      "epoch": 1.0613496932515338,
      "grad_norm": 0.0967121571302414,
      "learning_rate": 9.506567685906289e-06,
      "loss": 0.4496,
      "step": 173
    },
    {
      "epoch": 1.0674846625766872,
      "grad_norm": 0.10460793972015381,
      "learning_rate": 9.500523518336435e-06,
      "loss": 0.4569,
      "step": 174
    },
    {
      "epoch": 1.0736196319018405,
      "grad_norm": 0.11570947617292404,
      "learning_rate": 9.494444503026656e-06,
      "loss": 0.4808,
      "step": 175
    },
    {
      "epoch": 1.0797546012269938,
      "grad_norm": 0.1063118651509285,
      "learning_rate": 9.488330687047025e-06,
      "loss": 0.4889,
      "step": 176
    },
    {
      "epoch": 1.0858895705521472,
      "grad_norm": 0.09846626967191696,
      "learning_rate": 9.482182117737066e-06,
      "loss": 0.4876,
      "step": 177
    },
    {
      "epoch": 1.0920245398773005,
      "grad_norm": 0.09458671510219574,
      "learning_rate": 9.475998842705412e-06,
      "loss": 0.4633,
      "step": 178
    },
    {
      "epoch": 1.098159509202454,
      "grad_norm": 0.0982397273182869,
      "learning_rate": 9.469780909829411e-06,
      "loss": 0.462,
      "step": 179
    },
    {
      "epoch": 1.1042944785276074,
      "grad_norm": 0.09949040412902832,
      "learning_rate": 9.46352836725478e-06,
      "loss": 0.4677,
      "step": 180
    },
    {
      "epoch": 1.1104294478527608,
      "grad_norm": 0.10287673771381378,
      "learning_rate": 9.457241263395212e-06,
      "loss": 0.4743,
      "step": 181
    },
    {
      "epoch": 1.116564417177914,
      "grad_norm": 0.10356439650058746,
      "learning_rate": 9.450919646932013e-06,
      "loss": 0.483,
      "step": 182
    },
    {
      "epoch": 1.1226993865030674,
      "grad_norm": 0.09517747908830643,
      "learning_rate": 9.44456356681372e-06,
      "loss": 0.4462,
      "step": 183
    },
    {
      "epoch": 1.1288343558282208,
      "grad_norm": 0.12741521000862122,
      "learning_rate": 9.438173072255727e-06,
      "loss": 0.4577,
      "step": 184
    },
    {
      "epoch": 1.1349693251533743,
      "grad_norm": 0.10354242473840714,
      "learning_rate": 9.431748212739897e-06,
      "loss": 0.4308,
      "step": 185
    },
    {
      "epoch": 1.1411042944785277,
      "grad_norm": 0.11123675853013992,
      "learning_rate": 9.425289038014184e-06,
      "loss": 0.4937,
      "step": 186
    },
    {
      "epoch": 1.147239263803681,
      "grad_norm": 0.09957747161388397,
      "learning_rate": 9.418795598092243e-06,
      "loss": 0.4646,
      "step": 187
    },
    {
      "epoch": 1.1533742331288344,
      "grad_norm": 0.10711025446653366,
      "learning_rate": 9.41226794325305e-06,
      "loss": 0.5061,
      "step": 188
    },
    {
      "epoch": 1.1595092024539877,
      "grad_norm": 0.09933678060770035,
      "learning_rate": 9.405706124040506e-06,
      "loss": 0.4725,
      "step": 189
    },
    {
      "epoch": 1.165644171779141,
      "grad_norm": 0.10218477994203568,
      "learning_rate": 9.399110191263048e-06,
      "loss": 0.4745,
      "step": 190
    },
    {
      "epoch": 1.1717791411042944,
      "grad_norm": 0.10867977887392044,
      "learning_rate": 9.392480195993258e-06,
      "loss": 0.4866,
      "step": 191
    },
    {
      "epoch": 1.177914110429448,
      "grad_norm": 0.10019568353891373,
      "learning_rate": 9.385816189567462e-06,
      "loss": 0.4638,
      "step": 192
    },
    {
      "epoch": 1.1840490797546013,
      "grad_norm": 0.0998578816652298,
      "learning_rate": 9.379118223585342e-06,
      "loss": 0.4609,
      "step": 193
    },
    {
      "epoch": 1.1901840490797546,
      "grad_norm": 0.10544786602258682,
      "learning_rate": 9.372386349909521e-06,
      "loss": 0.4702,
      "step": 194
    },
    {
      "epoch": 1.196319018404908,
      "grad_norm": 0.11477886140346527,
      "learning_rate": 9.365620620665176e-06,
      "loss": 0.4664,
      "step": 195
    },
    {
      "epoch": 1.2024539877300613,
      "grad_norm": 0.09951955080032349,
      "learning_rate": 9.358821088239632e-06,
      "loss": 0.4672,
      "step": 196
    },
    {
      "epoch": 1.2085889570552146,
      "grad_norm": 0.10202579945325851,
      "learning_rate": 9.351987805281949e-06,
      "loss": 0.4728,
      "step": 197
    },
    {
      "epoch": 1.2147239263803682,
      "grad_norm": 0.11044152826070786,
      "learning_rate": 9.345120824702515e-06,
      "loss": 0.4306,
      "step": 198
    },
    {
      "epoch": 1.2208588957055215,
      "grad_norm": 0.10835869610309601,
      "learning_rate": 9.338220199672652e-06,
      "loss": 0.4879,
      "step": 199
    },
    {
      "epoch": 1.2269938650306749,
      "grad_norm": 0.10329335927963257,
      "learning_rate": 9.331285983624182e-06,
      "loss": 0.4728,
      "step": 200
    },
    {
      "epoch": 1.2331288343558282,
      "grad_norm": 0.10831797868013382,
      "learning_rate": 9.324318230249026e-06,
      "loss": 0.4789,
      "step": 201
    },
    {
      "epoch": 1.2392638036809815,
      "grad_norm": 0.10910773277282715,
      "learning_rate": 9.317316993498788e-06,
      "loss": 0.4642,
      "step": 202
    },
    {
      "epoch": 1.2453987730061349,
      "grad_norm": 0.10746899992227554,
      "learning_rate": 9.310282327584335e-06,
      "loss": 0.4659,
      "step": 203
    },
    {
      "epoch": 1.2515337423312882,
      "grad_norm": 0.11725542694330215,
      "learning_rate": 9.303214286975373e-06,
      "loss": 0.4629,
      "step": 204
    },
    {
      "epoch": 1.2576687116564418,
      "grad_norm": 0.10740689188241959,
      "learning_rate": 9.296112926400038e-06,
      "loss": 0.4835,
      "step": 205
    },
    {
      "epoch": 1.2638036809815951,
      "grad_norm": 0.10859764367341995,
      "learning_rate": 9.288978300844456e-06,
      "loss": 0.4555,
      "step": 206
    },
    {
      "epoch": 1.2699386503067485,
      "grad_norm": 0.11524678766727448,
      "learning_rate": 9.281810465552327e-06,
      "loss": 0.4597,
      "step": 207
    },
    {
      "epoch": 1.2760736196319018,
      "grad_norm": 0.11158965528011322,
      "learning_rate": 9.274609476024499e-06,
      "loss": 0.4903,
      "step": 208
    },
    {
      "epoch": 1.2822085889570551,
      "grad_norm": 0.10941608250141144,
      "learning_rate": 9.26737538801853e-06,
      "loss": 0.4778,
      "step": 209
    },
    {
      "epoch": 1.2883435582822087,
      "grad_norm": 0.10840465873479843,
      "learning_rate": 9.260108257548264e-06,
      "loss": 0.4635,
      "step": 210
    },
    {
      "epoch": 1.294478527607362,
      "grad_norm": 0.11878825724124908,
      "learning_rate": 9.252808140883393e-06,
      "loss": 0.4668,
      "step": 211
    },
    {
      "epoch": 1.3006134969325154,
      "grad_norm": 0.11123567819595337,
      "learning_rate": 9.24547509454902e-06,
      "loss": 0.4587,
      "step": 212
    },
    {
      "epoch": 1.3067484662576687,
      "grad_norm": 0.1218094527721405,
      "learning_rate": 9.238109175325232e-06,
      "loss": 0.4741,
      "step": 213
    },
    {
      "epoch": 1.312883435582822,
      "grad_norm": 0.11062923818826675,
      "learning_rate": 9.230710440246642e-06,
      "loss": 0.4712,
      "step": 214
    },
    {
      "epoch": 1.3190184049079754,
      "grad_norm": 0.11379806697368622,
      "learning_rate": 9.223278946601963e-06,
      "loss": 0.4822,
      "step": 215
    },
    {
      "epoch": 1.3251533742331287,
      "grad_norm": 0.11029627174139023,
      "learning_rate": 9.215814751933559e-06,
      "loss": 0.4479,
      "step": 216
    },
    {
      "epoch": 1.331288343558282,
      "grad_norm": 0.10827028751373291,
      "learning_rate": 9.208317914036997e-06,
      "loss": 0.4707,
      "step": 217
    },
    {
      "epoch": 1.3374233128834356,
      "grad_norm": 0.11715540289878845,
      "learning_rate": 9.200788490960605e-06,
      "loss": 0.4576,
      "step": 218
    },
    {
      "epoch": 1.343558282208589,
      "grad_norm": 0.11236132681369781,
      "learning_rate": 9.193226541005015e-06,
      "loss": 0.4646,
      "step": 219
    },
    {
      "epoch": 1.3496932515337423,
      "grad_norm": 0.1052820086479187,
      "learning_rate": 9.185632122722719e-06,
      "loss": 0.4761,
      "step": 220
    },
    {
      "epoch": 1.3558282208588956,
      "grad_norm": 0.12960059940814972,
      "learning_rate": 9.178005294917615e-06,
      "loss": 0.4741,
      "step": 221
    },
    {
      "epoch": 1.3619631901840492,
      "grad_norm": 0.11204280704259872,
      "learning_rate": 9.170346116644545e-06,
      "loss": 0.4752,
      "step": 222
    },
    {
      "epoch": 1.3680981595092025,
      "grad_norm": 0.11414473503828049,
      "learning_rate": 9.16265464720884e-06,
      "loss": 0.479,
      "step": 223
    },
    {
      "epoch": 1.3742331288343559,
      "grad_norm": 0.12588758766651154,
      "learning_rate": 9.154930946165872e-06,
      "loss": 0.4299,
      "step": 224
    },
    {
      "epoch": 1.3803680981595092,
      "grad_norm": 0.12927886843681335,
      "learning_rate": 9.147175073320574e-06,
      "loss": 0.4669,
      "step": 225
    },
    {
      "epoch": 1.3865030674846626,
      "grad_norm": 0.1076217070221901,
      "learning_rate": 9.13938708872699e-06,
      "loss": 0.4788,
      "step": 226
    },
    {
      "epoch": 1.392638036809816,
      "grad_norm": 0.11360085755586624,
      "learning_rate": 9.131567052687811e-06,
      "loss": 0.4374,
      "step": 227
    },
    {
      "epoch": 1.3987730061349692,
      "grad_norm": 0.11873716115951538,
      "learning_rate": 9.123715025753896e-06,
      "loss": 0.4401,
      "step": 228
    },
    {
      "epoch": 1.4049079754601226,
      "grad_norm": 0.11451287567615509,
      "learning_rate": 9.115831068723816e-06,
      "loss": 0.4811,
      "step": 229
    },
    {
      "epoch": 1.4110429447852761,
      "grad_norm": 0.11967512965202332,
      "learning_rate": 9.10791524264338e-06,
      "loss": 0.4767,
      "step": 230
    },
    {
      "epoch": 1.4171779141104295,
      "grad_norm": 0.11932694166898727,
      "learning_rate": 9.099967608805152e-06,
      "loss": 0.4567,
      "step": 231
    },
    {
      "epoch": 1.4233128834355828,
      "grad_norm": 0.11423443257808685,
      "learning_rate": 9.091988228747992e-06,
      "loss": 0.4549,
      "step": 232
    },
    {
      "epoch": 1.4294478527607362,
      "grad_norm": 0.11714685708284378,
      "learning_rate": 9.08397716425657e-06,
      "loss": 0.4259,
      "step": 233
    },
    {
      "epoch": 1.4355828220858895,
      "grad_norm": 0.1315395087003708,
      "learning_rate": 9.07593447736089e-06,
      "loss": 0.502,
      "step": 234
    },
    {
      "epoch": 1.441717791411043,
      "grad_norm": 0.1216348186135292,
      "learning_rate": 9.06786023033581e-06,
      "loss": 0.4622,
      "step": 235
    },
    {
      "epoch": 1.4478527607361964,
      "grad_norm": 0.11694086343050003,
      "learning_rate": 9.059754485700557e-06,
      "loss": 0.4413,
      "step": 236
    },
    {
      "epoch": 1.4539877300613497,
      "grad_norm": 0.12045416980981827,
      "learning_rate": 9.05161730621825e-06,
      "loss": 0.4579,
      "step": 237
    },
    {
      "epoch": 1.460122699386503,
      "grad_norm": 0.11791419237852097,
      "learning_rate": 9.043448754895405e-06,
      "loss": 0.4712,
      "step": 238
    },
    {
      "epoch": 1.4662576687116564,
      "grad_norm": 0.12033417075872421,
      "learning_rate": 9.035248894981454e-06,
      "loss": 0.4787,
      "step": 239
    },
    {
      "epoch": 1.4723926380368098,
      "grad_norm": 0.1140502542257309,
      "learning_rate": 9.02701778996825e-06,
      "loss": 0.4909,
      "step": 240
    },
    {
      "epoch": 1.478527607361963,
      "grad_norm": 0.1263529509305954,
      "learning_rate": 9.018755503589582e-06,
      "loss": 0.4606,
      "step": 241
    },
    {
      "epoch": 1.4846625766871164,
      "grad_norm": 0.11217690259218216,
      "learning_rate": 9.010462099820674e-06,
      "loss": 0.4455,
      "step": 242
    },
    {
      "epoch": 1.49079754601227,
      "grad_norm": 0.12041988223791122,
      "learning_rate": 9.002137642877696e-06,
      "loss": 0.4317,
      "step": 243
    },
    {
      "epoch": 1.4969325153374233,
      "grad_norm": 0.128048375248909,
      "learning_rate": 8.993782197217262e-06,
      "loss": 0.4843,
      "step": 244
    },
    {
      "epoch": 1.5030674846625767,
      "grad_norm": 0.12902449071407318,
      "learning_rate": 8.985395827535934e-06,
      "loss": 0.4224,
      "step": 245
    },
    {
      "epoch": 1.50920245398773,
      "grad_norm": 0.1286553293466568,
      "learning_rate": 8.976978598769719e-06,
      "loss": 0.4613,
      "step": 246
    },
    {
      "epoch": 1.5153374233128836,
      "grad_norm": 0.11924152076244354,
      "learning_rate": 8.96853057609357e-06,
      "loss": 0.4288,
      "step": 247
    },
    {
      "epoch": 1.521472392638037,
      "grad_norm": 0.12467361986637115,
      "learning_rate": 8.960051824920873e-06,
      "loss": 0.4631,
      "step": 248
    },
    {
      "epoch": 1.5276073619631902,
      "grad_norm": 0.12638317048549652,
      "learning_rate": 8.951542410902949e-06,
      "loss": 0.4331,
      "step": 249
    },
    {
      "epoch": 1.5337423312883436,
      "grad_norm": 0.11676699668169022,
      "learning_rate": 8.943002399928547e-06,
      "loss": 0.4565,
      "step": 250
    },
    {
      "epoch": 1.539877300613497,
      "grad_norm": 0.1417926698923111,
      "learning_rate": 8.934431858123324e-06,
      "loss": 0.4796,
      "step": 251
    },
    {
      "epoch": 1.5460122699386503,
      "grad_norm": 0.1345888376235962,
      "learning_rate": 8.925830851849338e-06,
      "loss": 0.4658,
      "step": 252
    },
    {
      "epoch": 1.5521472392638036,
      "grad_norm": 0.11821699142456055,
      "learning_rate": 8.917199447704538e-06,
      "loss": 0.4376,
      "step": 253
    },
    {
      "epoch": 1.558282208588957,
      "grad_norm": 0.12282978743314743,
      "learning_rate": 8.908537712522246e-06,
      "loss": 0.4799,
      "step": 254
    },
    {
      "epoch": 1.5644171779141103,
      "grad_norm": 0.1122986450791359,
      "learning_rate": 8.899845713370632e-06,
      "loss": 0.4423,
      "step": 255
    },
    {
      "epoch": 1.5705521472392638,
      "grad_norm": 0.1364981234073639,
      "learning_rate": 8.891123517552208e-06,
      "loss": 0.4559,
      "step": 256
    },
    {
      "epoch": 1.5766871165644172,
      "grad_norm": 0.14287355542182922,
      "learning_rate": 8.882371192603297e-06,
      "loss": 0.4446,
      "step": 257
    },
    {
      "epoch": 1.5828220858895705,
      "grad_norm": 0.13158780336380005,
      "learning_rate": 8.87358880629351e-06,
      "loss": 0.4805,
      "step": 258
    },
    {
      "epoch": 1.588957055214724,
      "grad_norm": 0.14378562569618225,
      "learning_rate": 8.864776426625231e-06,
      "loss": 0.4656,
      "step": 259
    },
    {
      "epoch": 1.5950920245398774,
      "grad_norm": 0.1245376318693161,
      "learning_rate": 8.855934121833083e-06,
      "loss": 0.4425,
      "step": 260
    },
    {
      "epoch": 1.6012269938650308,
      "grad_norm": 0.12470446527004242,
      "learning_rate": 8.847061960383395e-06,
      "loss": 0.4495,
      "step": 261
    },
    {
      "epoch": 1.607361963190184,
      "grad_norm": 0.11994479596614838,
      "learning_rate": 8.83816001097368e-06,
      "loss": 0.4169,
      "step": 262
    },
    {
      "epoch": 1.6134969325153374,
      "grad_norm": 0.13416039943695068,
      "learning_rate": 8.8292283425321e-06,
      "loss": 0.4749,
      "step": 263
    },
    {
      "epoch": 1.6196319018404908,
      "grad_norm": 0.12816861271858215,
      "learning_rate": 8.820267024216937e-06,
      "loss": 0.4464,
      "step": 264
    },
    {
      "epoch": 1.6257668711656441,
      "grad_norm": 0.1400822103023529,
      "learning_rate": 8.811276125416048e-06,
      "loss": 0.464,
      "step": 265
    },
    {
      "epoch": 1.6319018404907975,
      "grad_norm": 0.1271728277206421,
      "learning_rate": 8.802255715746333e-06,
      "loss": 0.4798,
      "step": 266
    },
    {
      "epoch": 1.6380368098159508,
      "grad_norm": 0.11546501517295837,
      "learning_rate": 8.7932058650532e-06,
      "loss": 0.4716,
      "step": 267
    },
    {
      "epoch": 1.6441717791411041,
      "grad_norm": 0.12347917258739471,
      "learning_rate": 8.784126643410015e-06,
      "loss": 0.446,
      "step": 268
    },
    {
      "epoch": 1.6503067484662577,
      "grad_norm": 0.1298205405473709,
      "learning_rate": 8.775018121117569e-06,
      "loss": 0.4569,
      "step": 269
    },
    {
      "epoch": 1.656441717791411,
      "grad_norm": 0.12920261919498444,
      "learning_rate": 8.765880368703527e-06,
      "loss": 0.4946,
      "step": 270
    },
    {
      "epoch": 1.6625766871165644,
      "grad_norm": 0.11939296126365662,
      "learning_rate": 8.756713456921885e-06,
      "loss": 0.4703,
      "step": 271
    },
    {
      "epoch": 1.668711656441718,
      "grad_norm": 0.12618795037269592,
      "learning_rate": 8.747517456752419e-06,
      "loss": 0.4665,
      "step": 272
    },
    {
      "epoch": 1.6748466257668713,
      "grad_norm": 0.1314852237701416,
      "learning_rate": 8.73829243940014e-06,
      "loss": 0.4577,
      "step": 273
    },
    {
      "epoch": 1.6809815950920246,
      "grad_norm": 0.14323115348815918,
      "learning_rate": 8.729038476294737e-06,
      "loss": 0.428,
      "step": 274
    },
    {
      "epoch": 1.687116564417178,
      "grad_norm": 0.1325852870941162,
      "learning_rate": 8.719755639090032e-06,
      "loss": 0.443,
      "step": 275
    },
    {
      "epoch": 1.6932515337423313,
      "grad_norm": 0.12698835134506226,
      "learning_rate": 8.710443999663417e-06,
      "loss": 0.5045,
      "step": 276
    },
    {
      "epoch": 1.6993865030674846,
      "grad_norm": 0.12905320525169373,
      "learning_rate": 8.701103630115303e-06,
      "loss": 0.4664,
      "step": 277
    },
    {
      "epoch": 1.705521472392638,
      "grad_norm": 0.12309697270393372,
      "learning_rate": 8.691734602768554e-06,
      "loss": 0.4227,
      "step": 278
    },
    {
      "epoch": 1.7116564417177913,
      "grad_norm": 0.12686602771282196,
      "learning_rate": 8.68233699016794e-06,
      "loss": 0.4878,
      "step": 279
    },
    {
      "epoch": 1.7177914110429446,
      "grad_norm": 0.13946817815303802,
      "learning_rate": 8.672910865079564e-06,
      "loss": 0.4487,
      "step": 280
    },
    {
      "epoch": 1.7239263803680982,
      "grad_norm": 0.13267400860786438,
      "learning_rate": 8.663456300490302e-06,
      "loss": 0.4481,
      "step": 281
    },
    {
      "epoch": 1.7300613496932515,
      "grad_norm": 0.12559852004051208,
      "learning_rate": 8.65397336960724e-06,
      "loss": 0.449,
      "step": 282
    },
    {
      "epoch": 1.7361963190184049,
      "grad_norm": 0.1440388262271881,
      "learning_rate": 8.644462145857104e-06,
      "loss": 0.4466,
      "step": 283
    },
    {
      "epoch": 1.7423312883435584,
      "grad_norm": 0.1397363543510437,
      "learning_rate": 8.634922702885693e-06,
      "loss": 0.4113,
      "step": 284
    },
    {
      "epoch": 1.7484662576687118,
      "grad_norm": 0.13909780979156494,
      "learning_rate": 8.62535511455731e-06,
      "loss": 0.4504,
      "step": 285
    },
    {
      "epoch": 1.7546012269938651,
      "grad_norm": 0.128562331199646,
      "learning_rate": 8.615759454954187e-06,
      "loss": 0.4593,
      "step": 286
    },
    {
      "epoch": 1.7607361963190185,
      "grad_norm": 0.12467021495103836,
      "learning_rate": 8.60613579837591e-06,
      "loss": 0.4728,
      "step": 287
    },
    {
      "epoch": 1.7668711656441718,
      "grad_norm": 0.14602142572402954,
      "learning_rate": 8.596484219338856e-06,
      "loss": 0.4732,
      "step": 288
    },
    {
      "epoch": 1.7730061349693251,
      "grad_norm": 0.14744794368743896,
      "learning_rate": 8.586804792575596e-06,
      "loss": 0.4308,
      "step": 289
    },
    {
      "epoch": 1.7791411042944785,
      "grad_norm": 0.12893109023571014,
      "learning_rate": 8.577097593034338e-06,
      "loss": 0.4589,
      "step": 290
    },
    {
      "epoch": 1.7852760736196318,
      "grad_norm": 0.1237974613904953,
      "learning_rate": 8.567362695878325e-06,
      "loss": 0.477,
      "step": 291
    },
    {
      "epoch": 1.7914110429447851,
      "grad_norm": 0.12805530428886414,
      "learning_rate": 8.55760017648527e-06,
      "loss": 0.4544,
      "step": 292
    },
    {
      "epoch": 1.7975460122699385,
      "grad_norm": 0.1318630576133728,
      "learning_rate": 8.547810110446766e-06,
      "loss": 0.4428,
      "step": 293
    },
    {
      "epoch": 1.803680981595092,
      "grad_norm": 0.13845506310462952,
      "learning_rate": 8.537992573567698e-06,
      "loss": 0.4704,
      "step": 294
    },
    {
      "epoch": 1.8098159509202454,
      "grad_norm": 0.14310558140277863,
      "learning_rate": 8.528147641865661e-06,
      "loss": 0.4534,
      "step": 295
    },
    {
      "epoch": 1.8159509202453987,
      "grad_norm": 0.14174222946166992,
      "learning_rate": 8.518275391570368e-06,
      "loss": 0.435,
      "step": 296
    },
    {
      "epoch": 1.8220858895705523,
      "grad_norm": 0.14490178227424622,
      "learning_rate": 8.508375899123064e-06,
      "loss": 0.4255,
      "step": 297
    },
    {
      "epoch": 1.8282208588957056,
      "grad_norm": 0.13574454188346863,
      "learning_rate": 8.498449241175927e-06,
      "loss": 0.4384,
      "step": 298
    },
    {
      "epoch": 1.834355828220859,
      "grad_norm": 0.12258850038051605,
      "learning_rate": 8.488495494591482e-06,
      "loss": 0.4114,
      "step": 299
    },
    {
      "epoch": 1.8404907975460123,
      "grad_norm": 0.13615848124027252,
      "learning_rate": 8.478514736441998e-06,
      "loss": 0.4566,
      "step": 300
    },
    {
      "epoch": 1.8466257668711656,
      "grad_norm": 0.1377963125705719,
      "learning_rate": 8.468507044008902e-06,
      "loss": 0.4452,
      "step": 301
    },
    {
      "epoch": 1.852760736196319,
      "grad_norm": 0.13099685311317444,
      "learning_rate": 8.458472494782169e-06,
      "loss": 0.432,
      "step": 302
    },
    {
      "epoch": 1.8588957055214723,
      "grad_norm": 0.13411100208759308,
      "learning_rate": 8.44841116645973e-06,
      "loss": 0.4551,
      "step": 303
    },
    {
      "epoch": 1.8650306748466257,
      "grad_norm": 0.13292278349399567,
      "learning_rate": 8.438323136946865e-06,
      "loss": 0.4757,
      "step": 304
    },
    {
      "epoch": 1.871165644171779,
      "grad_norm": 0.12634415924549103,
      "learning_rate": 8.428208484355606e-06,
      "loss": 0.4427,
      "step": 305
    },
    {
      "epoch": 1.8773006134969326,
      "grad_norm": 0.147501140832901,
      "learning_rate": 8.418067287004125e-06,
      "loss": 0.4714,
      "step": 306
    },
    {
      "epoch": 1.883435582822086,
      "grad_norm": 0.14923636615276337,
      "learning_rate": 8.407899623416136e-06,
      "loss": 0.4752,
      "step": 307
    },
    {
      "epoch": 1.8895705521472392,
      "grad_norm": 0.126709446310997,
      "learning_rate": 8.397705572320275e-06,
      "loss": 0.476,
      "step": 308
    },
    {
      "epoch": 1.8957055214723928,
      "grad_norm": 0.12852588295936584,
      "learning_rate": 8.387485212649505e-06,
      "loss": 0.4335,
      "step": 309
    },
    {
      "epoch": 1.9018404907975461,
      "grad_norm": 0.13321667909622192,
      "learning_rate": 8.377238623540491e-06,
      "loss": 0.4517,
      "step": 310
    },
    {
      "epoch": 1.9079754601226995,
      "grad_norm": 0.14450277388095856,
      "learning_rate": 8.366965884333001e-06,
      "loss": 0.4354,
      "step": 311
    },
    {
      "epoch": 1.9141104294478528,
      "grad_norm": 0.12820766866207123,
      "learning_rate": 8.356667074569274e-06,
      "loss": 0.4088,
      "step": 312
    },
    {
      "epoch": 1.9202453987730062,
      "grad_norm": 0.13799835741519928,
      "learning_rate": 8.346342273993427e-06,
      "loss": 0.47,
      "step": 313
    },
    {
      "epoch": 1.9263803680981595,
      "grad_norm": 0.1558215469121933,
      "learning_rate": 8.335991562550813e-06,
      "loss": 0.4682,
      "step": 314
    },
    {
      "epoch": 1.9325153374233128,
      "grad_norm": 0.14499524235725403,
      "learning_rate": 8.325615020387422e-06,
      "loss": 0.4896,
      "step": 315
    },
    {
      "epoch": 1.9386503067484662,
      "grad_norm": 0.1416790634393692,
      "learning_rate": 8.31521272784925e-06,
      "loss": 0.4531,
      "step": 316
    },
    {
      "epoch": 1.9447852760736195,
      "grad_norm": 0.1342548131942749,
      "learning_rate": 8.304784765481676e-06,
      "loss": 0.4403,
      "step": 317
    },
    {
      "epoch": 1.9509202453987728,
      "grad_norm": 0.1562686264514923,
      "learning_rate": 8.294331214028845e-06,
      "loss": 0.4244,
      "step": 318
    },
    {
      "epoch": 1.9570552147239264,
      "grad_norm": 0.13778996467590332,
      "learning_rate": 8.283852154433042e-06,
      "loss": 0.484,
      "step": 319
    },
    {
      "epoch": 1.9631901840490797,
      "grad_norm": 0.14441350102424622,
      "learning_rate": 8.273347667834057e-06,
      "loss": 0.4534,
      "step": 320
    },
    {
      "epoch": 1.969325153374233,
      "grad_norm": 0.15606369078159332,
      "learning_rate": 8.262817835568563e-06,
      "loss": 0.4403,
      "step": 321
    },
    {
      "epoch": 1.9754601226993866,
      "grad_norm": 0.12662853300571442,
      "learning_rate": 8.25226273916949e-06,
      "loss": 0.4673,
      "step": 322
    },
    {
      "epoch": 1.98159509202454,
      "grad_norm": 0.13923388719558716,
      "learning_rate": 8.241682460365383e-06,
      "loss": 0.4613,
      "step": 323
    },
    {
      "epoch": 1.9877300613496933,
      "grad_norm": 0.13272419571876526,
      "learning_rate": 8.231077081079781e-06,
      "loss": 0.4716,
      "step": 324
    },
    {
      "epoch": 1.9938650306748467,
      "grad_norm": 0.146401509642601,
      "learning_rate": 8.220446683430577e-06,
      "loss": 0.4427,
      "step": 325
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.15548086166381836,
      "learning_rate": 8.209791349729376e-06,
      "loss": 0.4837,
      "step": 326
    },
    {
      "epoch": 2.0061349693251533,
      "grad_norm": 0.14179112017154694,
      "learning_rate": 8.199111162480871e-06,
      "loss": 0.4326,
      "step": 327
    },
    {
      "epoch": 2.0122699386503067,
      "grad_norm": 0.15060554444789886,
      "learning_rate": 8.188406204382192e-06,
      "loss": 0.4513,
      "step": 328
    },
    {
      "epoch": 2.01840490797546,
      "grad_norm": 0.1416303813457489,
      "learning_rate": 8.177676558322274e-06,
      "loss": 0.4639,
      "step": 329
    },
    {
      "epoch": 2.0245398773006134,
      "grad_norm": 0.15585218369960785,
      "learning_rate": 8.16692230738121e-06,
      "loss": 0.4315,
      "step": 330
    },
    {
      "epoch": 2.0306748466257667,
      "grad_norm": 0.1324591487646103,
      "learning_rate": 8.15614353482961e-06,
      "loss": 0.4231,
      "step": 331
    },
    {
      "epoch": 2.03680981595092,
      "grad_norm": 0.1416635811328888,
      "learning_rate": 8.145340324127958e-06,
      "loss": 0.4459,
      "step": 332
    },
    {
      "epoch": 2.042944785276074,
      "grad_norm": 0.13037635385990143,
      "learning_rate": 8.134512758925958e-06,
      "loss": 0.4439,
      "step": 333
    },
    {
      "epoch": 2.049079754601227,
      "grad_norm": 0.1375626176595688,
      "learning_rate": 8.123660923061902e-06,
      "loss": 0.4516,
      "step": 334
    },
    {
      "epoch": 2.0552147239263805,
      "grad_norm": 0.1339251846075058,
      "learning_rate": 8.112784900561997e-06,
      "loss": 0.4357,
      "step": 335
    },
    {
      "epoch": 2.061349693251534,
      "grad_norm": 0.14214186370372772,
      "learning_rate": 8.10188477563974e-06,
      "loss": 0.4511,
      "step": 336
    },
    {
      "epoch": 2.067484662576687,
      "grad_norm": 0.1451665312051773,
      "learning_rate": 8.090960632695246e-06,
      "loss": 0.4122,
      "step": 337
    },
    {
      "epoch": 2.0736196319018405,
      "grad_norm": 0.1382775753736496,
      "learning_rate": 8.080012556314611e-06,
      "loss": 0.4119,
      "step": 338
    },
    {
      "epoch": 2.079754601226994,
      "grad_norm": 0.13937808573246002,
      "learning_rate": 8.069040631269239e-06,
      "loss": 0.4312,
      "step": 339
    },
    {
      "epoch": 2.085889570552147,
      "grad_norm": 0.15605536103248596,
      "learning_rate": 8.058044942515204e-06,
      "loss": 0.4578,
      "step": 340
    },
    {
      "epoch": 2.0920245398773005,
| "grad_norm": 0.13876931369304657, | |
| "learning_rate": 8.047025575192576e-06, | |
| "loss": 0.4444, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.098159509202454, | |
| "grad_norm": 0.1334417313337326, | |
| "learning_rate": 8.035982614624774e-06, | |
| "loss": 0.4576, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.104294478527607, | |
| "grad_norm": 0.14770197868347168, | |
| "learning_rate": 8.024916146317896e-06, | |
| "loss": 0.4584, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.1104294478527605, | |
| "grad_norm": 0.1419200599193573, | |
| "learning_rate": 8.013826255960069e-06, | |
| "loss": 0.4213, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.116564417177914, | |
| "grad_norm": 0.14547352492809296, | |
| "learning_rate": 8.00271302942077e-06, | |
| "loss": 0.4589, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.1226993865030677, | |
| "grad_norm": 0.15418393909931183, | |
| "learning_rate": 7.991576552750173e-06, | |
| "loss": 0.4393, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.128834355828221, | |
| "grad_norm": 0.1524142473936081, | |
| "learning_rate": 7.980416912178478e-06, | |
| "loss": 0.4594, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.1349693251533743, | |
| "grad_norm": 0.21241053938865662, | |
| "learning_rate": 7.969234194115245e-06, | |
| "loss": 0.4578, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.1411042944785277, | |
| "grad_norm": 0.15679003298282623, | |
| "learning_rate": 7.95802848514872e-06, | |
| "loss": 0.4427, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.147239263803681, | |
| "grad_norm": 0.14540907740592957, | |
| "learning_rate": 7.946799872045173e-06, | |
| "loss": 0.4388, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.1533742331288344, | |
| "grad_norm": 0.14929597079753876, | |
| "learning_rate": 7.935548441748221e-06, | |
| "loss": 0.4787, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.1595092024539877, | |
| "grad_norm": 0.1697789877653122, | |
| "learning_rate": 7.924274281378153e-06, | |
| "loss": 0.4433, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.165644171779141, | |
| "grad_norm": 0.15002626180648804, | |
| "learning_rate": 7.912977478231262e-06, | |
| "loss": 0.4689, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.1717791411042944, | |
| "grad_norm": 0.15629152953624725, | |
| "learning_rate": 7.90165811977916e-06, | |
| "loss": 0.445, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.1779141104294477, | |
| "grad_norm": 0.14952926337718964, | |
| "learning_rate": 7.890316293668108e-06, | |
| "loss": 0.452, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.184049079754601, | |
| "grad_norm": 0.18062762916088104, | |
| "learning_rate": 7.878952087718336e-06, | |
| "loss": 0.4675, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.190184049079755, | |
| "grad_norm": 0.13083772361278534, | |
| "learning_rate": 7.867565589923364e-06, | |
| "loss": 0.4529, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.196319018404908, | |
| "grad_norm": 0.3461189270019531, | |
| "learning_rate": 7.856156888449312e-06, | |
| "loss": 0.4596, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.2024539877300615, | |
| "grad_norm": 0.1565181016921997, | |
| "learning_rate": 7.844726071634228e-06, | |
| "loss": 0.4523, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.208588957055215, | |
| "grad_norm": 0.1486126184463501, | |
| "learning_rate": 7.8332732279874e-06, | |
| "loss": 0.4408, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.214723926380368, | |
| "grad_norm": 0.161222904920578, | |
| "learning_rate": 7.82179844618867e-06, | |
| "loss": 0.4455, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.2208588957055215, | |
| "grad_norm": 0.14370033144950867, | |
| "learning_rate": 7.810301815087753e-06, | |
| "loss": 0.4628, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.226993865030675, | |
| "grad_norm": 0.1402759701013565, | |
| "learning_rate": 7.798783423703535e-06, | |
| "loss": 0.4103, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.233128834355828, | |
| "grad_norm": 0.16180723905563354, | |
| "learning_rate": 7.787243361223397e-06, | |
| "loss": 0.4501, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.2392638036809815, | |
| "grad_norm": 0.14314652979373932, | |
| "learning_rate": 7.775681717002523e-06, | |
| "loss": 0.4504, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.245398773006135, | |
| "grad_norm": 0.14083176851272583, | |
| "learning_rate": 7.764098580563203e-06, | |
| "loss": 0.4505, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.2515337423312882, | |
| "grad_norm": 0.14769653975963593, | |
| "learning_rate": 7.75249404159414e-06, | |
| "loss": 0.4289, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.2576687116564416, | |
| "grad_norm": 0.17489399015903473, | |
| "learning_rate": 7.740868189949762e-06, | |
| "loss": 0.4586, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.263803680981595, | |
| "grad_norm": 0.15733091533184052, | |
| "learning_rate": 7.729221115649516e-06, | |
| "loss": 0.4499, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.2699386503067487, | |
| "grad_norm": 0.1377793848514557, | |
| "learning_rate": 7.717552908877185e-06, | |
| "loss": 0.441, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.276073619631902, | |
| "grad_norm": 0.13807876408100128, | |
| "learning_rate": 7.705863659980175e-06, | |
| "loss": 0.4457, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.2822085889570554, | |
| "grad_norm": 0.14367403090000153, | |
| "learning_rate": 7.694153459468822e-06, | |
| "loss": 0.4598, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.2883435582822087, | |
| "grad_norm": 0.1540713906288147, | |
| "learning_rate": 7.682422398015696e-06, | |
| "loss": 0.4124, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.294478527607362, | |
| "grad_norm": 0.15163351595401764, | |
| "learning_rate": 7.67067056645489e-06, | |
| "loss": 0.4608, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.3006134969325154, | |
| "grad_norm": 0.13935914635658264, | |
| "learning_rate": 7.658898055781326e-06, | |
| "loss": 0.4198, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.3067484662576687, | |
| "grad_norm": 0.15032264590263367, | |
| "learning_rate": 7.647104957150037e-06, | |
| "loss": 0.4414, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.312883435582822, | |
| "grad_norm": 0.16133847832679749, | |
| "learning_rate": 7.635291361875474e-06, | |
| "loss": 0.4228, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.3190184049079754, | |
| "grad_norm": 0.16057458519935608, | |
| "learning_rate": 7.623457361430798e-06, | |
| "loss": 0.4763, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.3251533742331287, | |
| "grad_norm": 0.1559583991765976, | |
| "learning_rate": 7.611603047447161e-06, | |
| "loss": 0.4191, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.331288343558282, | |
| "grad_norm": 0.16082173585891724, | |
| "learning_rate": 7.5997285117130095e-06, | |
| "loss": 0.4454, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.3374233128834354, | |
| "grad_norm": 0.14822718501091003, | |
| "learning_rate": 7.587833846173363e-06, | |
| "loss": 0.433, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.3435582822085887, | |
| "grad_norm": 0.16007304191589355, | |
| "learning_rate": 7.57591914292911e-06, | |
| "loss": 0.4177, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.3496932515337425, | |
| "grad_norm": 0.15621638298034668, | |
| "learning_rate": 7.5639844942362915e-06, | |
| "loss": 0.4429, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.355828220858896, | |
| "grad_norm": 0.1551215648651123, | |
| "learning_rate": 7.552029992505385e-06, | |
| "loss": 0.4545, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.361963190184049, | |
| "grad_norm": 0.15754824876785278, | |
| "learning_rate": 7.540055730300595e-06, | |
| "loss": 0.4292, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.3680981595092025, | |
| "grad_norm": 0.14232178032398224, | |
| "learning_rate": 7.528061800339127e-06, | |
| "loss": 0.4494, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.374233128834356, | |
| "grad_norm": 0.1486847698688507, | |
| "learning_rate": 7.516048295490479e-06, | |
| "loss": 0.4478, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.3803680981595092, | |
| "grad_norm": 0.1655314713716507, | |
| "learning_rate": 7.504015308775714e-06, | |
| "loss": 0.4309, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.3865030674846626, | |
| "grad_norm": 0.14602237939834595, | |
| "learning_rate": 7.491962933366748e-06, | |
| "loss": 0.4472, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.392638036809816, | |
| "grad_norm": 0.14621604979038239, | |
| "learning_rate": 7.479891262585623e-06, | |
| "loss": 0.4567, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.3987730061349692, | |
| "grad_norm": 0.1583264321088791, | |
| "learning_rate": 7.467800389903786e-06, | |
| "loss": 0.45, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.4049079754601226, | |
| "grad_norm": 0.17010322213172913, | |
| "learning_rate": 7.455690408941363e-06, | |
| "loss": 0.4426, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.411042944785276, | |
| "grad_norm": 0.16225053369998932, | |
| "learning_rate": 7.443561413466439e-06, | |
| "loss": 0.421, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.4171779141104293, | |
| "grad_norm": 0.20231634378433228, | |
| "learning_rate": 7.431413497394328e-06, | |
| "loss": 0.447, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.4233128834355826, | |
| "grad_norm": 0.1540592461824417, | |
| "learning_rate": 7.419246754786847e-06, | |
| "loss": 0.463, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.4294478527607364, | |
| "grad_norm": 0.15883290767669678, | |
| "learning_rate": 7.407061279851589e-06, | |
| "loss": 0.4636, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.4355828220858897, | |
| "grad_norm": 0.15072090923786163, | |
| "learning_rate": 7.394857166941187e-06, | |
| "loss": 0.4539, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.441717791411043, | |
| "grad_norm": 0.14263494312763214, | |
| "learning_rate": 7.382634510552596e-06, | |
| "loss": 0.44, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.4478527607361964, | |
| "grad_norm": 0.15457823872566223, | |
| "learning_rate": 7.370393405326351e-06, | |
| "loss": 0.4534, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.4539877300613497, | |
| "grad_norm": 0.1526721864938736, | |
| "learning_rate": 7.358133946045834e-06, | |
| "loss": 0.4551, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.460122699386503, | |
| "grad_norm": 0.14920799434185028, | |
| "learning_rate": 7.345856227636548e-06, | |
| "loss": 0.4593, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.4662576687116564, | |
| "grad_norm": 0.1798851191997528, | |
| "learning_rate": 7.333560345165371e-06, | |
| "loss": 0.4074, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.4723926380368098, | |
| "grad_norm": 0.1653115600347519, | |
| "learning_rate": 7.321246393839836e-06, | |
| "loss": 0.44, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.478527607361963, | |
| "grad_norm": 0.13865536451339722, | |
| "learning_rate": 7.308914469007372e-06, | |
| "loss": 0.4464, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.4846625766871164, | |
| "grad_norm": 0.16312679648399353, | |
| "learning_rate": 7.296564666154589e-06, | |
| "loss": 0.4617, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.4907975460122698, | |
| "grad_norm": 0.1640763282775879, | |
| "learning_rate": 7.284197080906517e-06, | |
| "loss": 0.4744, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.4969325153374236, | |
| "grad_norm": 0.17815761268138885, | |
| "learning_rate": 7.271811809025882e-06, | |
| "loss": 0.4404, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.5030674846625764, | |
| "grad_norm": 0.15390528738498688, | |
| "learning_rate": 7.259408946412359e-06, | |
| "loss": 0.4353, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.5092024539877302, | |
| "grad_norm": 0.15139776468276978, | |
| "learning_rate": 7.246988589101825e-06, | |
| "loss": 0.4763, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.5153374233128836, | |
| "grad_norm": 0.15306057035923004, | |
| "learning_rate": 7.234550833265621e-06, | |
| "loss": 0.4415, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.521472392638037, | |
| "grad_norm": 0.1511690318584442, | |
| "learning_rate": 7.222095775209805e-06, | |
| "loss": 0.4316, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.5276073619631902, | |
| "grad_norm": 0.15759505331516266, | |
| "learning_rate": 7.209623511374407e-06, | |
| "loss": 0.4321, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.5337423312883436, | |
| "grad_norm": 0.15876421332359314, | |
| "learning_rate": 7.197134138332684e-06, | |
| "loss": 0.4431, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.539877300613497, | |
| "grad_norm": 0.15190748870372772, | |
| "learning_rate": 7.184627752790368e-06, | |
| "loss": 0.4498, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.5460122699386503, | |
| "grad_norm": 0.1476791799068451, | |
| "learning_rate": 7.1721044515849165e-06, | |
| "loss": 0.4314, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.5521472392638036, | |
| "grad_norm": 0.1838875710964203, | |
| "learning_rate": 7.159564331684774e-06, | |
| "loss": 0.4257, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.558282208588957, | |
| "grad_norm": 0.15660858154296875, | |
| "learning_rate": 7.1470074901886065e-06, | |
| "loss": 0.4575, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.5644171779141103, | |
| "grad_norm": 0.15932102501392365, | |
| "learning_rate": 7.134434024324557e-06, | |
| "loss": 0.4338, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.5705521472392636, | |
| "grad_norm": 0.16551874577999115, | |
| "learning_rate": 7.121844031449491e-06, | |
| "loss": 0.4746, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.5766871165644174, | |
| "grad_norm": 0.16659101843833923, | |
| "learning_rate": 7.109237609048247e-06, | |
| "loss": 0.4983, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.5828220858895703, | |
| "grad_norm": 0.14108383655548096, | |
| "learning_rate": 7.096614854732873e-06, | |
| "loss": 0.4698, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.588957055214724, | |
| "grad_norm": 0.14742513000965118, | |
| "learning_rate": 7.083975866241881e-06, | |
| "loss": 0.4466, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.5950920245398774, | |
| "grad_norm": 0.15716223418712616, | |
| "learning_rate": 7.071320741439481e-06, | |
| "loss": 0.4671, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.6012269938650308, | |
| "grad_norm": 0.1822831928730011, | |
| "learning_rate": 7.058649578314828e-06, | |
| "loss": 0.4133, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.607361963190184, | |
| "grad_norm": 0.16716191172599792, | |
| "learning_rate": 7.045962474981261e-06, | |
| "loss": 0.4084, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.6134969325153374, | |
| "grad_norm": 0.1516677588224411, | |
| "learning_rate": 7.03325952967555e-06, | |
| "loss": 0.4342, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.6196319018404908, | |
| "grad_norm": 0.15387603640556335, | |
| "learning_rate": 7.020540840757124e-06, | |
| "loss": 0.4211, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.625766871165644, | |
| "grad_norm": 0.1543998122215271, | |
| "learning_rate": 7.007806506707319e-06, | |
| "loss": 0.4273, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.6319018404907975, | |
| "grad_norm": 0.15574420988559723, | |
| "learning_rate": 6.995056626128609e-06, | |
| "loss": 0.4458, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.638036809815951, | |
| "grad_norm": 0.1659959852695465, | |
| "learning_rate": 6.982291297743848e-06, | |
| "loss": 0.4592, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.644171779141104, | |
| "grad_norm": 0.18856996297836304, | |
| "learning_rate": 6.969510620395503e-06, | |
| "loss": 0.4552, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.6503067484662575, | |
| "grad_norm": 0.14291857182979584, | |
| "learning_rate": 6.956714693044888e-06, | |
| "loss": 0.4204, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.6564417177914113, | |
| "grad_norm": 0.17281392216682434, | |
| "learning_rate": 6.943903614771397e-06, | |
| "loss": 0.4571, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.662576687116564, | |
| "grad_norm": 0.16019487380981445, | |
| "learning_rate": 6.931077484771739e-06, | |
| "loss": 0.4563, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.668711656441718, | |
| "grad_norm": 0.16260547935962677, | |
| "learning_rate": 6.9182364023591706e-06, | |
| "loss": 0.425, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.6748466257668713, | |
| "grad_norm": 0.1593744456768036, | |
| "learning_rate": 6.905380466962726e-06, | |
| "loss": 0.4211, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.6809815950920246, | |
| "grad_norm": 0.16132207214832306, | |
| "learning_rate": 6.892509778126442e-06, | |
| "loss": 0.4377, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.687116564417178, | |
| "grad_norm": 0.15323421359062195, | |
| "learning_rate": 6.879624435508596e-06, | |
| "loss": 0.4284, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.6932515337423313, | |
| "grad_norm": 0.1697331666946411, | |
| "learning_rate": 6.866724538880931e-06, | |
| "loss": 0.4341, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.6993865030674846, | |
| "grad_norm": 0.1691737174987793, | |
| "learning_rate": 6.85381018812788e-06, | |
| "loss": 0.4907, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.705521472392638, | |
| "grad_norm": 0.1564476490020752, | |
| "learning_rate": 6.840881483245797e-06, | |
| "loss": 0.4133, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.7116564417177913, | |
| "grad_norm": 0.15074561536312103, | |
| "learning_rate": 6.827938524342175e-06, | |
| "loss": 0.4424, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.7177914110429446, | |
| "grad_norm": 0.15723863244056702, | |
| "learning_rate": 6.814981411634885e-06, | |
| "loss": 0.4469, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.7239263803680984, | |
| "grad_norm": 0.15343016386032104, | |
| "learning_rate": 6.802010245451382e-06, | |
| "loss": 0.4495, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.7300613496932513, | |
| "grad_norm": 0.14901430904865265, | |
| "learning_rate": 6.789025126227948e-06, | |
| "loss": 0.4332, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.736196319018405, | |
| "grad_norm": 0.18850690126419067, | |
| "learning_rate": 6.7760261545088955e-06, | |
| "loss": 0.4741, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.7423312883435584, | |
| "grad_norm": 0.18473871052265167, | |
| "learning_rate": 6.763013430945803e-06, | |
| "loss": 0.4579, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.7484662576687118, | |
| "grad_norm": 0.16771845519542694, | |
| "learning_rate": 6.749987056296728e-06, | |
| "loss": 0.4281, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.754601226993865, | |
| "grad_norm": 0.15645870566368103, | |
| "learning_rate": 6.736947131425423e-06, | |
| "loss": 0.4399, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.7607361963190185, | |
| "grad_norm": 0.15851353108882904, | |
| "learning_rate": 6.723893757300572e-06, | |
| "loss": 0.4245, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.766871165644172, | |
| "grad_norm": 0.15412819385528564, | |
| "learning_rate": 6.710827034994991e-06, | |
| "loss": 0.457, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.773006134969325, | |
| "grad_norm": 0.21733322739601135, | |
| "learning_rate": 6.697747065684851e-06, | |
| "loss": 0.4235, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.7791411042944785, | |
| "grad_norm": 0.1558465212583542, | |
| "learning_rate": 6.684653950648893e-06, | |
| "loss": 0.3966, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.785276073619632, | |
| "grad_norm": 0.1692306250333786, | |
| "learning_rate": 6.671547791267652e-06, | |
| "loss": 0.469, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.791411042944785, | |
| "grad_norm": 0.1569252610206604, | |
| "learning_rate": 6.658428689022661e-06, | |
| "loss": 0.4432, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.7975460122699385, | |
| "grad_norm": 0.17490547895431519, | |
| "learning_rate": 6.6452967454956744e-06, | |
| "loss": 0.454, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.8036809815950923, | |
| "grad_norm": 0.16125169396400452, | |
| "learning_rate": 6.632152062367871e-06, | |
| "loss": 0.4699, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.809815950920245, | |
| "grad_norm": 0.16368430852890015, | |
| "learning_rate": 6.618994741419078e-06, | |
| "loss": 0.4194, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.815950920245399, | |
| "grad_norm": 0.1626632660627365, | |
| "learning_rate": 6.605824884526978e-06, | |
| "loss": 0.4289, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.8220858895705523, | |
| "grad_norm": 0.17192265391349792, | |
| "learning_rate": 6.592642593666316e-06, | |
| "loss": 0.4629, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.8282208588957056, | |
| "grad_norm": 0.15331393480300903, | |
| "learning_rate": 6.579447970908115e-06, | |
| "loss": 0.4261, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.834355828220859, | |
| "grad_norm": 0.16751109063625336, | |
| "learning_rate": 6.566241118418888e-06, | |
| "loss": 0.4398, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.8404907975460123, | |
| "grad_norm": 0.20704393088817596, | |
| "learning_rate": 6.553022138459839e-06, | |
| "loss": 0.4196, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.8466257668711656, | |
| "grad_norm": 0.17874981462955475, | |
| "learning_rate": 6.539791133386077e-06, | |
| "loss": 0.4542, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.852760736196319, | |
| "grad_norm": 0.1628667265176773, | |
| "learning_rate": 6.526548205645823e-06, | |
| "loss": 0.4278, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.8588957055214723, | |
| "grad_norm": 0.18325841426849365, | |
| "learning_rate": 6.513293457779614e-06, | |
| "loss": 0.4448, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.8650306748466257, | |
| "grad_norm": 0.17128658294677734, | |
| "learning_rate": 6.50002699241951e-06, | |
| "loss": 0.4778, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.871165644171779, | |
| "grad_norm": 0.18761861324310303, | |
| "learning_rate": 6.486748912288305e-06, | |
| "loss": 0.4313, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.8773006134969323, | |
| "grad_norm": 0.16829295456409454, | |
| "learning_rate": 6.4734593201987205e-06, | |
| "loss": 0.4589, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.883435582822086, | |
| "grad_norm": 0.16941910982131958, | |
| "learning_rate": 6.46015831905262e-06, | |
| "loss": 0.4844, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.889570552147239, | |
| "grad_norm": 0.18299958109855652, | |
| "learning_rate": 6.446846011840204e-06, | |
| "loss": 0.4204, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.895705521472393, | |
| "grad_norm": 0.1755492389202118, | |
| "learning_rate": 6.43352250163922e-06, | |
| "loss": 0.4265, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.901840490797546, | |
| "grad_norm": 0.17342810332775116, | |
| "learning_rate": 6.420187891614158e-06, | |
| "loss": 0.4314, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.9079754601226995, | |
| "grad_norm": 0.15891548991203308, | |
| "learning_rate": 6.406842285015455e-06, | |
| "loss": 0.4188, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.914110429447853, | |
| "grad_norm": 0.17201867699623108, | |
| "learning_rate": 6.393485785178699e-06, | |
| "loss": 0.4459, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.920245398773006, | |
| "grad_norm": 0.16666850447654724, | |
| "learning_rate": 6.380118495523816e-06, | |
| "loss": 0.437, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.9263803680981595, | |
| "grad_norm": 0.17189852893352509, | |
| "learning_rate": 6.366740519554286e-06, | |
| "loss": 0.4263, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.932515337423313, | |
| "grad_norm": 0.16016732156276703, | |
| "learning_rate": 6.353351960856332e-06, | |
| "loss": 0.4358, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.938650306748466, | |
| "grad_norm": 0.15281671285629272, | |
| "learning_rate": 6.339952923098117e-06, | |
| "loss": 0.4314, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.9447852760736195, | |
| "grad_norm": 0.19255416095256805, | |
| "learning_rate": 6.326543510028943e-06, | |
| "loss": 0.42, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.950920245398773, | |
| "grad_norm": 0.1635408252477646, | |
| "learning_rate": 6.3131238254784534e-06, | |
| "loss": 0.4247, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.957055214723926, | |
| "grad_norm": 0.16226807236671448, | |
| "learning_rate": 6.299693973355821e-06, | |
| "loss": 0.4348, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.96319018404908, | |
| "grad_norm": 0.17341448366641998, | |
| "learning_rate": 6.286254057648945e-06, | |
| "loss": 0.4174, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.969325153374233, | |
| "grad_norm": 0.16853678226470947, | |
| "learning_rate": 6.27280418242365e-06, | |
| "loss": 0.4529, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.9754601226993866, | |
| "grad_norm": 0.16225695610046387, | |
| "learning_rate": 6.259344451822877e-06, | |
| "loss": 0.4685, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.98159509202454, | |
| "grad_norm": 0.16942046582698822, | |
| "learning_rate": 6.245874970065877e-06, | |
| "loss": 0.4335, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.9877300613496933, | |
| "grad_norm": 0.1746375411748886, | |
| "learning_rate": 6.2323958414474065e-06, | |
| "loss": 0.4304, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.9938650306748467, | |
| "grad_norm": 0.19197514653205872, | |
| "learning_rate": 6.218907170336912e-06, | |
| "loss": 0.4651, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.16628479957580566, | |
| "learning_rate": 6.2054090611777385e-06, | |
| "loss": 0.4484, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.0061349693251533, | |
| "grad_norm": 0.1807141751050949, | |
| "learning_rate": 6.191901618486299e-06, | |
| "loss": 0.4403, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.0122699386503067, | |
| "grad_norm": 0.18243259191513062, | |
| "learning_rate": 6.178384946851284e-06, | |
| "loss": 0.4427, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.01840490797546, | |
| "grad_norm": 0.17769359052181244, | |
| "learning_rate": 6.164859150932839e-06, | |
| "loss": 0.4177, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.0245398773006134, | |
| "grad_norm": 0.17457637190818787, | |
| "learning_rate": 6.151324335461766e-06, | |
| "loss": 0.4623, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.0306748466257667, | |
| "grad_norm": 0.1725994348526001, | |
| "learning_rate": 6.137780605238698e-06, | |
| "loss": 0.4638, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.03680981595092, | |
| "grad_norm": 0.17166265845298767, | |
| "learning_rate": 6.1242280651332995e-06, | |
| "loss": 0.446, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.042944785276074, | |
| "grad_norm": 0.17501436173915863, | |
| "learning_rate": 6.11066682008345e-06, | |
| "loss": 0.4405, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.049079754601227, | |
| "grad_norm": 0.1676449030637741, | |
| "learning_rate": 6.097096975094432e-06, | |
| "loss": 0.4718, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.0552147239263805, | |
| "grad_norm": 0.1620589643716812, | |
| "learning_rate": 6.083518635238117e-06, | |
| "loss": 0.4353, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.061349693251534, | |
| "grad_norm": 0.17251572012901306, | |
| "learning_rate": 6.069931905652151e-06, | |
| "loss": 0.4409, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.067484662576687, | |
| "grad_norm": 0.1733006089925766, | |
| "learning_rate": 6.056336891539144e-06, | |
| "loss": 0.4323, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0736196319018405, | |
| "grad_norm": 0.1714855581521988, | |
| "learning_rate": 6.042733698165855e-06, | |
| "loss": 0.4297, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.079754601226994, | |
| "grad_norm": 0.15556511282920837, | |
| "learning_rate": 6.029122430862373e-06, | |
| "loss": 0.4546, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.085889570552147, | |
| "grad_norm": 0.18687503039836884, | |
| "learning_rate": 6.015503195021303e-06, | |
| "loss": 0.4586, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.0920245398773005, | |
| "grad_norm": 0.1955818086862564, | |
| "learning_rate": 6.001876096096951e-06, | |
| "loss": 0.4405, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.098159509202454, | |
| "grad_norm": 0.18032221496105194, | |
| "learning_rate": 5.988241239604511e-06, | |
| "loss": 0.4621, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.104294478527607, | |
| "grad_norm": 0.1692107915878296, | |
| "learning_rate": 5.97459873111924e-06, | |
| "loss": 0.4557, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.1104294478527605, | |
| "grad_norm": 0.18335744738578796, | |
| "learning_rate": 5.9609486762756465e-06, | |
| "loss": 0.43, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.116564417177914, | |
| "grad_norm": 0.16377264261245728, | |
| "learning_rate": 5.947291180766668e-06, | |
| "loss": 0.4473, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.1226993865030677, | |
| "grad_norm": 0.1665698140859604, | |
| "learning_rate": 5.933626350342858e-06, | |
| "loss": 0.4262, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.128834355828221, | |
| "grad_norm": 0.1706293821334839, | |
| "learning_rate": 5.9199542908115694e-06, | |
| "loss": 0.4118, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.1349693251533743, | |
| "grad_norm": 0.17646320164203644, | |
| "learning_rate": 5.906275108036119e-06, | |
| "loss": 0.4503, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.1411042944785277, | |
| "grad_norm": 0.18442606925964355, | |
| "learning_rate": 5.892588907934988e-06, | |
| "loss": 0.448, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.147239263803681, | |
| "grad_norm": 0.16845177114009857, | |
| "learning_rate": 5.87889579648099e-06, | |
| "loss": 0.4533, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.1533742331288344, | |
| "grad_norm": 0.17161841690540314, | |
| "learning_rate": 5.865195879700454e-06, | |
| "loss": 0.4358, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.1595092024539877, | |
| "grad_norm": 0.1597752720117569, | |
| "learning_rate": 5.8514892636724005e-06, | |
| "loss": 0.4444, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.165644171779141, | |
| "grad_norm": 0.1626145988702774, | |
| "learning_rate": 5.83777605452773e-06, | |
| "loss": 0.4317, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.1717791411042944, | |
| "grad_norm": 0.17902852594852448, | |
| "learning_rate": 5.8240563584483855e-06, | |
| "loss": 0.4604, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.1779141104294477, | |
| "grad_norm": 0.1681181639432907, | |
| "learning_rate": 5.810330281666542e-06, | |
| "loss": 0.4577, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.184049079754601, | |
| "grad_norm": 0.18032066524028778, | |
| "learning_rate": 5.796597930463776e-06, | |
| "loss": 0.4214, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.190184049079755, | |
| "grad_norm": 0.18553897738456726, | |
| "learning_rate": 5.782859411170261e-06, | |
| "loss": 0.3931, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.196319018404908, | |
| "grad_norm": 0.16626308858394623, | |
| "learning_rate": 5.769114830163913e-06, | |
| "loss": 0.4307, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.2024539877300615, | |
| "grad_norm": 0.17122527956962585, | |
| "learning_rate": 5.7553642938695945e-06, | |
| "loss": 0.4582, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.208588957055215, | |
| "grad_norm": 0.1699482649564743, | |
| "learning_rate": 5.741607908758275e-06, | |
| "loss": 0.4575, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.214723926380368, | |
| "grad_norm": 0.185217946767807, | |
| "learning_rate": 5.727845781346217e-06, | |
| "loss": 0.4063, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.2208588957055215, | |
| "grad_norm": 0.17612098157405853, | |
| "learning_rate": 5.714078018194141e-06, | |
| "loss": 0.4555, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.226993865030675, | |
| "grad_norm": 0.17292365431785583, | |
| "learning_rate": 5.7003047259064095e-06, | |
| "loss": 0.4402, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.233128834355828, | |
| "grad_norm": 0.18183457851409912, | |
| "learning_rate": 5.68652601113019e-06, | |
| "loss": 0.4694, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.2392638036809815, | |
| "grad_norm": 0.1628892868757248, | |
| "learning_rate": 5.672741980554646e-06, | |
| "loss": 0.4472, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.245398773006135, | |
| "grad_norm": 0.17642362415790558, | |
| "learning_rate": 5.658952740910094e-06, | |
| "loss": 0.4239, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.2515337423312882, | |
| "grad_norm": 0.1806386113166809, | |
| "learning_rate": 5.645158398967191e-06, | |
| "loss": 0.4282, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.2576687116564416, | |
| "grad_norm": 0.1763850599527359, | |
| "learning_rate": 5.6313590615360935e-06, | |
| "loss": 0.4193, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.263803680981595, | |
| "grad_norm": 0.16797596216201782, | |
| "learning_rate": 5.617554835465646e-06, | |
| "loss": 0.4358, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.2699386503067487, | |
| "grad_norm": 0.16996163129806519, | |
| "learning_rate": 5.6037458276425394e-06, | |
| "loss": 0.4048, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.276073619631902, | |
| "grad_norm": 0.16143372654914856, | |
| "learning_rate": 5.589932144990495e-06, | |
| "loss": 0.4421, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.2822085889570554, | |
| "grad_norm": 0.16983363032341003, | |
| "learning_rate": 5.5761138944694295e-06, | |
| "loss": 0.4253, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.2883435582822087, | |
| "grad_norm": 0.17256614565849304, | |
| "learning_rate": 5.562291183074627e-06, | |
| "loss": 0.4216, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.294478527607362, | |
| "grad_norm": 0.18795952200889587, | |
| "learning_rate": 5.548464117835917e-06, | |
| "loss": 0.4246, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.3006134969325154, | |
| "grad_norm": 0.18537187576293945, | |
| "learning_rate": 5.534632805816835e-06, | |
| "loss": 0.4124, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.3067484662576687, | |
| "grad_norm": 0.19903261959552765, | |
| "learning_rate": 5.520797354113804e-06, | |
| "loss": 0.46, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.312883435582822, | |
| "grad_norm": 0.1933709681034088, | |
| "learning_rate": 5.5069578698553e-06, | |
| "loss": 0.431, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.3190184049079754, | |
| "grad_norm": 0.17167270183563232, | |
| "learning_rate": 5.4931144602010224e-06, | |
| "loss": 0.4773, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.3251533742331287, | |
| "grad_norm": 0.16057482361793518, | |
| "learning_rate": 5.479267232341064e-06, | |
| "loss": 0.4089, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.331288343558282, | |
| "grad_norm": 0.17433694005012512, | |
| "learning_rate": 5.465416293495083e-06, | |
| "loss": 0.4502, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.3374233128834354, | |
| "grad_norm": 0.1779199242591858, | |
| "learning_rate": 5.451561750911475e-06, | |
| "loss": 0.4048, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.3435582822085887, | |
| "grad_norm": 0.17335927486419678, | |
| "learning_rate": 5.437703711866534e-06, | |
| "loss": 0.4341, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.3496932515337425, | |
| "grad_norm": 0.17468775808811188, | |
| "learning_rate": 5.4238422836636315e-06, | |
| "loss": 0.4448, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.355828220858896, | |
| "grad_norm": 0.18919560313224792, | |
| "learning_rate": 5.40997757363238e-06, | |
| "loss": 0.4489, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.361963190184049, | |
| "grad_norm": 0.17396590113639832, | |
| "learning_rate": 5.3961096891278035e-06, | |
| "loss": 0.4325, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.3680981595092025, | |
| "grad_norm": 0.16672684252262115, | |
| "learning_rate": 5.382238737529505e-06, | |
| "loss": 0.4077, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.374233128834356, | |
| "grad_norm": 0.1844366043806076, | |
| "learning_rate": 5.368364826240836e-06, | |
| "loss": 0.4196, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.3803680981595092, | |
| "grad_norm": 0.18350937962532043, | |
| "learning_rate": 5.354488062688068e-06, | |
| "loss": 0.4523, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.3865030674846626, | |
| "grad_norm": 0.26154661178588867, | |
| "learning_rate": 5.3406085543195555e-06, | |
| "loss": 0.4761, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.392638036809816, | |
| "grad_norm": 0.1603960394859314, | |
| "learning_rate": 5.3267264086049054e-06, | |
| "loss": 0.4433, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.3987730061349692, | |
| "grad_norm": 0.19381873309612274, | |
| "learning_rate": 5.312841733034147e-06, | |
| "loss": 0.4031, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.4049079754601226, | |
| "grad_norm": 0.21427270770072937, | |
| "learning_rate": 5.2989546351168985e-06, | |
| "loss": 0.4391, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.411042944785276, | |
| "grad_norm": 0.1873149424791336, | |
| "learning_rate": 5.285065222381533e-06, | |
| "loss": 0.4409, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.4171779141104293, | |
| "grad_norm": 0.1799328476190567, | |
| "learning_rate": 5.27117360237435e-06, | |
| "loss": 0.4381, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.4233128834355826, | |
| "grad_norm": 0.1705305278301239, | |
| "learning_rate": 5.257279882658737e-06, | |
| "loss": 0.3905, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.4294478527607364, | |
| "grad_norm": 0.1969294250011444, | |
| "learning_rate": 5.2433841708143405e-06, | |
| "loss": 0.4264, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.4355828220858897, | |
| "grad_norm": 0.17147500813007355, | |
| "learning_rate": 5.229486574436236e-06, | |
| "loss": 0.4592, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.441717791411043, | |
| "grad_norm": 0.1796891838312149, | |
| "learning_rate": 5.215587201134081e-06, | |
| "loss": 0.4223, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.4478527607361964, | |
| "grad_norm": 0.17482995986938477, | |
| "learning_rate": 5.201686158531304e-06, | |
| "loss": 0.4143, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.4539877300613497, | |
| "grad_norm": 0.18087799847126007, | |
| "learning_rate": 5.187783554264253e-06, | |
| "loss": 0.4611, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.460122699386503, | |
| "grad_norm": 0.1888713240623474, | |
| "learning_rate": 5.173879495981367e-06, | |
| "loss": 0.4569, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.4662576687116564, | |
| "grad_norm": 0.20657221972942352, | |
| "learning_rate": 5.1599740913423435e-06, | |
| "loss": 0.4762, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.4723926380368098, | |
| "grad_norm": 0.18362171947956085, | |
| "learning_rate": 5.146067448017308e-06, | |
| "loss": 0.4268, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.478527607361963, | |
| "grad_norm": 0.18593919277191162, | |
| "learning_rate": 5.132159673685976e-06, | |
| "loss": 0.4506, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.4846625766871164, | |
| "grad_norm": 0.1854875087738037, | |
| "learning_rate": 5.1182508760368195e-06, | |
| "loss": 0.4209, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.4907975460122698, | |
| "grad_norm": 0.18463847041130066, | |
| "learning_rate": 5.104341162766234e-06, | |
| "loss": 0.4349, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.4969325153374236, | |
| "grad_norm": 0.18153201043605804, | |
| "learning_rate": 5.090430641577705e-06, | |
| "loss": 0.459, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.5030674846625764, | |
| "grad_norm": 0.20472465455532074, | |
| "learning_rate": 5.0765194201809755e-06, | |
| "loss": 0.4294, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.5092024539877302, | |
| "grad_norm": 0.16046775877475739, | |
| "learning_rate": 5.062607606291208e-06, | |
| "loss": 0.4442, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.5153374233128836, | |
| "grad_norm": 0.17173658311367035, | |
| "learning_rate": 5.048695307628152e-06, | |
| "loss": 0.4155, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.521472392638037, | |
| "grad_norm": 0.18764469027519226, | |
| "learning_rate": 5.034782631915314e-06, | |
| "loss": 0.4081, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.5276073619631902, | |
| "grad_norm": 0.18143120408058167, | |
| "learning_rate": 5.020869686879115e-06, | |
| "loss": 0.4273, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.5337423312883436, | |
| "grad_norm": 0.16679701209068298, | |
| "learning_rate": 5.006956580248069e-06, | |
| "loss": 0.445, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.539877300613497, | |
| "grad_norm": 0.1785566657781601, | |
| "learning_rate": 4.993043419751933e-06, | |
| "loss": 0.4325, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.5460122699386503, | |
| "grad_norm": 0.18561996519565582, | |
| "learning_rate": 4.979130313120885e-06, | |
| "loss": 0.4432, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.5521472392638036, | |
| "grad_norm": 0.17507800459861755, | |
| "learning_rate": 4.965217368084688e-06, | |
| "loss": 0.4358, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.558282208588957, | |
| "grad_norm": 0.17406027019023895, | |
| "learning_rate": 4.95130469237185e-06, | |
| "loss": 0.4551, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.5644171779141103, | |
| "grad_norm": 0.18630944192409515, | |
| "learning_rate": 4.937392393708794e-06, | |
| "loss": 0.4398, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.5705521472392636, | |
| "grad_norm": 0.15710729360580444, | |
| "learning_rate": 4.923480579819025e-06, | |
| "loss": 0.428, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.5766871165644174, | |
| "grad_norm": 0.17362196743488312, | |
| "learning_rate": 4.909569358422296e-06, | |
| "loss": 0.4388, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.5828220858895703, | |
| "grad_norm": 0.1887211948633194, | |
| "learning_rate": 4.895658837233767e-06, | |
| "loss": 0.4241, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.588957055214724, | |
| "grad_norm": 0.18522702157497406, | |
| "learning_rate": 4.881749123963183e-06, | |
| "loss": 0.443, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.5950920245398774, | |
| "grad_norm": 0.17625677585601807, | |
| "learning_rate": 4.867840326314024e-06, | |
| "loss": 0.4345, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.6012269938650308, | |
| "grad_norm": 0.17641369998455048, | |
| "learning_rate": 4.853932551982692e-06, | |
| "loss": 0.4018, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.607361963190184, | |
| "grad_norm": 0.18613062798976898, | |
| "learning_rate": 4.840025908657658e-06, | |
| "loss": 0.4787, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.6134969325153374, | |
| "grad_norm": 0.17531706392765045, | |
| "learning_rate": 4.826120504018635e-06, | |
| "loss": 0.4197, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.6196319018404908, | |
| "grad_norm": 0.18549470603466034, | |
| "learning_rate": 4.812216445735749e-06, | |
| "loss": 0.4228, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.625766871165644, | |
| "grad_norm": 0.17408756911754608, | |
| "learning_rate": 4.798313841468697e-06, | |
| "loss": 0.4054, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.6319018404907975, | |
| "grad_norm": 0.19779986143112183, | |
| "learning_rate": 4.7844127988659204e-06, | |
| "loss": 0.4277, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.638036809815951, | |
| "grad_norm": 0.16971039772033691, | |
| "learning_rate": 4.7705134255637676e-06, | |
| "loss": 0.4098, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.644171779141104, | |
| "grad_norm": 0.18103785812854767, | |
| "learning_rate": 4.756615829185661e-06, | |
| "loss": 0.4254, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.6503067484662575, | |
| "grad_norm": 0.18030977249145508, | |
| "learning_rate": 4.742720117341265e-06, | |
| "loss": 0.4203, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.6564417177914113, | |
| "grad_norm": 0.17600463330745697, | |
| "learning_rate": 4.728826397625651e-06, | |
| "loss": 0.429, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.662576687116564, | |
| "grad_norm": 0.18706922233104706, | |
| "learning_rate": 4.714934777618468e-06, | |
| "loss": 0.4562, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.668711656441718, | |
| "grad_norm": 0.18239618837833405, | |
| "learning_rate": 4.701045364883103e-06, | |
| "loss": 0.4327, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.6748466257668713, | |
| "grad_norm": 0.20267638564109802, | |
| "learning_rate": 4.6871582669658545e-06, | |
| "loss": 0.4167, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.6809815950920246, | |
| "grad_norm": 0.1839340478181839, | |
| "learning_rate": 4.673273591395095e-06, | |
| "loss": 0.4224, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.687116564417178, | |
| "grad_norm": 0.18033233284950256, | |
| "learning_rate": 4.659391445680446e-06, | |
| "loss": 0.4043, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.6932515337423313, | |
| "grad_norm": 0.20073480904102325, | |
| "learning_rate": 4.645511937311934e-06, | |
| "loss": 0.4511, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.6993865030674846, | |
| "grad_norm": 0.20318840444087982, | |
| "learning_rate": 4.631635173759165e-06, | |
| "loss": 0.4279, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.705521472392638, | |
| "grad_norm": 0.17758074402809143, | |
| "learning_rate": 4.6177612624704975e-06, | |
| "loss": 0.4735, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.7116564417177913, | |
| "grad_norm": 0.17162683606147766, | |
| "learning_rate": 4.603890310872197e-06, | |
| "loss": 0.4514, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.7177914110429446, | |
| "grad_norm": 0.19262997806072235, | |
| "learning_rate": 4.590022426367621e-06, | |
| "loss": 0.4313, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.7239263803680984, | |
| "grad_norm": 0.19909274578094482, | |
| "learning_rate": 4.576157716336369e-06, | |
| "loss": 0.4347, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.7300613496932513, | |
| "grad_norm": 0.17881116271018982, | |
| "learning_rate": 4.5622962881334666e-06, | |
| "loss": 0.4581, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.736196319018405, | |
| "grad_norm": 0.18698081374168396, | |
| "learning_rate": 4.5484382490885265e-06, | |
| "loss": 0.4273, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.7423312883435584, | |
| "grad_norm": 0.17165279388427734, | |
| "learning_rate": 4.534583706504919e-06, | |
| "loss": 0.4009, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.7484662576687118, | |
| "grad_norm": 0.17795486748218536, | |
| "learning_rate": 4.520732767658938e-06, | |
| "loss": 0.4287, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.754601226993865, | |
| "grad_norm": 0.18492695689201355, | |
| "learning_rate": 4.50688553979898e-06, | |
| "loss": 0.4405, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.7607361963190185, | |
| "grad_norm": 0.20490054786205292, | |
| "learning_rate": 4.493042130144702e-06, | |
| "loss": 0.4344, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.766871165644172, | |
| "grad_norm": 0.1859641969203949, | |
| "learning_rate": 4.479202645886196e-06, | |
| "loss": 0.4283, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.773006134969325, | |
| "grad_norm": 0.20046532154083252, | |
| "learning_rate": 4.4653671941831665e-06, | |
| "loss": 0.444, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.7791411042944785, | |
| "grad_norm": 0.1931602507829666, | |
| "learning_rate": 4.451535882164084e-06, | |
| "loss": 0.4278, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.785276073619632, | |
| "grad_norm": 0.19650715589523315, | |
| "learning_rate": 4.437708816925374e-06, | |
| "loss": 0.4441, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.791411042944785, | |
| "grad_norm": 0.18395523726940155, | |
| "learning_rate": 4.423886105530573e-06, | |
| "loss": 0.4286, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.7975460122699385, | |
| "grad_norm": 0.19610300660133362, | |
| "learning_rate": 4.410067855009506e-06, | |
| "loss": 0.4051, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.8036809815950923, | |
| "grad_norm": 0.18188579380512238, | |
| "learning_rate": 4.396254172357462e-06, | |
| "loss": 0.4446, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.809815950920245, | |
| "grad_norm": 0.19300203025341034, | |
| "learning_rate": 4.382445164534357e-06, | |
| "loss": 0.4438, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.815950920245399, | |
| "grad_norm": 0.1908637285232544, | |
| "learning_rate": 4.368640938463909e-06, | |
| "loss": 0.4448, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.8220858895705523, | |
| "grad_norm": 0.17132696509361267, | |
| "learning_rate": 4.354841601032811e-06, | |
| "loss": 0.4108, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.8282208588957056, | |
| "grad_norm": 0.16772066056728363, | |
| "learning_rate": 4.341047259089906e-06, | |
| "loss": 0.4376, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.834355828220859, | |
| "grad_norm": 0.20349860191345215, | |
| "learning_rate": 4.327258019445355e-06, | |
| "loss": 0.4809, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.8404907975460123, | |
| "grad_norm": 0.1808754950761795, | |
| "learning_rate": 4.313473988869811e-06, | |
| "loss": 0.4217, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.8466257668711656, | |
| "grad_norm": 0.17573867738246918, | |
| "learning_rate": 4.299695274093593e-06, | |
| "loss": 0.4149, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.852760736196319, | |
| "grad_norm": 0.1589432954788208, | |
| "learning_rate": 4.28592198180586e-06, | |
| "loss": 0.4216, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.8588957055214723, | |
| "grad_norm": 0.17543958127498627, | |
| "learning_rate": 4.272154218653784e-06, | |
| "loss": 0.474, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.8650306748466257, | |
| "grad_norm": 0.19895213842391968, | |
| "learning_rate": 4.258392091241727e-06, | |
| "loss": 0.3786, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.871165644171779, | |
| "grad_norm": 0.2045029103755951, | |
| "learning_rate": 4.244635706130408e-06, | |
| "loss": 0.4593, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.8773006134969323, | |
| "grad_norm": 0.2224407196044922, | |
| "learning_rate": 4.23088516983609e-06, | |
| "loss": 0.4171, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.883435582822086, | |
| "grad_norm": 0.17593076825141907, | |
| "learning_rate": 4.21714058882974e-06, | |
| "loss": 0.4178, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.889570552147239, | |
| "grad_norm": 0.18816006183624268, | |
| "learning_rate": 4.203402069536224e-06, | |
| "loss": 0.4484, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.895705521472393, | |
| "grad_norm": 0.18676427006721497, | |
| "learning_rate": 4.18966971833346e-06, | |
| "loss": 0.4609, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.901840490797546, | |
| "grad_norm": 0.18539687991142273, | |
| "learning_rate": 4.175943641551616e-06, | |
| "loss": 0.4407, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.9079754601226995, | |
| "grad_norm": 0.18633869290351868, | |
| "learning_rate": 4.162223945472271e-06, | |
| "loss": 0.4665, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.914110429447853, | |
| "grad_norm": 0.1992420107126236, | |
| "learning_rate": 4.1485107363276e-06, | |
| "loss": 0.4493, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.920245398773006, | |
| "grad_norm": 0.1835312396287918, | |
| "learning_rate": 4.1348041202995484e-06, | |
| "loss": 0.4172, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.9263803680981595, | |
| "grad_norm": 0.1969442069530487, | |
| "learning_rate": 4.121104203519012e-06, | |
| "loss": 0.4374, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.932515337423313, | |
| "grad_norm": 0.20216475427150726, | |
| "learning_rate": 4.107411092065015e-06, | |
| "loss": 0.4207, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.938650306748466, | |
| "grad_norm": 0.17508967220783234, | |
| "learning_rate": 4.093724891963882e-06, | |
| "loss": 0.4164, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.9447852760736195, | |
| "grad_norm": 0.19727467000484467, | |
| "learning_rate": 4.080045709188431e-06, | |
| "loss": 0.4335, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.950920245398773, | |
| "grad_norm": 0.1708287000656128, | |
| "learning_rate": 4.066373649657142e-06, | |
| "loss": 0.4187, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.957055214723926, | |
| "grad_norm": 0.21632403135299683, | |
| "learning_rate": 4.052708819233334e-06, | |
| "loss": 0.4237, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.96319018404908, | |
| "grad_norm": 0.17434976994991302, | |
| "learning_rate": 4.039051323724355e-06, | |
| "loss": 0.4238, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.969325153374233, | |
| "grad_norm": 0.18432791531085968, | |
| "learning_rate": 4.025401268880762e-06, | |
| "loss": 0.4483, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.9754601226993866, | |
| "grad_norm": 0.1773991733789444, | |
| "learning_rate": 4.011758760395491e-06, | |
| "loss": 0.3934, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.98159509202454, | |
| "grad_norm": 0.19982725381851196, | |
| "learning_rate": 3.998123903903051e-06, | |
| "loss": 0.4381, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.9877300613496933, | |
| "grad_norm": 0.21029439568519592, | |
| "learning_rate": 3.9844968049786995e-06, | |
| "loss": 0.4267, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.9938650306748467, | |
| "grad_norm": 0.21180294454097748, | |
| "learning_rate": 3.97087756913763e-06, | |
| "loss": 0.4185, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.17993085086345673, | |
| "learning_rate": 3.957266301834145e-06, | |
| "loss": 0.4382, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.006134969325154, | |
| "grad_norm": 0.2000538408756256, | |
| "learning_rate": 3.943663108460857e-06, | |
| "loss": 0.4189, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.012269938650307, | |
| "grad_norm": 0.1886722296476364, | |
| "learning_rate": 3.93006809434785e-06, | |
| "loss": 0.4464, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.0184049079754605, | |
| "grad_norm": 0.1883607655763626, | |
| "learning_rate": 3.916481364761885e-06, | |
| "loss": 0.417, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.024539877300613, | |
| "grad_norm": 0.1941026896238327, | |
| "learning_rate": 3.90290302490557e-06, | |
| "loss": 0.4515, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.030674846625767, | |
| "grad_norm": 0.1868050992488861, | |
| "learning_rate": 3.889333179916552e-06, | |
| "loss": 0.415, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.03680981595092, | |
| "grad_norm": 0.21741467714309692, | |
| "learning_rate": 3.875771934866702e-06, | |
| "loss": 0.3978, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.042944785276074, | |
| "grad_norm": 0.18438977003097534, | |
| "learning_rate": 3.862219394761305e-06, | |
| "loss": 0.4197, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.049079754601227, | |
| "grad_norm": 0.21055185794830322, | |
| "learning_rate": 3.848675664538238e-06, | |
| "loss": 0.4022, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.0552147239263805, | |
| "grad_norm": 0.1822698414325714, | |
| "learning_rate": 3.8351408490671614e-06, | |
| "loss": 0.4707, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.061349693251533, | |
| "grad_norm": 0.19663125276565552, | |
| "learning_rate": 3.821615053148717e-06, | |
| "loss": 0.4332, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.067484662576687, | |
| "grad_norm": 0.20445053279399872, | |
| "learning_rate": 3.8080983815137017e-06, | |
| "loss": 0.4197, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.07361963190184, | |
| "grad_norm": 0.17627210915088654, | |
| "learning_rate": 3.7945909388222636e-06, | |
| "loss": 0.4325, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.079754601226994, | |
| "grad_norm": 0.17431975901126862, | |
| "learning_rate": 3.781092829663089e-06, | |
| "loss": 0.4354, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.085889570552148, | |
| "grad_norm": 0.1871926486492157, | |
| "learning_rate": 3.7676041585525956e-06, | |
| "loss": 0.4155, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.0920245398773005, | |
| "grad_norm": 0.19517648220062256, | |
| "learning_rate": 3.7541250299341243e-06, | |
| "loss": 0.4327, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.098159509202454, | |
| "grad_norm": 0.1866348683834076, | |
| "learning_rate": 3.740655548177125e-06, | |
| "loss": 0.4042, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.104294478527607, | |
| "grad_norm": 0.17768587172031403, | |
| "learning_rate": 3.7271958175763518e-06, | |
| "loss": 0.4258, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.110429447852761, | |
| "grad_norm": 0.17544397711753845, | |
| "learning_rate": 3.713745942351056e-06, | |
| "loss": 0.4033, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.116564417177914, | |
| "grad_norm": 0.18574115633964539, | |
| "learning_rate": 3.7003060266441804e-06, | |
| "loss": 0.4229, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.122699386503068, | |
| "grad_norm": 0.1867496371269226, | |
| "learning_rate": 3.6868761745215474e-06, | |
| "loss": 0.432, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.128834355828221, | |
| "grad_norm": 0.19182687997817993, | |
| "learning_rate": 3.6734564899710577e-06, | |
| "loss": 0.4408, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.134969325153374, | |
| "grad_norm": 0.18049779534339905, | |
| "learning_rate": 3.660047076901885e-06, | |
| "loss": 0.4351, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.141104294478527, | |
| "grad_norm": 0.19350336492061615, | |
| "learning_rate": 3.646648039143669e-06, | |
| "loss": 0.4494, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.147239263803681, | |
| "grad_norm": 0.20113109052181244, | |
| "learning_rate": 3.633259480445715e-06, | |
| "loss": 0.4387, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.153374233128835, | |
| "grad_norm": 0.18117064237594604, | |
| "learning_rate": 3.6198815044761847e-06, | |
| "loss": 0.4792, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.159509202453988, | |
| "grad_norm": 0.1871654987335205, | |
| "learning_rate": 3.6065142148213033e-06, | |
| "loss": 0.4623, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.1656441717791415, | |
| "grad_norm": 0.17341911792755127, | |
| "learning_rate": 3.5931577149845465e-06, | |
| "loss": 0.4431, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.171779141104294, | |
| "grad_norm": 0.1755349338054657, | |
| "learning_rate": 3.579812108385843e-06, | |
| "loss": 0.4163, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.177914110429448, | |
| "grad_norm": 0.20305219292640686, | |
| "learning_rate": 3.566477498360782e-06, | |
| "loss": 0.4365, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.184049079754601, | |
| "grad_norm": 0.1874455362558365, | |
| "learning_rate": 3.5531539881597967e-06, | |
| "loss": 0.4727, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.190184049079755, | |
| "grad_norm": 0.20131394267082214, | |
| "learning_rate": 3.5398416809473813e-06, | |
| "loss": 0.4145, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.196319018404908, | |
| "grad_norm": 0.18988259136676788, | |
| "learning_rate": 3.5265406798012804e-06, | |
| "loss": 0.4457, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.2024539877300615, | |
| "grad_norm": 0.1912226378917694, | |
| "learning_rate": 3.5132510877116953e-06, | |
| "loss": 0.4358, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.208588957055214, | |
| "grad_norm": 0.17754784226417542, | |
| "learning_rate": 3.4999730075804907e-06, | |
| "loss": 0.4116, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.214723926380368, | |
| "grad_norm": 0.1884073168039322, | |
| "learning_rate": 3.4867065422203885e-06, | |
| "loss": 0.4366, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.220858895705521, | |
| "grad_norm": 0.18211857974529266, | |
| "learning_rate": 3.473451794354179e-06, | |
| "loss": 0.4276, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.226993865030675, | |
| "grad_norm": 0.20465627312660217, | |
| "learning_rate": 3.460208866613923e-06, | |
| "loss": 0.4266, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.233128834355828, | |
| "grad_norm": 0.26603659987449646, | |
| "learning_rate": 3.4469778615401616e-06, | |
| "loss": 0.4693, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.2392638036809815, | |
| "grad_norm": 0.19085724651813507, | |
| "learning_rate": 3.4337588815811128e-06, | |
| "loss": 0.4322, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.245398773006135, | |
| "grad_norm": 0.19165652990341187, | |
| "learning_rate": 3.420552029091886e-06, | |
| "loss": 0.4401, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.251533742331288, | |
| "grad_norm": 0.21447667479515076, | |
| "learning_rate": 3.4073574063336857e-06, | |
| "loss": 0.4327, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.257668711656442, | |
| "grad_norm": 0.21556028723716736, | |
| "learning_rate": 3.394175115473024e-06, | |
| "loss": 0.4507, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.263803680981595, | |
| "grad_norm": 0.16888567805290222, | |
| "learning_rate": 3.3810052585809233e-06, | |
| "loss": 0.4292, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.269938650306749, | |
| "grad_norm": 0.19774934649467468, | |
| "learning_rate": 3.3678479376321304e-06, | |
| "loss": 0.4345, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.276073619631902, | |
| "grad_norm": 0.18974056839942932, | |
| "learning_rate": 3.354703254504328e-06, | |
| "loss": 0.401, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.282208588957055, | |
| "grad_norm": 0.18433569371700287, | |
| "learning_rate": 3.3415713109773386e-06, | |
| "loss": 0.4327, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.288343558282208, | |
| "grad_norm": 0.1922151893377304, | |
| "learning_rate": 3.328452208732349e-06, | |
| "loss": 0.4239, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.294478527607362, | |
| "grad_norm": 0.18833968043327332, | |
| "learning_rate": 3.3153460493511086e-06, | |
| "loss": 0.4287, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.300613496932515, | |
| "grad_norm": 0.18338987231254578, | |
| "learning_rate": 3.302252934315151e-06, | |
| "loss": 0.4467, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.306748466257669, | |
| "grad_norm": 0.19972403347492218, | |
| "learning_rate": 3.2891729650050096e-06, | |
| "loss": 0.4153, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.3128834355828225, | |
| "grad_norm": 0.19376607239246368, | |
| "learning_rate": 3.276106242699429e-06, | |
| "loss": 0.4117, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.319018404907975, | |
| "grad_norm": 0.18517455458641052, | |
| "learning_rate": 3.263052868574578e-06, | |
| "loss": 0.4168, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.325153374233129, | |
| "grad_norm": 0.18251436948776245, | |
| "learning_rate": 3.2500129437032756e-06, | |
| "loss": 0.4552, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.331288343558282, | |
| "grad_norm": 0.1943109780550003, | |
| "learning_rate": 3.236986569054199e-06, | |
| "loss": 0.431, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.337423312883436, | |
| "grad_norm": 0.18778099119663239, | |
| "learning_rate": 3.2239738454911057e-06, | |
| "loss": 0.413, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.343558282208589, | |
| "grad_norm": 0.18466182053089142, | |
| "learning_rate": 3.2109748737720537e-06, | |
| "loss": 0.4332, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.3496932515337425, | |
| "grad_norm": 0.19483985006809235, | |
| "learning_rate": 3.197989754548618e-06, | |
| "loss": 0.436, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.355828220858895, | |
| "grad_norm": 0.17666223645210266, | |
| "learning_rate": 3.1850185883651175e-06, | |
| "loss": 0.4102, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.361963190184049, | |
| "grad_norm": 0.19580869376659393, | |
| "learning_rate": 3.1720614756578267e-06, | |
| "loss": 0.4151, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.368098159509202, | |
| "grad_norm": 0.20442509651184082, | |
| "learning_rate": 3.1591185167542047e-06, | |
| "loss": 0.4353, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.374233128834356, | |
| "grad_norm": 0.19967563450336456, | |
| "learning_rate": 3.14618981187212e-06, | |
| "loss": 0.4239, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.38036809815951, | |
| "grad_norm": 0.18093548715114594, | |
| "learning_rate": 3.1332754611190695e-06, | |
| "loss": 0.4312, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.386503067484663, | |
| "grad_norm": 0.2043561488389969, | |
| "learning_rate": 3.1203755644914046e-06, | |
| "loss": 0.4263, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.392638036809816, | |
| "grad_norm": 0.18950863182544708, | |
| "learning_rate": 3.1074902218735602e-06, | |
| "loss": 0.4453, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.398773006134969, | |
| "grad_norm": 0.19419826567173004, | |
| "learning_rate": 3.0946195330372754e-06, | |
| "loss": 0.4252, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.404907975460123, | |
| "grad_norm": 0.1931648850440979, | |
| "learning_rate": 3.08176359764083e-06, | |
| "loss": 0.422, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.411042944785276, | |
| "grad_norm": 0.19961263239383698, | |
| "learning_rate": 3.0689225152282627e-06, | |
| "loss": 0.4541, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.41717791411043, | |
| "grad_norm": 0.1944529414176941, | |
| "learning_rate": 3.0560963852286046e-06, | |
| "loss": 0.424, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.423312883435583, | |
| "grad_norm": 0.19636866450309753, | |
| "learning_rate": 3.043285306955114e-06, | |
| "loss": 0.4994, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.429447852760736, | |
| "grad_norm": 0.18336038291454315, | |
| "learning_rate": 3.0304893796044988e-06, | |
| "loss": 0.4109, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.435582822085889, | |
| "grad_norm": 0.18142063915729523, | |
| "learning_rate": 3.017708702256153e-06, | |
| "loss": 0.4464, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.441717791411043, | |
| "grad_norm": 0.17481929063796997, | |
| "learning_rate": 3.004943373871393e-06, | |
| "loss": 0.4341, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.447852760736196, | |
| "grad_norm": 0.19967781007289886, | |
| "learning_rate": 2.9921934932926837e-06, | |
| "loss": 0.3999, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.45398773006135, | |
| "grad_norm": 0.19884563982486725, | |
| "learning_rate": 2.9794591592428767e-06, | |
| "loss": 0.4417, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.460122699386503, | |
| "grad_norm": 0.1929159313440323, | |
| "learning_rate": 2.966740470324451e-06, | |
| "loss": 0.4371, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.466257668711656, | |
| "grad_norm": 0.19332391023635864, | |
| "learning_rate": 2.954037525018739e-06, | |
| "loss": 0.4668, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.47239263803681, | |
| "grad_norm": 0.18918125331401825, | |
| "learning_rate": 2.9413504216851742e-06, | |
| "loss": 0.4179, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.478527607361963, | |
| "grad_norm": 0.19312560558319092, | |
| "learning_rate": 2.9286792585605206e-06, | |
| "loss": 0.4045, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.484662576687117, | |
| "grad_norm": 0.19809098541736603, | |
| "learning_rate": 2.9160241337581198e-06, | |
| "loss": 0.4417, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.49079754601227, | |
| "grad_norm": 0.19697564840316772, | |
| "learning_rate": 2.903385145267129e-06, | |
| "loss": 0.4272, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.4969325153374236, | |
| "grad_norm": 0.2234029769897461, | |
| "learning_rate": 2.8907623909517555e-06, | |
| "loss": 0.4365, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.5030674846625764, | |
| "grad_norm": 0.2001408040523529, | |
| "learning_rate": 2.8781559685505106e-06, | |
| "loss": 0.4427, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.50920245398773, | |
| "grad_norm": 0.18446412682533264, | |
| "learning_rate": 2.8655659756754474e-06, | |
| "loss": 0.4511, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.515337423312883, | |
| "grad_norm": 0.20659488439559937, | |
| "learning_rate": 2.8529925098113943e-06, | |
| "loss": 0.4249, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.521472392638037, | |
| "grad_norm": 0.20220941305160522, | |
| "learning_rate": 2.8404356683152256e-06, | |
| "loss": 0.4228, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.52760736196319, | |
| "grad_norm": 0.21185429394245148, | |
| "learning_rate": 2.827895548415084e-06, | |
| "loss": 0.402, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.533742331288344, | |
| "grad_norm": 0.189244344830513, | |
| "learning_rate": 2.8153722472096334e-06, | |
| "loss": 0.4236, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.539877300613497, | |
| "grad_norm": 0.1980522722005844, | |
| "learning_rate": 2.8028658616673184e-06, | |
| "loss": 0.4302, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.54601226993865, | |
| "grad_norm": 0.19940122961997986, | |
| "learning_rate": 2.7903764886255942e-06, | |
| "loss": 0.408, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.552147239263804, | |
| "grad_norm": 0.19964726269245148, | |
| "learning_rate": 2.777904224790197e-06, | |
| "loss": 0.4316, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.558282208588957, | |
| "grad_norm": 0.20940682291984558, | |
| "learning_rate": 2.765449166734382e-06, | |
| "loss": 0.4208, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.564417177914111, | |
| "grad_norm": 0.1969623565673828, | |
| "learning_rate": 2.7530114108981775e-06, | |
| "loss": 0.4035, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.570552147239264, | |
| "grad_norm": 0.20081190764904022, | |
| "learning_rate": 2.7405910535876407e-06, | |
| "loss": 0.4469, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.576687116564417, | |
| "grad_norm": 0.1978064477443695, | |
| "learning_rate": 2.7281881909741185e-06, | |
| "loss": 0.4336, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.58282208588957, | |
| "grad_norm": 0.2098206877708435, | |
| "learning_rate": 2.715802919093484e-06, | |
| "loss": 0.4359, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.588957055214724, | |
| "grad_norm": 0.20042365789413452, | |
| "learning_rate": 2.7034353338454142e-06, | |
| "loss": 0.417, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.595092024539877, | |
| "grad_norm": 0.2288142442703247, | |
| "learning_rate": 2.691085530992629e-06, | |
| "loss": 0.4272, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.601226993865031, | |
| "grad_norm": 0.2015533745288849, | |
| "learning_rate": 2.678753606160166e-06, | |
| "loss": 0.407, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.6073619631901845, | |
| "grad_norm": 0.19977515935897827, | |
| "learning_rate": 2.6664396548346303e-06, | |
| "loss": 0.4324, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.613496932515337, | |
| "grad_norm": 0.1808634102344513, | |
| "learning_rate": 2.654143772363455e-06, | |
| "loss": 0.3987, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.61963190184049, | |
| "grad_norm": 0.20292384922504425, | |
| "learning_rate": 2.6418660539541674e-06, | |
| "loss": 0.405, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.625766871165644, | |
| "grad_norm": 0.19244877994060516, | |
| "learning_rate": 2.6296065946736506e-06, | |
| "loss": 0.4307, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.631901840490798, | |
| "grad_norm": 0.20017580687999725, | |
| "learning_rate": 2.617365489447404e-06, | |
| "loss": 0.4162, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.638036809815951, | |
| "grad_norm": 0.18756996095180511, | |
| "learning_rate": 2.6051428330588147e-06, | |
| "loss": 0.4307, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.644171779141105, | |
| "grad_norm": 0.1955227255821228, | |
| "learning_rate": 2.5929387201484133e-06, | |
| "loss": 0.4546, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.6503067484662575, | |
| "grad_norm": 0.1984601467847824, | |
| "learning_rate": 2.5807532452131533e-06, | |
| "loss": 0.4017, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.656441717791411, | |
| "grad_norm": 0.19388411939144135, | |
| "learning_rate": 2.5685865026056745e-06, | |
| "loss": 0.4706, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.662576687116564, | |
| "grad_norm": 0.18463459610939026, | |
| "learning_rate": 2.5564385865335628e-06, | |
| "loss": 0.4108, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.668711656441718, | |
| "grad_norm": 0.18664699792861938, | |
| "learning_rate": 2.544309591058638e-06, | |
| "loss": 0.4299, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.674846625766871, | |
| "grad_norm": 0.18623848259449005, | |
| "learning_rate": 2.5321996100962163e-06, | |
| "loss": 0.4277, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.680981595092025, | |
| "grad_norm": 0.20391374826431274, | |
| "learning_rate": 2.5201087374143783e-06, | |
| "loss": 0.4436, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.6871165644171775, | |
| "grad_norm": 0.20646578073501587, | |
| "learning_rate": 2.5080370666332532e-06, | |
| "loss": 0.4502, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.693251533742331, | |
| "grad_norm": 0.20507745444774628, | |
| "learning_rate": 2.495984691224287e-06, | |
| "loss": 0.4164, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.699386503067485, | |
| "grad_norm": 0.19352522492408752, | |
| "learning_rate": 2.4839517045095225e-06, | |
| "loss": 0.412, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.705521472392638, | |
| "grad_norm": 0.19516626000404358, | |
| "learning_rate": 2.4719381996608748e-06, | |
| "loss": 0.4253, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.711656441717792, | |
| "grad_norm": 0.19005592167377472, | |
| "learning_rate": 2.459944269699407e-06, | |
| "loss": 0.4253, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.717791411042945, | |
| "grad_norm": 0.18863846361637115, | |
| "learning_rate": 2.4479700074946154e-06, | |
| "loss": 0.4063, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.723926380368098, | |
| "grad_norm": 0.2115408331155777, | |
| "learning_rate": 2.4360155057637115e-06, | |
| "loss": 0.439, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.730061349693251, | |
| "grad_norm": 0.1989760547876358, | |
| "learning_rate": 2.4240808570708926e-06, | |
| "loss": 0.4358, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.736196319018405, | |
| "grad_norm": 0.18984295427799225, | |
| "learning_rate": 2.412166153826639e-06, | |
| "loss": 0.4262, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.742331288343558, | |
| "grad_norm": 0.17575299739837646, | |
| "learning_rate": 2.400271488286992e-06, | |
| "loss": 0.4263, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.748466257668712, | |
| "grad_norm": 0.18755771219730377, | |
| "learning_rate": 2.3883969525528396e-06, | |
| "loss": 0.4089, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.754601226993865, | |
| "grad_norm": 0.2058447301387787, | |
| "learning_rate": 2.3765426385692044e-06, | |
| "loss": 0.4427, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.7607361963190185, | |
| "grad_norm": 0.20019108057022095, | |
| "learning_rate": 2.3647086381245267e-06, | |
| "loss": 0.4548, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 4.766871165644172, | |
| "grad_norm": 0.17807462811470032, | |
| "learning_rate": 2.352895042849965e-06, | |
| "loss": 0.4172, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 4.773006134969325, | |
| "grad_norm": 0.18490706384181976, | |
| "learning_rate": 2.3411019442186766e-06, | |
| "loss": 0.4319, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 4.779141104294479, | |
| "grad_norm": 0.1955891102552414, | |
| "learning_rate": 2.3293294335451104e-06, | |
| "loss": 0.4516, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 4.785276073619632, | |
| "grad_norm": 0.19553668797016144, | |
| "learning_rate": 2.317577601984305e-06, | |
| "loss": 0.4211, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.791411042944786, | |
| "grad_norm": 0.1864728480577469, | |
| "learning_rate": 2.30584654053118e-06, | |
| "loss": 0.3994, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 4.7975460122699385, | |
| "grad_norm": 0.1869369000196457, | |
| "learning_rate": 2.294136340019826e-06, | |
| "loss": 0.4526, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 4.803680981595092, | |
| "grad_norm": 0.19105935096740723, | |
| "learning_rate": 2.282447091122816e-06, | |
| "loss": 0.473, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 4.809815950920245, | |
| "grad_norm": 0.18605674803256989, | |
| "learning_rate": 2.2707788843504836e-06, | |
| "loss": 0.4634, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 4.815950920245399, | |
| "grad_norm": 0.18209435045719147, | |
| "learning_rate": 2.2591318100502385e-06, | |
| "loss": 0.4566, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.822085889570552, | |
| "grad_norm": 0.19514133036136627, | |
| "learning_rate": 2.2475059584058612e-06, | |
| "loss": 0.4082, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 4.828220858895706, | |
| "grad_norm": 0.192851722240448, | |
| "learning_rate": 2.2359014194367986e-06, | |
| "loss": 0.4739, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 4.8343558282208585, | |
| "grad_norm": 0.2023897022008896, | |
| "learning_rate": 2.2243182829974775e-06, | |
| "loss": 0.4416, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 4.840490797546012, | |
| "grad_norm": 0.18871212005615234, | |
| "learning_rate": 2.2127566387766045e-06, | |
| "loss": 0.4408, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 4.846625766871165, | |
| "grad_norm": 0.20089678466320038, | |
| "learning_rate": 2.2012165762964677e-06, | |
| "loss": 0.4236, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.852760736196319, | |
| "grad_norm": 0.2131776064634323, | |
| "learning_rate": 2.189698184912249e-06, | |
| "loss": 0.4368, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 4.858895705521473, | |
| "grad_norm": 0.2049061506986618, | |
| "learning_rate": 2.17820155381133e-06, | |
| "loss": 0.4444, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 4.865030674846626, | |
| "grad_norm": 0.18744398653507233, | |
| "learning_rate": 2.1667267720126014e-06, | |
| "loss": 0.4292, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 4.871165644171779, | |
| "grad_norm": 0.20928369462490082, | |
| "learning_rate": 2.1552739283657753e-06, | |
| "loss": 0.4321, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 4.877300613496932, | |
| "grad_norm": 0.18477670848369598, | |
| "learning_rate": 2.1438431115506908e-06, | |
| "loss": 0.4629, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.883435582822086, | |
| "grad_norm": 0.17708535492420197, | |
| "learning_rate": 2.1324344100766376e-06, | |
| "loss": 0.4647, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 4.889570552147239, | |
| "grad_norm": 0.1869410127401352, | |
| "learning_rate": 2.1210479122816646e-06, | |
| "loss": 0.4141, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 4.895705521472393, | |
| "grad_norm": 0.18549315631389618, | |
| "learning_rate": 2.109683706331893e-06, | |
| "loss": 0.4649, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 4.901840490797546, | |
| "grad_norm": 0.21219757199287415, | |
| "learning_rate": 2.0983418802208416e-06, | |
| "loss": 0.4252, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 4.9079754601226995, | |
| "grad_norm": 0.1829107105731964, | |
| "learning_rate": 2.0870225217687408e-06, | |
| "loss": 0.4095, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.914110429447852, | |
| "grad_norm": 0.2121373414993286, | |
| "learning_rate": 2.0757257186218465e-06, | |
| "loss": 0.3889, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 4.920245398773006, | |
| "grad_norm": 0.19195012748241425, | |
| "learning_rate": 2.0644515582517803e-06, | |
| "loss": 0.4324, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 4.92638036809816, | |
| "grad_norm": 0.20068559050559998, | |
| "learning_rate": 2.053200127954828e-06, | |
| "loss": 0.4484, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 4.932515337423313, | |
| "grad_norm": 0.18420425057411194, | |
| "learning_rate": 2.0419715148512807e-06, | |
| "loss": 0.4237, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 4.938650306748467, | |
| "grad_norm": 0.2045467048883438, | |
| "learning_rate": 2.0307658058847577e-06, | |
| "loss": 0.4114, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.9447852760736195, | |
| "grad_norm": 0.19473262131214142, | |
| "learning_rate": 2.0195830878215236e-06, | |
| "loss": 0.4305, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.950920245398773, | |
| "grad_norm": 0.1820230484008789, | |
| "learning_rate": 2.0084234472498274e-06, | |
| "loss": 0.4232, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.957055214723926, | |
| "grad_norm": 0.20162828266620636, | |
| "learning_rate": 1.997286970579232e-06, | |
| "loss": 0.3802, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.96319018404908, | |
| "grad_norm": 0.194149449467659, | |
| "learning_rate": 1.9861737440399327e-06, | |
| "loss": 0.4432, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.969325153374233, | |
| "grad_norm": 0.21044090390205383, | |
| "learning_rate": 1.9750838536821048e-06, | |
| "loss": 0.4031, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.975460122699387, | |
| "grad_norm": 0.18757662177085876, | |
| "learning_rate": 1.964017385375228e-06, | |
| "loss": 0.4047, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.9815950920245395, | |
| "grad_norm": 0.2228487730026245, | |
| "learning_rate": 1.952974424807425e-06, | |
| "loss": 0.432, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.987730061349693, | |
| "grad_norm": 0.21380779147148132, | |
| "learning_rate": 1.9419550574847986e-06, | |
| "loss": 0.4277, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.993865030674847, | |
| "grad_norm": 0.21621406078338623, | |
| "learning_rate": 1.9309593687307622e-06, | |
| "loss": 0.4616, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.19357597827911377, | |
| "learning_rate": 1.9199874436853904e-06, | |
| "loss": 0.4202, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 5.006134969325154, | |
| "grad_norm": 0.18243946135044098, | |
| "learning_rate": 1.9090393673047557e-06, | |
| "loss": 0.4184, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 5.012269938650307, | |
| "grad_norm": 0.21097031235694885, | |
| "learning_rate": 1.898115224360263e-06, | |
| "loss": 0.4323, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 5.0184049079754605, | |
| "grad_norm": 0.19240233302116394, | |
| "learning_rate": 1.8872150994380045e-06, | |
| "loss": 0.4077, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 5.024539877300613, | |
| "grad_norm": 0.20225709676742554, | |
| "learning_rate": 1.8763390769381017e-06, | |
| "loss": 0.4164, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 5.030674846625767, | |
| "grad_norm": 0.22095705568790436, | |
| "learning_rate": 1.865487241074041e-06, | |
| "loss": 0.417, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 5.03680981595092, | |
| "grad_norm": 0.18732763826847076, | |
| "learning_rate": 1.8546596758720437e-06, | |
| "loss": 0.406, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 5.042944785276074, | |
| "grad_norm": 0.19568726420402527, | |
| "learning_rate": 1.84385646517039e-06, | |
| "loss": 0.4335, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 5.049079754601227, | |
| "grad_norm": 0.22073768079280853, | |
| "learning_rate": 1.8330776926187904e-06, | |
| "loss": 0.4485, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 5.0552147239263805, | |
| "grad_norm": 0.22986707091331482, | |
| "learning_rate": 1.8223234416777275e-06, | |
| "loss": 0.4347, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 5.061349693251533, | |
| "grad_norm": 0.17984724044799805, | |
| "learning_rate": 1.8115937956178093e-06, | |
| "loss": 0.4037, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 5.067484662576687, | |
| "grad_norm": 0.2089032679796219, | |
| "learning_rate": 1.8008888375191302e-06, | |
| "loss": 0.4223, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 5.07361963190184, | |
| "grad_norm": 0.18969732522964478, | |
| "learning_rate": 1.7902086502706256e-06, | |
| "loss": 0.4375, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 5.079754601226994, | |
| "grad_norm": 0.203078955411911, | |
| "learning_rate": 1.779553316569425e-06, | |
| "loss": 0.4424, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 5.085889570552148, | |
| "grad_norm": 0.20583704113960266, | |
| "learning_rate": 1.7689229189202196e-06, | |
| "loss": 0.4161, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 5.0920245398773005, | |
| "grad_norm": 0.20547014474868774, | |
| "learning_rate": 1.758317539634618e-06, | |
| "loss": 0.4291, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 5.098159509202454, | |
| "grad_norm": 0.20702853798866272, | |
| "learning_rate": 1.747737260830512e-06, | |
| "loss": 0.3902, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 5.104294478527607, | |
| "grad_norm": 0.20404289662837982, | |
| "learning_rate": 1.7371821644314392e-06, | |
| "loss": 0.4249, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 5.110429447852761, | |
| "grad_norm": 0.19528424739837646, | |
| "learning_rate": 1.726652332165945e-06, | |
| "loss": 0.4356, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 5.116564417177914, | |
| "grad_norm": 0.18722812831401825, | |
| "learning_rate": 1.716147845566959e-06, | |
| "loss": 0.4146, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 5.122699386503068, | |
| "grad_norm": 0.20289267599582672, | |
| "learning_rate": 1.7056687859711563e-06, | |
| "loss": 0.4343, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 5.128834355828221, | |
| "grad_norm": 0.19126549363136292, | |
| "learning_rate": 1.695215234518326e-06, | |
| "loss": 0.4834, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 5.134969325153374, | |
| "grad_norm": 0.20215460658073425, | |
| "learning_rate": 1.6847872721507525e-06, | |
| "loss": 0.4272, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 5.141104294478527, | |
| "grad_norm": 0.200180321931839, | |
| "learning_rate": 1.674384979612579e-06, | |
| "loss": 0.4332, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 5.147239263803681, | |
| "grad_norm": 0.20032553374767303, | |
| "learning_rate": 1.6640084374491872e-06, | |
| "loss": 0.459, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 5.153374233128835, | |
| "grad_norm": 0.20815366506576538, | |
| "learning_rate": 1.653657726006575e-06, | |
| "loss": 0.4564, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 5.159509202453988, | |
| "grad_norm": 0.19371084868907928, | |
| "learning_rate": 1.6433329254307261e-06, | |
| "loss": 0.4595, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 5.1656441717791415, | |
| "grad_norm": 0.179207444190979, | |
| "learning_rate": 1.633034115667001e-06, | |
| "loss": 0.4564, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 5.171779141104294, | |
| "grad_norm": 0.19510361552238464, | |
| "learning_rate": 1.6227613764595107e-06, | |
| "loss": 0.4207, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 5.177914110429448, | |
| "grad_norm": 0.19818256795406342, | |
| "learning_rate": 1.6125147873504971e-06, | |
| "loss": 0.4274, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 5.184049079754601, | |
| "grad_norm": 0.18541868031024933, | |
| "learning_rate": 1.6022944276797265e-06, | |
| "loss": 0.4103, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 5.190184049079755, | |
| "grad_norm": 0.19620756804943085, | |
| "learning_rate": 1.5921003765838673e-06, | |
| "loss": 0.4019, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 5.196319018404908, | |
| "grad_norm": 0.19282202422618866, | |
| "learning_rate": 1.5819327129958762e-06, | |
| "loss": 0.4135, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 5.2024539877300615, | |
| "grad_norm": 0.20788726210594177, | |
| "learning_rate": 1.5717915156443953e-06, | |
| "loss": 0.3979, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 5.208588957055214, | |
| "grad_norm": 0.20662984251976013, | |
| "learning_rate": 1.5616768630531353e-06, | |
| "loss": 0.4236, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 5.214723926380368, | |
| "grad_norm": 0.1801910698413849, | |
| "learning_rate": 1.5515888335402706e-06, | |
| "loss": 0.4094, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 5.220858895705521, | |
| "grad_norm": 0.21069598197937012, | |
| "learning_rate": 1.5415275052178318e-06, | |
| "loss": 0.4229, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 5.226993865030675, | |
| "grad_norm": 0.18792353570461273, | |
| "learning_rate": 1.5314929559910985e-06, | |
| "loss": 0.3851, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 5.233128834355828, | |
| "grad_norm": 0.21205176413059235, | |
| "learning_rate": 1.5214852635580018e-06, | |
| "loss": 0.4524, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 5.2392638036809815, | |
| "grad_norm": 0.20375634729862213, | |
| "learning_rate": 1.5115045054085204e-06, | |
| "loss": 0.4293, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 5.245398773006135, | |
| "grad_norm": 0.19179388880729675, | |
| "learning_rate": 1.5015507588240742e-06, | |
| "loss": 0.4438, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 5.251533742331288, | |
| "grad_norm": 0.1827925592660904, | |
| "learning_rate": 1.4916241008769372e-06, | |
| "loss": 0.4183, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 5.257668711656442, | |
| "grad_norm": 0.21141961216926575, | |
| "learning_rate": 1.4817246084296327e-06, | |
| "loss": 0.4388, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 5.263803680981595, | |
| "grad_norm": 0.18689335882663727, | |
| "learning_rate": 1.4718523581343403e-06, | |
| "loss": 0.4312, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 5.269938650306749, | |
| "grad_norm": 0.195315420627594, | |
| "learning_rate": 1.4620074264323048e-06, | |
| "loss": 0.4287, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 5.276073619631902, | |
| "grad_norm": 0.1855771541595459, | |
| "learning_rate": 1.452189889553236e-06, | |
| "loss": 0.4252, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 5.282208588957055, | |
| "grad_norm": 0.18047569692134857, | |
| "learning_rate": 1.4423998235147307e-06, | |
| "loss": 0.3753, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 5.288343558282208, | |
| "grad_norm": 0.19961078464984894, | |
| "learning_rate": 1.4326373041216774e-06, | |
| "loss": 0.4277, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 5.294478527607362, | |
| "grad_norm": 0.19070571660995483, | |
| "learning_rate": 1.422902406965664e-06, | |
| "loss": 0.4258, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 5.300613496932515, | |
| "grad_norm": 0.18950672447681427, | |
| "learning_rate": 1.4131952074244037e-06, | |
| "loss": 0.4223, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 5.306748466257669, | |
| "grad_norm": 0.1713619977235794, | |
| "learning_rate": 1.4035157806611465e-06, | |
| "loss": 0.4282, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 5.3128834355828225, | |
| "grad_norm": 0.18861691653728485, | |
| "learning_rate": 1.39386420162409e-06, | |
| "loss": 0.4332, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 5.319018404907975, | |
| "grad_norm": 0.20134612917900085, | |
| "learning_rate": 1.3842405450458158e-06, | |
| "loss": 0.431, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 5.325153374233129, | |
| "grad_norm": 0.19348467886447906, | |
| "learning_rate": 1.3746448854426908e-06, | |
| "loss": 0.3873, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 5.331288343558282, | |
| "grad_norm": 0.20825737714767456, | |
| "learning_rate": 1.3650772971143067e-06, | |
| "loss": 0.409, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 5.337423312883436, | |
| "grad_norm": 0.20485500991344452, | |
| "learning_rate": 1.355537854142897e-06, | |
| "loss": 0.4514, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 5.343558282208589, | |
| "grad_norm": 0.19826869666576385, | |
| "learning_rate": 1.3460266303927604e-06, | |
| "loss": 0.4406, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 5.3496932515337425, | |
| "grad_norm": 0.1876692771911621, | |
| "learning_rate": 1.336543699509698e-06, | |
| "loss": 0.445, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 5.355828220858895, | |
| "grad_norm": 0.19540998339653015, | |
| "learning_rate": 1.3270891349204378e-06, | |
| "loss": 0.4144, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 5.361963190184049, | |
| "grad_norm": 0.20348355174064636, | |
| "learning_rate": 1.3176630098320615e-06, | |
| "loss": 0.4201, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 5.368098159509202, | |
| "grad_norm": 0.1816159337759018, | |
| "learning_rate": 1.3082653972314475e-06, | |
| "loss": 0.4181, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 5.374233128834356, | |
| "grad_norm": 0.19366130232810974, | |
| "learning_rate": 1.2988963698846997e-06, | |
| "loss": 0.4246, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 5.38036809815951, | |
| "grad_norm": 0.1955079287290573, | |
| "learning_rate": 1.2895560003365837e-06, | |
| "loss": 0.4378, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 5.386503067484663, | |
| "grad_norm": 0.20214815437793732, | |
| "learning_rate": 1.2802443609099696e-06, | |
| "loss": 0.4306, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 5.392638036809816, | |
| "grad_norm": 0.1942247748374939, | |
| "learning_rate": 1.270961523705264e-06, | |
| "loss": 0.4158, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 5.398773006134969, | |
| "grad_norm": 0.19706179201602936, | |
| "learning_rate": 1.2617075605998618e-06, | |
| "loss": 0.4227, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 5.404907975460123, | |
| "grad_norm": 0.19230398535728455, | |
| "learning_rate": 1.2524825432475828e-06, | |
| "loss": 0.4289, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 5.411042944785276, | |
| "grad_norm": 0.1848897784948349, | |
| "learning_rate": 1.2432865430781166e-06, | |
| "loss": 0.4216, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 5.41717791411043, | |
| "grad_norm": 0.20822712779045105, | |
| "learning_rate": 1.234119631296473e-06, | |
| "loss": 0.4419, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 5.423312883435583, | |
| "grad_norm": 0.20798738300800323, | |
| "learning_rate": 1.2249818788824324e-06, | |
| "loss": 0.4567, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 5.429447852760736, | |
| "grad_norm": 0.2089272290468216, | |
| "learning_rate": 1.2158733565899855e-06, | |
| "loss": 0.4499, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 5.435582822085889, | |
| "grad_norm": 0.18762832880020142, | |
| "learning_rate": 1.2067941349468021e-06, | |
| "loss": 0.4365, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 5.441717791411043, | |
| "grad_norm": 0.22340965270996094, | |
| "learning_rate": 1.1977442842536685e-06, | |
| "loss": 0.4296, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 5.447852760736196, | |
| "grad_norm": 0.18473917245864868, | |
| "learning_rate": 1.1887238745839536e-06, | |
| "loss": 0.4347, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 5.45398773006135, | |
| "grad_norm": 0.2267259657382965, | |
| "learning_rate": 1.179732975783065e-06, | |
| "loss": 0.4137, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 5.460122699386503, | |
| "grad_norm": 0.20931611955165863, | |
| "learning_rate": 1.1707716574679007e-06, | |
| "loss": 0.3982, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 5.466257668711656, | |
| "grad_norm": 0.19619382917881012, | |
| "learning_rate": 1.1618399890263215e-06, | |
| "loss": 0.4455, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 5.47239263803681, | |
| "grad_norm": 0.18555289506912231, | |
| "learning_rate": 1.1529380396166074e-06, | |
| "loss": 0.4423, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 5.478527607361963, | |
| "grad_norm": 0.18679551780223846, | |
| "learning_rate": 1.1440658781669179e-06, | |
| "loss": 0.4392, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 5.484662576687117, | |
| "grad_norm": 0.19644379615783691, | |
| "learning_rate": 1.1352235733747685e-06, | |
| "loss": 0.4744, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 5.49079754601227, | |
| "grad_norm": 0.18453319370746613, | |
| "learning_rate": 1.1264111937064902e-06, | |
| "loss": 0.4172, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 5.4969325153374236, | |
| "grad_norm": 0.19223150610923767, | |
| "learning_rate": 1.117628807396705e-06, | |
| "loss": 0.435, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 5.5030674846625764, | |
| "grad_norm": 0.1897895187139511, | |
| "learning_rate": 1.1088764824477938e-06, | |
| "loss": 0.4312, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 5.50920245398773, | |
| "grad_norm": 0.20507152378559113, | |
| "learning_rate": 1.100154286629369e-06, | |
| "loss": 0.4133, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 5.515337423312883, | |
| "grad_norm": 0.183609277009964, | |
| "learning_rate": 1.0914622874777547e-06, | |
| "loss": 0.4109, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 5.521472392638037, | |
| "grad_norm": 0.18945349752902985, | |
| "learning_rate": 1.0828005522954626e-06, | |
| "loss": 0.4336, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 5.52760736196319, | |
| "grad_norm": 0.22751018404960632, | |
| "learning_rate": 1.0741691481506627e-06, | |
| "loss": 0.4164, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 5.533742331288344, | |
| "grad_norm": 0.21165773272514343, | |
| "learning_rate": 1.0655681418766772e-06, | |
| "loss": 0.4538, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 5.539877300613497, | |
| "grad_norm": 0.18855099380016327, | |
| "learning_rate": 1.0569976000714544e-06, | |
| "loss": 0.4405, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 5.54601226993865, | |
| "grad_norm": 0.19720859825611115, | |
| "learning_rate": 1.0484575890970505e-06, | |
| "loss": 0.4143, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 5.552147239263804, | |
| "grad_norm": 0.19412867724895477, | |
| "learning_rate": 1.0399481750791291e-06, | |
| "loss": 0.4443, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 5.558282208588957, | |
| "grad_norm": 0.19512735307216644, | |
| "learning_rate": 1.0314694239064315e-06, | |
| "loss": 0.4435, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 5.564417177914111, | |
| "grad_norm": 0.19326624274253845, | |
| "learning_rate": 1.0230214012302807e-06, | |
| "loss": 0.4268, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 5.570552147239264, | |
| "grad_norm": 0.18657195568084717, | |
| "learning_rate": 1.014604172464067e-06, | |
| "loss": 0.4158, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 5.576687116564417, | |
| "grad_norm": 0.20302285254001617, | |
| "learning_rate": 1.0062178027827385e-06, | |
| "loss": 0.4361, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 5.58282208588957, | |
| "grad_norm": 0.18828590214252472, | |
| "learning_rate": 9.978623571223045e-07, | |
| "loss": 0.439, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 5.588957055214724, | |
| "grad_norm": 0.1946876496076584, | |
| "learning_rate": 9.89537900179327e-07, | |
| "loss": 0.4277, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 5.595092024539877, | |
| "grad_norm": 0.19324910640716553, | |
| "learning_rate": 9.812444964104195e-07, | |
| "loss": 0.4745, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 5.601226993865031, | |
| "grad_norm": 0.21109642088413239, | |
| "learning_rate": 9.72982210031751e-07, | |
| "loss": 0.4066, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 5.6073619631901845, | |
| "grad_norm": 0.2006876915693283, | |
| "learning_rate": 9.647511050185475e-07, | |
| "loss": 0.459, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 5.613496932515337, | |
| "grad_norm": 0.21445654332637787, | |
| "learning_rate": 9.56551245104596e-07, | |
| "loss": 0.4202, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 5.61963190184049, | |
| "grad_norm": 0.2019462138414383, | |
| "learning_rate": 9.48382693781752e-07, | |
| "loss": 0.4487, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 5.625766871165644, | |
| "grad_norm": 0.21154631674289703, | |
| "learning_rate": 9.402455142994443e-07, | |
| "loss": 0.4411, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 5.631901840490798, | |
| "grad_norm": 0.2066999077796936, | |
| "learning_rate": 9.321397696641916e-07, | |
| "loss": 0.4351, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 5.638036809815951, | |
| "grad_norm": 0.21491585671901703, | |
| "learning_rate": 9.240655226391121e-07, | |
| "loss": 0.4178, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 5.644171779141105, | |
| "grad_norm": 0.20312045514583588, | |
| "learning_rate": 9.160228357434314e-07, | |
| "loss": 0.4321, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 5.6503067484662575, | |
| "grad_norm": 0.20171020925045013, | |
| "learning_rate": 9.080117712520087e-07, | |
| "loss": 0.4243, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 5.656441717791411, | |
| "grad_norm": 0.19424133002758026, | |
| "learning_rate": 9.000323911948483e-07, | |
| "loss": 0.4096, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 5.662576687116564, | |
| "grad_norm": 0.20163863897323608, | |
| "learning_rate": 8.920847573566204e-07, | |
| "loss": 0.4381, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 5.668711656441718, | |
| "grad_norm": 0.1946740597486496, | |
| "learning_rate": 8.841689312761837e-07, | |
| "loss": 0.4318, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 5.674846625766871, | |
| "grad_norm": 0.19821976125240326, | |
| "learning_rate": 8.762849742461044e-07, | |
| "loss": 0.4143, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 5.680981595092025, | |
| "grad_norm": 0.1881324201822281, | |
| "learning_rate": 8.6843294731219e-07, | |
| "loss": 0.4343, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 5.6871165644171775, | |
| "grad_norm": 0.1956545114517212, | |
| "learning_rate": 8.60612911273011e-07, | |
| "loss": 0.4057, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 5.693251533742331, | |
| "grad_norm": 0.17811179161071777, | |
| "learning_rate": 8.528249266794286e-07, | |
| "loss": 0.4045, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 5.699386503067485, | |
| "grad_norm": 0.21344789862632751, | |
| "learning_rate": 8.450690538341299e-07, | |
| "loss": 0.3894, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 5.705521472392638, | |
| "grad_norm": 0.20089948177337646, | |
| "learning_rate": 8.373453527911618e-07, | |
| "loss": 0.4302, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 5.711656441717792, | |
| "grad_norm": 0.19986341893672943, | |
| "learning_rate": 8.29653883355458e-07, | |
| "loss": 0.4502, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 5.717791411042945, | |
| "grad_norm": 0.21868270635604858, | |
| "learning_rate": 8.219947050823862e-07, | |
| "loss": 0.4134, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 5.723926380368098, | |
| "grad_norm": 0.17949172854423523, | |
| "learning_rate": 8.143678772772811e-07, | |
| "loss": 0.4315, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 5.730061349693251, | |
| "grad_norm": 0.20468908548355103, | |
| "learning_rate": 8.06773458994986e-07, | |
| "loss": 0.4446, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 5.736196319018405, | |
| "grad_norm": 0.21158595383167267, | |
| "learning_rate": 7.99211509039397e-07, | |
| "loss": 0.4719, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 5.742331288343558, | |
| "grad_norm": 0.22054874897003174, | |
| "learning_rate": 7.916820859630031e-07, | |
| "loss": 0.4244, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 5.748466257668712, | |
| "grad_norm": 0.2146049439907074, | |
| "learning_rate": 7.841852480664414e-07, | |
| "loss": 0.419, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 5.754601226993865, | |
| "grad_norm": 0.20754463970661163, | |
| "learning_rate": 7.767210533980373e-07, | |
| "loss": 0.4288, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 5.7607361963190185, | |
| "grad_norm": 0.2121291607618332, | |
| "learning_rate": 7.692895597533584e-07, | |
| "loss": 0.4149, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 5.766871165644172, | |
| "grad_norm": 0.21436981856822968, | |
| "learning_rate": 7.618908246747686e-07, | |
| "loss": 0.4888, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 5.773006134969325, | |
| "grad_norm": 0.20958930253982544, | |
| "learning_rate": 7.54524905450979e-07, | |
| "loss": 0.423, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 5.779141104294479, | |
| "grad_norm": 0.20395760238170624, | |
| "learning_rate": 7.471918591166078e-07, | |
| "loss": 0.4115, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 5.785276073619632, | |
| "grad_norm": 0.18619965016841888, | |
| "learning_rate": 7.398917424517377e-07, | |
| "loss": 0.4395, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 5.791411042944786, | |
| "grad_norm": 0.18316420912742615, | |
| "learning_rate": 7.326246119814712e-07, | |
| "loss": 0.4201, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 5.7975460122699385, | |
| "grad_norm": 0.18815039098262787, | |
| "learning_rate": 7.253905239755021e-07, | |
| "loss": 0.4224, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 5.803680981595092, | |
| "grad_norm": 0.2220267653465271, | |
| "learning_rate": 7.181895344476747e-07, | |
| "loss": 0.4289, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 5.809815950920245, | |
| "grad_norm": 0.20367726683616638, | |
| "learning_rate": 7.110216991555457e-07, | |
| "loss": 0.4518, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 5.815950920245399, | |
| "grad_norm": 0.2034081518650055, | |
| "learning_rate": 7.038870735999631e-07, | |
| "loss": 0.4344, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 5.822085889570552, | |
| "grad_norm": 0.1995425820350647, | |
| "learning_rate": 6.96785713024628e-07, | |
| "loss": 0.4201, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 5.828220858895706, | |
| "grad_norm": 0.2018972784280777, | |
| "learning_rate": 6.897176724156663e-07, | |
| "loss": 0.4358, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.8343558282208585, | |
| "grad_norm": 0.20849905908107758, | |
| "learning_rate": 6.82683006501213e-07, | |
| "loss": 0.4192, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 5.840490797546012, | |
| "grad_norm": 0.18639759719371796, | |
| "learning_rate": 6.756817697509755e-07, | |
| "loss": 0.4372, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 5.846625766871165, | |
| "grad_norm": 0.19128410518169403, | |
| "learning_rate": 6.687140163758194e-07, | |
| "loss": 0.4123, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 5.852760736196319, | |
| "grad_norm": 0.21019329130649567, | |
| "learning_rate": 6.617798003273496e-07, | |
| "loss": 0.4193, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 5.858895705521473, | |
| "grad_norm": 0.17338204383850098, | |
| "learning_rate": 6.548791752974853e-07, | |
| "loss": 0.4051, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 5.865030674846626, | |
| "grad_norm": 0.20529429614543915, | |
| "learning_rate": 6.480121947180534e-07, | |
| "loss": 0.4616, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 5.871165644171779, | |
| "grad_norm": 0.19057132303714752, | |
| "learning_rate": 6.411789117603701e-07, | |
| "loss": 0.423, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 5.877300613496932, | |
| "grad_norm": 0.1914457529783249, | |
| "learning_rate": 6.343793793348247e-07, | |
| "loss": 0.4017, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 5.883435582822086, | |
| "grad_norm": 0.20379380881786346, | |
| "learning_rate": 6.276136500904823e-07, | |
| "loss": 0.4191, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 5.889570552147239, | |
| "grad_norm": 0.19549894332885742, | |
| "learning_rate": 6.208817764146596e-07, | |
| "loss": 0.4201, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 5.895705521472393, | |
| "grad_norm": 0.2176375687122345, | |
| "learning_rate": 6.141838104325376e-07, | |
| "loss": 0.443, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 5.901840490797546, | |
| "grad_norm": 0.18703949451446533, | |
| "learning_rate": 6.075198040067432e-07, | |
| "loss": 0.4184, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 5.9079754601226995, | |
| "grad_norm": 0.19391953945159912, | |
| "learning_rate": 6.00889808736953e-07, | |
| "loss": 0.4547, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 5.914110429447852, | |
| "grad_norm": 0.21159960329532623, | |
| "learning_rate": 5.942938759594952e-07, | |
| "loss": 0.4461, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 5.920245398773006, | |
| "grad_norm": 0.17359314858913422, | |
| "learning_rate": 5.877320567469514e-07, | |
| "loss": 0.3967, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 5.92638036809816, | |
| "grad_norm": 0.2088865041732788, | |
| "learning_rate": 5.812044019077578e-07, | |
| "loss": 0.4448, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 5.932515337423313, | |
| "grad_norm": 0.20267169177532196, | |
| "learning_rate": 5.747109619858176e-07, | |
| "loss": 0.4304, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 5.938650306748467, | |
| "grad_norm": 0.22262024879455566, | |
| "learning_rate": 5.682517872601034e-07, | |
| "loss": 0.44, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 5.9447852760736195, | |
| "grad_norm": 0.20060719549655914, | |
| "learning_rate": 5.618269277442723e-07, | |
| "loss": 0.4505, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 5.950920245398773, | |
| "grad_norm": 0.20760180056095123, | |
| "learning_rate": 5.554364331862799e-07, | |
| "loss": 0.4116, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 5.957055214723926, | |
| "grad_norm": 0.19337566196918488, | |
| "learning_rate": 5.490803530679883e-07, | |
| "loss": 0.4193, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 5.96319018404908, | |
| "grad_norm": 0.17379286885261536, | |
| "learning_rate": 5.427587366047893e-07, | |
| "loss": 0.428, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 5.969325153374233, | |
| "grad_norm": 0.19079700112342834, | |
| "learning_rate": 5.364716327452219e-07, | |
| "loss": 0.4439, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 5.975460122699387, | |
| "grad_norm": 0.21255679428577423, | |
| "learning_rate": 5.3021909017059e-07, | |
| "loss": 0.4143, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 5.9815950920245395, | |
| "grad_norm": 0.2059745490550995, | |
| "learning_rate": 5.240011572945896e-07, | |
| "loss": 0.4334, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 5.987730061349693, | |
| "grad_norm": 0.19311602413654327, | |
| "learning_rate": 5.178178822629348e-07, | |
| "loss": 0.421, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 5.993865030674847, | |
| "grad_norm": 0.20050852000713348, | |
| "learning_rate": 5.11669312952977e-07, | |
| "loss": 0.4219, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.19952666759490967, | |
| "learning_rate": 5.05555496973344e-07, | |
| "loss": 0.4253, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 6.006134969325154, | |
| "grad_norm": 0.19700153172016144, | |
| "learning_rate": 4.994764816635666e-07, | |
| "loss": 0.4002, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 6.012269938650307, | |
| "grad_norm": 0.19736388325691223, | |
| "learning_rate": 4.934323140937125e-07, | |
| "loss": 0.4436, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 6.0184049079754605, | |
| "grad_norm": 0.20198658108711243, | |
| "learning_rate": 4.874230410640207e-07, | |
| "loss": 0.4454, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 6.024539877300613, | |
| "grad_norm": 0.18840666115283966, | |
| "learning_rate": 4.814487091045405e-07, | |
| "loss": 0.448, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 6.030674846625767, | |
| "grad_norm": 0.18371936678886414, | |
| "learning_rate": 4.7550936447477215e-07, | |
| "loss": 0.3951, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 6.03680981595092, | |
| "grad_norm": 0.20922499895095825, | |
| "learning_rate": 4.6960505316330783e-07, | |
| "loss": 0.4124, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 6.042944785276074, | |
| "grad_norm": 0.1890077441930771, | |
| "learning_rate": 4.637358208874726e-07, | |
| "loss": 0.4318, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 6.049079754601227, | |
| "grad_norm": 0.1936153620481491, | |
| "learning_rate": 4.579017130929775e-07, | |
| "loss": 0.4279, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 6.0552147239263805, | |
| "grad_norm": 0.19301314651966095, | |
| "learning_rate": 4.521027749535578e-07, | |
| "loss": 0.4398, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 6.061349693251533, | |
| "grad_norm": 0.18920563161373138, | |
| "learning_rate": 4.463390513706317e-07, | |
| "loss": 0.4367, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 6.067484662576687, | |
| "grad_norm": 0.20188719034194946, | |
| "learning_rate": 4.406105869729532e-07, | |
| "loss": 0.4287, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 6.07361963190184, | |
| "grad_norm": 0.19266493618488312, | |
| "learning_rate": 4.3491742611625587e-07, | |
| "loss": 0.4405, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 6.079754601226994, | |
| "grad_norm": 0.1787448674440384, | |
| "learning_rate": 4.292596128829207e-07, | |
| "loss": 0.3901, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 6.085889570552148, | |
| "grad_norm": 0.20086398720741272, | |
| "learning_rate": 4.2363719108163113e-07, | |
| "loss": 0.3748, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 6.0920245398773005, | |
| "grad_norm": 0.2082991749048233, | |
| "learning_rate": 4.1805020424703024e-07, | |
| "loss": 0.4626, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 6.098159509202454, | |
| "grad_norm": 0.20025362074375153, | |
| "learning_rate": 4.124986956393895e-07, | |
| "loss": 0.466, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 6.104294478527607, | |
| "grad_norm": 0.18952858448028564, | |
| "learning_rate": 4.0698270824426846e-07, | |
| "loss": 0.4022, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 6.110429447852761, | |
| "grad_norm": 0.19780947268009186, | |
| "learning_rate": 4.0150228477218664e-07, | |
| "loss": 0.4298, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 6.116564417177914, | |
| "grad_norm": 0.19643534719944, | |
| "learning_rate": 3.960574676582901e-07, | |
| "loss": 0.4248, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 6.122699386503068, | |
| "grad_norm": 0.1984051913022995, | |
| "learning_rate": 3.906482990620236e-07, | |
| "loss": 0.4615, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 6.128834355828221, | |
| "grad_norm": 0.20450861752033234, | |
| "learning_rate": 3.8527482086680277e-07, | |
| "loss": 0.4138, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 6.134969325153374, | |
| "grad_norm": 0.1938575804233551, | |
| "learning_rate": 3.7993707467969267e-07, | |
| "loss": 0.4107, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.141104294478527, | |
| "grad_norm": 0.19877435266971588, | |
| "learning_rate": 3.746351018310812e-07, | |
| "loss": 0.4304, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 6.147239263803681, | |
| "grad_norm": 0.19607821106910706, | |
| "learning_rate": 3.693689433743658e-07, | |
| "loss": 0.4031, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 6.153374233128835, | |
| "grad_norm": 0.208278626203537, | |
| "learning_rate": 3.6413864008562785e-07, | |
| "loss": 0.4144, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 6.159509202453988, | |
| "grad_norm": 0.19486194849014282, | |
| "learning_rate": 3.589442324633224e-07, | |
| "loss": 0.4729, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 6.1656441717791415, | |
| "grad_norm": 0.1959027498960495, | |
| "learning_rate": 3.537857607279638e-07, | |
| "loss": 0.4195, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 6.171779141104294, | |
| "grad_norm": 0.2013474851846695, | |
| "learning_rate": 3.4866326482181025e-07, | |
| "loss": 0.4364, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 6.177914110429448, | |
| "grad_norm": 0.19486162066459656, | |
| "learning_rate": 3.4357678440856136e-07, | |
| "loss": 0.413, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 6.184049079754601, | |
| "grad_norm": 0.18348956108093262, | |
| "learning_rate": 3.385263588730464e-07, | |
| "loss": 0.427, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 6.190184049079755, | |
| "grad_norm": 0.18834654986858368, | |
| "learning_rate": 3.3351202732091693e-07, | |
| "loss": 0.4004, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 6.196319018404908, | |
| "grad_norm": 0.22560544312000275, | |
| "learning_rate": 3.2853382857835124e-07, | |
| "loss": 0.382, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 6.2024539877300615, | |
| "grad_norm": 0.19283907115459442, | |
| "learning_rate": 3.235918011917477e-07, | |
| "loss": 0.4918, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 6.208588957055214, | |
| "grad_norm": 0.21347446739673615, | |
| "learning_rate": 3.186859834274292e-07, | |
| "loss": 0.4492, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 6.214723926380368, | |
| "grad_norm": 0.2025686800479889, | |
| "learning_rate": 3.1381641327134616e-07, | |
| "loss": 0.4489, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 6.220858895705521, | |
| "grad_norm": 0.23214280605316162, | |
| "learning_rate": 3.0898312842878144e-07, | |
| "loss": 0.4648, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 6.226993865030675, | |
| "grad_norm": 0.19760558009147644, | |
| "learning_rate": 3.041861663240592e-07, | |
| "loss": 0.4261, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 6.233128834355828, | |
| "grad_norm": 0.18486978113651276, | |
| "learning_rate": 2.994255641002564e-07, | |
| "loss": 0.443, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 6.2392638036809815, | |
| "grad_norm": 0.2192317545413971, | |
| "learning_rate": 2.947013586189124e-07, | |
| "loss": 0.4466, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 6.245398773006135, | |
| "grad_norm": 0.22236081957817078, | |
| "learning_rate": 2.9001358645974696e-07, | |
| "loss": 0.401, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 6.251533742331288, | |
| "grad_norm": 0.1953679621219635, | |
| "learning_rate": 2.85362283920374e-07, | |
| "loss": 0.403, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 6.257668711656442, | |
| "grad_norm": 0.19780360162258148, | |
| "learning_rate": 2.8074748701601984e-07, | |
| "loss": 0.4384, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 6.263803680981595, | |
| "grad_norm": 0.20466431975364685, | |
| "learning_rate": 2.761692314792502e-07, | |
| "loss": 0.4312, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 6.269938650306749, | |
| "grad_norm": 0.18782320618629456, | |
| "learning_rate": 2.7162755275968513e-07, | |
| "loss": 0.4042, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 6.276073619631902, | |
| "grad_norm": 0.1981232464313507, | |
| "learning_rate": 2.6712248602373205e-07, | |
| "loss": 0.4474, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 6.282208588957055, | |
| "grad_norm": 0.2031874656677246, | |
| "learning_rate": 2.626540661543103e-07, | |
| "loss": 0.4364, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 6.288343558282208, | |
| "grad_norm": 0.18200956284999847, | |
| "learning_rate": 2.582223277505769e-07, | |
| "loss": 0.4289, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 6.294478527607362, | |
| "grad_norm": 0.18988868594169617, | |
| "learning_rate": 2.538273051276685e-07, | |
| "loss": 0.4263, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 6.300613496932515, | |
| "grad_norm": 0.1987670212984085, | |
| "learning_rate": 2.4946903231642727e-07, | |
| "loss": 0.4316, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 6.306748466257669, | |
| "grad_norm": 0.21598701179027557, | |
| "learning_rate": 2.451475430631384e-07, | |
| "loss": 0.4416, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 6.3128834355828225, | |
| "grad_norm": 0.20041204988956451, | |
| "learning_rate": 2.408628708292732e-07, | |
| "loss": 0.4592, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 6.319018404907975, | |
| "grad_norm": 0.22675997018814087, | |
| "learning_rate": 2.3661504879122554e-07, | |
| "loss": 0.4562, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 6.325153374233129, | |
| "grad_norm": 0.2256935089826584, | |
| "learning_rate": 2.3240410984005701e-07, | |
| "loss": 0.4044, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 6.331288343558282, | |
| "grad_norm": 0.21728554368019104, | |
| "learning_rate": 2.2823008658124425e-07, | |
| "loss": 0.3976, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 6.337423312883436, | |
| "grad_norm": 0.19935831427574158, | |
| "learning_rate": 2.2409301133441918e-07, | |
| "loss": 0.4648, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 6.343558282208589, | |
| "grad_norm": 0.1918664127588272, | |
| "learning_rate": 2.1999291613312824e-07, | |
| "loss": 0.4083, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 6.3496932515337425, | |
| "grad_norm": 0.18631911277770996, | |
| "learning_rate": 2.15929832724579e-07, | |
| "loss": 0.4585, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 6.355828220858895, | |
| "grad_norm": 0.20234107971191406, | |
| "learning_rate": 2.1190379256939342e-07, | |
| "loss": 0.4341, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 6.361963190184049, | |
| "grad_norm": 0.20566298067569733, | |
| "learning_rate": 2.0791482684136833e-07, | |
| "loss": 0.4054, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 6.368098159509202, | |
| "grad_norm": 0.19845980405807495, | |
| "learning_rate": 2.0396296642722856e-07, | |
| "loss": 0.4592, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 6.374233128834356, | |
| "grad_norm": 0.19033271074295044, | |
| "learning_rate": 2.0004824192639437e-07, | |
| "loss": 0.4201, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 6.38036809815951, | |
| "grad_norm": 0.20748300850391388, | |
| "learning_rate": 1.9617068365073987e-07, | |
| "loss": 0.4349, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 6.386503067484663, | |
| "grad_norm": 0.19087129831314087, | |
| "learning_rate": 1.9233032162435828e-07, | |
| "loss": 0.4122, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 6.392638036809816, | |
| "grad_norm": 0.2218639850616455, | |
| "learning_rate": 1.885271855833315e-07, | |
| "loss": 0.4133, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 6.398773006134969, | |
| "grad_norm": 0.19954140484333038, | |
| "learning_rate": 1.847613049755015e-07, | |
| "loss": 0.4433, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 6.404907975460123, | |
| "grad_norm": 0.18886399269104004, | |
| "learning_rate": 1.8103270896023427e-07, | |
| "loss": 0.3899, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 6.411042944785276, | |
| "grad_norm": 0.20953959226608276, | |
| "learning_rate": 1.7734142640820684e-07, | |
| "loss": 0.4357, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 6.41717791411043, | |
| "grad_norm": 0.20415045320987701, | |
| "learning_rate": 1.7368748590117058e-07, | |
| "loss": 0.434, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 6.423312883435583, | |
| "grad_norm": 0.2486049383878708, | |
| "learning_rate": 1.7007091573173818e-07, | |
| "loss": 0.4239, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 6.429447852760736, | |
| "grad_norm": 0.19526998698711395, | |
| "learning_rate": 1.6649174390316325e-07, | |
| "loss": 0.3995, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 6.435582822085889, | |
| "grad_norm": 0.19858838617801666, | |
| "learning_rate": 1.629499981291205e-07, | |
| "loss": 0.4451, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 6.441717791411043, | |
| "grad_norm": 0.1742226481437683, | |
| "learning_rate": 1.5944570583349416e-07, | |
| "loss": 0.4165, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.447852760736196, | |
| "grad_norm": 0.20162968337535858, | |
| "learning_rate": 1.5597889415016609e-07, | |
| "loss": 0.4152, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 6.45398773006135, | |
| "grad_norm": 0.22722604870796204, | |
| "learning_rate": 1.5254958992280022e-07, | |
| "loss": 0.4472, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 6.460122699386503, | |
| "grad_norm": 0.20080749690532684, | |
| "learning_rate": 1.4915781970464226e-07, | |
| "loss": 0.4206, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 6.466257668711656, | |
| "grad_norm": 0.20352134108543396, | |
| "learning_rate": 1.4580360975830988e-07, | |
| "loss": 0.4739, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 6.47239263803681, | |
| "grad_norm": 0.2271064966917038, | |
| "learning_rate": 1.4248698605558887e-07, | |
| "loss": 0.4051, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 6.478527607361963, | |
| "grad_norm": 0.21113221347332, | |
| "learning_rate": 1.3920797427723454e-07, | |
| "loss": 0.4652, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 6.484662576687117, | |
| "grad_norm": 0.19622337818145752, | |
| "learning_rate": 1.3596659981277016e-07, | |
| "loss": 0.4079, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 6.49079754601227, | |
| "grad_norm": 0.20004568994045258, | |
| "learning_rate": 1.3276288776029267e-07, | |
| "loss": 0.437, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 6.4969325153374236, | |
| "grad_norm": 0.19560450315475464, | |
| "learning_rate": 1.2959686292627782e-07, | |
| "loss": 0.4172, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 6.5030674846625764, | |
| "grad_norm": 0.20478709042072296, | |
| "learning_rate": 1.2646854982538593e-07, | |
| "loss": 0.4426, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 6.50920245398773, | |
| "grad_norm": 0.20099417865276337, | |
| "learning_rate": 1.2337797268027475e-07, | |
| "loss": 0.4246, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 6.515337423312883, | |
| "grad_norm": 0.19599393010139465, | |
| "learning_rate": 1.2032515542141188e-07, | |
| "loss": 0.4089, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 6.521472392638037, | |
| "grad_norm": 0.20901450514793396, | |
| "learning_rate": 1.1731012168688715e-07, | |
| "loss": 0.4031, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 6.52760736196319, | |
| "grad_norm": 0.20133835077285767, | |
| "learning_rate": 1.1433289482223276e-07, | |
| "loss": 0.429, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 6.533742331288344, | |
| "grad_norm": 0.2047884315252304, | |
| "learning_rate": 1.1139349788023779e-07, | |
| "loss": 0.4024, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 6.539877300613497, | |
| "grad_norm": 0.18399380147457123, | |
| "learning_rate": 1.084919536207757e-07, | |
| "loss": 0.4377, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 6.54601226993865, | |
| "grad_norm": 0.18939992785453796, | |
| "learning_rate": 1.0562828451062323e-07, | |
| "loss": 0.4141, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 6.552147239263804, | |
| "grad_norm": 0.21095708012580872, | |
| "learning_rate": 1.0280251272328956e-07, | |
| "loss": 0.4187, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 6.558282208588957, | |
| "grad_norm": 0.20305517315864563, | |
| "learning_rate": 1.0001466013884131e-07, | |
| "loss": 0.4238, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 6.564417177914111, | |
| "grad_norm": 0.20672884583473206, | |
| "learning_rate": 9.726474834373833e-08, | |
| "loss": 0.3993, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 6.570552147239264, | |
| "grad_norm": 0.19539766013622284, | |
| "learning_rate": 9.455279863065936e-08, | |
| "loss": 0.422, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 6.576687116564417, | |
| "grad_norm": 0.2007475346326828, | |
| "learning_rate": 9.187883199834491e-08, | |
| "loss": 0.3905, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 6.58282208588957, | |
| "grad_norm": 0.2838771641254425, | |
| "learning_rate": 8.924286915142854e-08, | |
| "loss": 0.4672, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 6.588957055214724, | |
| "grad_norm": 0.19780975580215454, | |
| "learning_rate": 8.664493050028033e-08, | |
| "loss": 0.4534, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 6.595092024539877, | |
| "grad_norm": 0.18463049829006195, | |
| "learning_rate": 8.40850361608464e-08, | |
| "loss": 0.4187, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 6.601226993865031, | |
| "grad_norm": 0.20436058938503265, | |
| "learning_rate": 8.156320595449463e-08, | |
| "loss": 0.4615, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 6.6073619631901845, | |
| "grad_norm": 0.1944579929113388, | |
| "learning_rate": 7.907945940786033e-08, | |
| "loss": 0.4576, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 6.613496932515337, | |
| "grad_norm": 0.20546551048755646, | |
| "learning_rate": 7.66338157526969e-08, | |
| "loss": 0.4644, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 6.61963190184049, | |
| "grad_norm": 0.1846093386411667, | |
| "learning_rate": 7.422629392572323e-08, | |
| "loss": 0.4321, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 6.625766871165644, | |
| "grad_norm": 0.1991308182477951, | |
| "learning_rate": 7.185691256848093e-08, | |
| "loss": 0.441, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 6.631901840490798, | |
| "grad_norm": 0.1854790449142456, | |
| "learning_rate": 6.95256900271879e-08, | |
| "loss": 0.4384, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 6.638036809815951, | |
| "grad_norm": 0.20065605640411377, | |
| "learning_rate": 6.723264435259725e-08, | |
| "loss": 0.4157, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 6.644171779141105, | |
| "grad_norm": 0.1937716007232666, | |
| "learning_rate": 6.497779329985631e-08, | |
| "loss": 0.4157, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 6.6503067484662575, | |
| "grad_norm": 0.18098995089530945, | |
| "learning_rate": 6.27611543283696e-08, | |
| "loss": 0.4376, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 6.656441717791411, | |
| "grad_norm": 0.1942816972732544, | |
| "learning_rate": 6.058274460166547e-08, | |
| "loss": 0.4482, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 6.662576687116564, | |
| "grad_norm": 0.1918404996395111, | |
| "learning_rate": 5.8442580987259637e-08, | |
| "loss": 0.41, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 6.668711656441718, | |
| "grad_norm": 0.20013810694217682, | |
| "learning_rate": 5.634068005652804e-08, | |
| "loss": 0.383, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 6.674846625766871, | |
| "grad_norm": 0.1878250539302826, | |
| "learning_rate": 5.4277058084576394e-08, | |
| "loss": 0.4637, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 6.680981595092025, | |
| "grad_norm": 0.20763535797595978, | |
| "learning_rate": 5.225173105011583e-08, | |
| "loss": 0.4177, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 6.6871165644171775, | |
| "grad_norm": 0.19868603348731995, | |
| "learning_rate": 5.026471463533578e-08, | |
| "loss": 0.405, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 6.693251533742331, | |
| "grad_norm": 0.19065415859222412, | |
| "learning_rate": 4.831602422578852e-08, | |
| "loss": 0.3965, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 6.699386503067485, | |
| "grad_norm": 0.2003355175256729, | |
| "learning_rate": 4.640567491026316e-08, | |
| "loss": 0.4255, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 6.705521472392638, | |
| "grad_norm": 0.22053086757659912, | |
| "learning_rate": 4.453368148067405e-08, | |
| "loss": 0.4219, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 6.711656441717792, | |
| "grad_norm": 0.18970713019371033, | |
| "learning_rate": 4.2700058431943694e-08, | |
| "loss": 0.445, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 6.717791411042945, | |
| "grad_norm": 0.19756844639778137, | |
| "learning_rate": 4.090481996189166e-08, | |
| "loss": 0.4183, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 6.723926380368098, | |
| "grad_norm": 0.20232686400413513, | |
| "learning_rate": 3.914797997112307e-08, | |
| "loss": 0.42, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 6.730061349693251, | |
| "grad_norm": 0.1890253722667694, | |
| "learning_rate": 3.7429552062923644e-08, | |
| "loss": 0.4202, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 6.736196319018405, | |
| "grad_norm": 0.18478119373321533, | |
| "learning_rate": 3.574954954315091e-08, | |
| "loss": 0.3997, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 6.742331288343558, | |
| "grad_norm": 0.18402665853500366, | |
| "learning_rate": 3.410798542013483e-08, | |
| "loss": 0.4306, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 6.748466257668712, | |
| "grad_norm": 0.2112373560667038, | |
| "learning_rate": 3.25048724045729e-08, | |
| "loss": 0.4423, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 6.754601226993865, | |
| "grad_norm": 0.18628022074699402, | |
| "learning_rate": 3.0940222909437434e-08, | |
| "loss": 0.4172, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 6.7607361963190185, | |
| "grad_norm": 0.18921811878681183, | |
| "learning_rate": 2.9414049049872883e-08, | |
| "loss": 0.4129, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 6.766871165644172, | |
| "grad_norm": 0.20240849256515503, | |
| "learning_rate": 2.7926362643106997e-08, | |
| "loss": 0.4621, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 6.773006134969325, | |
| "grad_norm": 0.18727080523967743, | |
| "learning_rate": 2.647717520835702e-08, | |
| "loss": 0.4166, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 6.779141104294479, | |
| "grad_norm": 0.19762536883354187, | |
| "learning_rate": 2.5066497966741987e-08, | |
| "loss": 0.4223, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 6.785276073619632, | |
| "grad_norm": 0.2059212028980255, | |
| "learning_rate": 2.3694341841193902e-08, | |
| "loss": 0.4323, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 6.791411042944786, | |
| "grad_norm": 0.21039064228534698, | |
| "learning_rate": 2.236071745637336e-08, | |
| "loss": 0.4219, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 6.7975460122699385, | |
| "grad_norm": 0.1902393400669098, | |
| "learning_rate": 2.1065635138590724e-08, | |
| "loss": 0.4157, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 6.803680981595092, | |
| "grad_norm": 0.20540878176689148, | |
| "learning_rate": 1.980910491572119e-08, | |
| "loss": 0.4339, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 6.809815950920245, | |
| "grad_norm": 0.22091785073280334, | |
| "learning_rate": 1.8591136517130404e-08, | |
| "loss": 0.4306, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 6.815950920245399, | |
| "grad_norm": 0.20457226037979126, | |
| "learning_rate": 1.7411739373598413e-08, | |
| "loss": 0.4332, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 6.822085889570552, | |
| "grad_norm": 0.23524275422096252, | |
| "learning_rate": 1.627092261724583e-08, | |
| "loss": 0.4358, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 6.828220858895706, | |
| "grad_norm": 0.18031160533428192, | |
| "learning_rate": 1.5168695081463904e-08, | |
| "loss": 0.4402, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 6.8343558282208585, | |
| "grad_norm": 0.21345287561416626, | |
| "learning_rate": 1.410506530084621e-08, | |
| "loss": 0.4317, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 6.840490797546012, | |
| "grad_norm": 0.2095663696527481, | |
| "learning_rate": 1.3080041511122077e-08, | |
| "loss": 0.4239, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 6.846625766871165, | |
| "grad_norm": 0.2007758915424347, | |
| "learning_rate": 1.2093631649093828e-08, | |
| "loss": 0.3961, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 6.852760736196319, | |
| "grad_norm": 0.20119042694568634, | |
| "learning_rate": 1.1145843352572406e-08, | |
| "loss": 0.441, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 6.858895705521473, | |
| "grad_norm": 0.18511272966861725, | |
| "learning_rate": 1.0236683960323512e-08, | |
| "loss": 0.4392, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 6.865030674846626, | |
| "grad_norm": 0.198281928896904, | |
| "learning_rate": 9.36616051200434e-09, | |
| "loss": 0.4244, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 6.871165644171779, | |
| "grad_norm": 0.18932987749576569, | |
| "learning_rate": 8.53427974811527e-09, | |
| "loss": 0.4387, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 6.877300613496932, | |
| "grad_norm": 0.19044747948646545, | |
| "learning_rate": 7.74104810994325e-09, | |
| "loss": 0.4252, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 6.883435582822086, | |
| "grad_norm": 0.1831924319267273, | |
| "learning_rate": 6.986471739513501e-09, | |
| "loss": 0.4281, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 6.889570552147239, | |
| "grad_norm": 0.18994773924350739, | |
| "learning_rate": 6.270556479541778e-09, | |
| "loss": 0.4423, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 6.895705521472393, | |
| "grad_norm": 0.20347394049167633, | |
| "learning_rate": 5.593307873389963e-09, | |
| "loss": 0.4119, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 6.901840490797546, | |
| "grad_norm": 0.19256287813186646, | |
| "learning_rate": 4.954731165022209e-09, | |
| "loss": 0.4218, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 6.9079754601226995, | |
| "grad_norm": 0.2047315537929535, | |
| "learning_rate": 4.354831298963858e-09, | |
| "loss": 0.428, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 6.914110429447852, | |
| "grad_norm": 0.2375705987215042, | |
| "learning_rate": 3.7936129202648106e-09, | |
| "loss": 0.4402, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 6.920245398773006, | |
| "grad_norm": 0.19552327692508698, | |
| "learning_rate": 3.271080374462332e-09, | |
| "loss": 0.4127, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 6.92638036809816, | |
| "grad_norm": 0.22805239260196686, | |
| "learning_rate": 2.7872377075471856e-09, | |
| "loss": 0.4411, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 6.932515337423313, | |
| "grad_norm": 0.2000674456357956, | |
| "learning_rate": 2.3420886659331067e-09, | |
| "loss": 0.4226, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 6.938650306748467, | |
| "grad_norm": 0.22863265872001648, | |
| "learning_rate": 1.9356366964279338e-09, | |
| "loss": 0.4253, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 6.9447852760736195, | |
| "grad_norm": 0.20869435369968414, | |
| "learning_rate": 1.5678849462058554e-09, | |
| "loss": 0.4374, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 6.950920245398773, | |
| "grad_norm": 0.20107249915599823, | |
| "learning_rate": 1.2388362627840934e-09, | |
| "loss": 0.4414, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 6.957055214723926, | |
| "grad_norm": 0.1913885623216629, | |
| "learning_rate": 9.484931940001442e-10, | |
| "loss": 0.4179, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 6.96319018404908, | |
| "grad_norm": 0.19177880883216858, | |
| "learning_rate": 6.968579879923498e-10, | |
| "loss": 0.438, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 6.969325153374233, | |
| "grad_norm": 0.21289180219173431, | |
| "learning_rate": 4.8393259318269e-10, | |
| "loss": 0.4331, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 6.975460122699387, | |
| "grad_norm": 0.20402106642723083, | |
| "learning_rate": 3.097186582606826e-10, | |
| "loss": 0.4162, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 6.9815950920245395, | |
| "grad_norm": 0.19780924916267395, | |
| "learning_rate": 1.7421753217283788e-10, | |
| "loss": 0.4331, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 6.987730061349693, | |
| "grad_norm": 0.19273407757282257, | |
| "learning_rate": 7.743026410989007e-11, | |
| "loss": 0.3837, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 6.993865030674847, | |
| "grad_norm": 0.19189181923866272, | |
| "learning_rate": 1.9357603499026157e-11, | |
| "loss": 0.4353, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.21704550087451935, | |
| "learning_rate": 0.0, | |
| "loss": 0.4444, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "step": 1141, | |
| "total_flos": 2.996752297507999e+19, | |
| "train_loss": 0.44955052823272323, | |
| "train_runtime": 45593.8747, | |
| "train_samples_per_second": 3.202, | |
| "train_steps_per_second": 0.025 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1141, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.996752297507999e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
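
The `log_history` array above records, for every optimizer step, the epoch fraction, gradient norm, scheduled learning rate, and training loss, followed by a single summary entry carrying `train_loss`, `train_runtime`, and throughput. As a minimal sketch of how such a `trainer_state.json` dumped by `transformers.Trainer` can be inspected, assuming the JSON is saved to a file named `trainer_state.json` (a hypothetical path) and using only the Python standard library:

```python
import json

# Load the trainer state dumped by transformers.Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry a "loss" key; the final summary entry does not,
# so filtering on that key cleanly separates the two kinds of records.
history = [e for e in state["log_history"] if "loss" in e]

first, last = history[0], history[-1]
print(f"steps logged: {len(history)} (max_steps={state['max_steps']})")
print(f"loss: {first['loss']:.4f} @ step {first['step']} -> "
      f"{last['loss']:.4f} @ step {last['step']}")

# The learning-rate schedule decays to exactly 0.0 on the final step.
print(f"final learning rate: {last['learning_rate']}")
```

On this run the sketch would end by reporting the step-1141 loss of 0.4444 and a final learning rate of 0.0, alongside which the summary entry's `train_loss` of about 0.4496 gives the mean loss over all 1141 steps.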