| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999607119003654, | |
| "eval_steps": 500, | |
| "global_step": 6363, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00015715239853848268, | |
| "grad_norm": 29.15457534790039, | |
| "learning_rate": 1.5698587127158556e-06, | |
| "loss": 5.9252, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0007857619926924135, | |
| "grad_norm": 24.992176055908203, | |
| "learning_rate": 7.849293563579277e-06, | |
| "loss": 5.9618, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.001571523985384827, | |
| "grad_norm": 25.485349655151367, | |
| "learning_rate": 1.5698587127158555e-05, | |
| "loss": 5.9265, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0023572859780772405, | |
| "grad_norm": 20.05711555480957, | |
| "learning_rate": 2.3547880690737836e-05, | |
| "loss": 5.9391, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.003143047970769654, | |
| "grad_norm": 8.419732093811035, | |
| "learning_rate": 3.139717425431711e-05, | |
| "loss": 5.6583, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.003928809963462067, | |
| "grad_norm": 7.963233470916748, | |
| "learning_rate": 3.924646781789639e-05, | |
| "loss": 5.4406, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.004714571956154481, | |
| "grad_norm": 4.564691066741943, | |
| "learning_rate": 4.709576138147567e-05, | |
| "loss": 5.1726, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.005500333948846894, | |
| "grad_norm": 4.1633124351501465, | |
| "learning_rate": 5.4945054945054945e-05, | |
| "loss": 4.9458, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.006286095941539308, | |
| "grad_norm": 3.9529995918273926, | |
| "learning_rate": 6.279434850863422e-05, | |
| "loss": 4.6674, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0070718579342317215, | |
| "grad_norm": 3.318066120147705, | |
| "learning_rate": 7.06436420722135e-05, | |
| "loss": 4.4785, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.007857619926924134, | |
| "grad_norm": 3.131993055343628, | |
| "learning_rate": 7.849293563579278e-05, | |
| "loss": 4.3329, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.008643381919616548, | |
| "grad_norm": 2.6008388996124268, | |
| "learning_rate": 8.634222919937205e-05, | |
| "loss": 4.1992, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.009429143912308962, | |
| "grad_norm": 2.183000326156616, | |
| "learning_rate": 9.419152276295134e-05, | |
| "loss": 4.0823, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.010214905905001376, | |
| "grad_norm": 1.9434701204299927, | |
| "learning_rate": 0.00010204081632653062, | |
| "loss": 3.8471, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.011000667897693788, | |
| "grad_norm": 1.625468134880066, | |
| "learning_rate": 0.00010989010989010989, | |
| "loss": 3.6757, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.011786429890386202, | |
| "grad_norm": 1.4538207054138184, | |
| "learning_rate": 0.00011773940345368916, | |
| "loss": 3.5552, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.012572191883078615, | |
| "grad_norm": 1.18235182762146, | |
| "learning_rate": 0.00012558869701726844, | |
| "loss": 3.5154, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.01335795387577103, | |
| "grad_norm": 0.815110981464386, | |
| "learning_rate": 0.00013343799058084774, | |
| "loss": 3.3802, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.014143715868463443, | |
| "grad_norm": 0.662001371383667, | |
| "learning_rate": 0.000141287284144427, | |
| "loss": 3.2555, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.014929477861155857, | |
| "grad_norm": 0.5505642890930176, | |
| "learning_rate": 0.0001491365777080063, | |
| "loss": 3.0703, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.01571523985384827, | |
| "grad_norm": 0.4972209334373474, | |
| "learning_rate": 0.00015698587127158556, | |
| "loss": 3.0455, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.016501001846540683, | |
| "grad_norm": 0.427781879901886, | |
| "learning_rate": 0.00016483516483516484, | |
| "loss": 2.9642, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.017286763839233096, | |
| "grad_norm": 0.3651106655597687, | |
| "learning_rate": 0.0001726844583987441, | |
| "loss": 2.8522, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.01807252583192551, | |
| "grad_norm": 0.3085882067680359, | |
| "learning_rate": 0.00018053375196232338, | |
| "loss": 2.8798, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.018858287824617924, | |
| "grad_norm": 0.277054101228714, | |
| "learning_rate": 0.00018838304552590268, | |
| "loss": 2.8542, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.019644049817310338, | |
| "grad_norm": 0.2966115474700928, | |
| "learning_rate": 0.00019623233908948196, | |
| "loss": 2.8472, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.02042981181000275, | |
| "grad_norm": 0.3365791440010071, | |
| "learning_rate": 0.00020408163265306123, | |
| "loss": 2.7396, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.021215573802695165, | |
| "grad_norm": 0.2897559702396393, | |
| "learning_rate": 0.0002119309262166405, | |
| "loss": 2.7275, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.022001335795387576, | |
| "grad_norm": 0.3298715651035309, | |
| "learning_rate": 0.00021978021978021978, | |
| "loss": 2.6553, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.02278709778807999, | |
| "grad_norm": 0.4270775020122528, | |
| "learning_rate": 0.00022762951334379905, | |
| "loss": 2.6166, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.023572859780772403, | |
| "grad_norm": 0.29401084780693054, | |
| "learning_rate": 0.00023547880690737833, | |
| "loss": 2.5642, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.024358621773464817, | |
| "grad_norm": 0.2512624263763428, | |
| "learning_rate": 0.00024332810047095763, | |
| "loss": 2.5249, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.02514438376615723, | |
| "grad_norm": 0.27092209458351135, | |
| "learning_rate": 0.0002511773940345369, | |
| "loss": 2.5108, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.025930145758849645, | |
| "grad_norm": 0.2265215367078781, | |
| "learning_rate": 0.0002590266875981162, | |
| "loss": 2.5287, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.02671590775154206, | |
| "grad_norm": 0.2427075356245041, | |
| "learning_rate": 0.0002668759811616955, | |
| "loss": 2.5689, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.027501669744234472, | |
| "grad_norm": 0.20640349388122559, | |
| "learning_rate": 0.0002747252747252748, | |
| "loss": 2.5005, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.028287431736926886, | |
| "grad_norm": 0.2663057744503021, | |
| "learning_rate": 0.000282574568288854, | |
| "loss": 2.4339, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0290731937296193, | |
| "grad_norm": 0.29693251848220825, | |
| "learning_rate": 0.00029042386185243333, | |
| "loss": 2.4894, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.029858955722311713, | |
| "grad_norm": 0.2689124643802643, | |
| "learning_rate": 0.0002982731554160126, | |
| "loss": 2.525, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.030644717715004124, | |
| "grad_norm": 0.23312485218048096, | |
| "learning_rate": 0.0003061224489795919, | |
| "loss": 2.4748, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.03143047970769654, | |
| "grad_norm": 0.21752777695655823, | |
| "learning_rate": 0.0003139717425431711, | |
| "loss": 2.4346, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.032216241700388955, | |
| "grad_norm": 0.18333539366722107, | |
| "learning_rate": 0.0003218210361067504, | |
| "loss": 2.3613, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.033002003693081365, | |
| "grad_norm": 0.2482461780309677, | |
| "learning_rate": 0.00032967032967032967, | |
| "loss": 2.3628, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.03378776568577378, | |
| "grad_norm": 0.36294370889663696, | |
| "learning_rate": 0.00033751962323390897, | |
| "loss": 2.4492, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.03457352767846619, | |
| "grad_norm": 0.2805122137069702, | |
| "learning_rate": 0.0003453689167974882, | |
| "loss": 2.376, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0353592896711586, | |
| "grad_norm": 0.2519840598106384, | |
| "learning_rate": 0.0003532182103610675, | |
| "loss": 2.309, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.03614505166385102, | |
| "grad_norm": 0.19297201931476593, | |
| "learning_rate": 0.00036106750392464677, | |
| "loss": 2.299, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.03693081365654343, | |
| "grad_norm": 0.18939702212810516, | |
| "learning_rate": 0.00036891679748822607, | |
| "loss": 2.4143, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.03771657564923585, | |
| "grad_norm": 0.20321442186832428, | |
| "learning_rate": 0.00037676609105180537, | |
| "loss": 2.3443, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.03850233764192826, | |
| "grad_norm": 0.1922532320022583, | |
| "learning_rate": 0.00038461538461538467, | |
| "loss": 2.2984, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.039288099634620675, | |
| "grad_norm": 0.2797047197818756, | |
| "learning_rate": 0.0003924646781789639, | |
| "loss": 2.2656, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.040073861627313086, | |
| "grad_norm": 0.21200767159461975, | |
| "learning_rate": 0.0004003139717425432, | |
| "loss": 2.285, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.0408596236200055, | |
| "grad_norm": 0.21136586368083954, | |
| "learning_rate": 0.00040816326530612246, | |
| "loss": 2.284, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.04164538561269791, | |
| "grad_norm": 0.16410328447818756, | |
| "learning_rate": 0.00041601255886970177, | |
| "loss": 2.1946, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.04243114760539033, | |
| "grad_norm": 0.2457076907157898, | |
| "learning_rate": 0.000423861852433281, | |
| "loss": 2.332, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04321690959808274, | |
| "grad_norm": 0.23471523821353912, | |
| "learning_rate": 0.0004317111459968603, | |
| "loss": 2.2363, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.04400267159077515, | |
| "grad_norm": 0.1952430009841919, | |
| "learning_rate": 0.00043956043956043956, | |
| "loss": 2.2182, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04478843358346757, | |
| "grad_norm": 0.23113080859184265, | |
| "learning_rate": 0.00044740973312401886, | |
| "loss": 2.2092, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.04557419557615998, | |
| "grad_norm": 0.18108302354812622, | |
| "learning_rate": 0.0004552590266875981, | |
| "loss": 2.2655, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.046359957568852396, | |
| "grad_norm": 0.15540535748004913, | |
| "learning_rate": 0.0004631083202511774, | |
| "loss": 2.1482, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.047145719561544806, | |
| "grad_norm": 0.18835408985614777, | |
| "learning_rate": 0.00047095761381475666, | |
| "loss": 2.2144, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.047931481554237224, | |
| "grad_norm": 0.2090383619070053, | |
| "learning_rate": 0.000478806907378336, | |
| "loss": 2.1736, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.048717243546929634, | |
| "grad_norm": 0.2130228579044342, | |
| "learning_rate": 0.00048665620094191526, | |
| "loss": 2.2297, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.04950300553962205, | |
| "grad_norm": 0.2176492065191269, | |
| "learning_rate": 0.0004945054945054945, | |
| "loss": 2.171, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.05028876753231446, | |
| "grad_norm": 0.15854674577713013, | |
| "learning_rate": 0.0005023547880690738, | |
| "loss": 2.2807, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.05107452952500688, | |
| "grad_norm": 0.2400057166814804, | |
| "learning_rate": 0.0005102040816326531, | |
| "loss": 2.2298, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.05186029151769929, | |
| "grad_norm": 0.18969252705574036, | |
| "learning_rate": 0.0005180533751962324, | |
| "loss": 2.1822, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.0526460535103917, | |
| "grad_norm": 0.35932809114456177, | |
| "learning_rate": 0.0005259026687598116, | |
| "loss": 2.2482, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.05343181550308412, | |
| "grad_norm": 0.21439987421035767, | |
| "learning_rate": 0.000533751962323391, | |
| "loss": 2.2391, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.05421757749577653, | |
| "grad_norm": 0.2086169719696045, | |
| "learning_rate": 0.0005416012558869702, | |
| "loss": 2.1001, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.055003339488468944, | |
| "grad_norm": 0.20357146859169006, | |
| "learning_rate": 0.0005494505494505496, | |
| "loss": 2.1642, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.055789101481161354, | |
| "grad_norm": 0.25846609473228455, | |
| "learning_rate": 0.0005572998430141287, | |
| "loss": 2.2399, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.05657486347385377, | |
| "grad_norm": 0.22251342236995697, | |
| "learning_rate": 0.000565149136577708, | |
| "loss": 2.1899, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.05736062546654618, | |
| "grad_norm": 0.15347051620483398, | |
| "learning_rate": 0.0005729984301412873, | |
| "loss": 2.202, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.0581463874592386, | |
| "grad_norm": 0.2482958287000656, | |
| "learning_rate": 0.0005808477237048667, | |
| "loss": 2.181, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.05893214945193101, | |
| "grad_norm": 0.18029803037643433, | |
| "learning_rate": 0.0005886970172684458, | |
| "loss": 2.1423, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.05971791144462343, | |
| "grad_norm": 0.20792289078235626, | |
| "learning_rate": 0.0005965463108320251, | |
| "loss": 2.1015, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.06050367343731584, | |
| "grad_norm": 0.1740165799856186, | |
| "learning_rate": 0.0006043956043956044, | |
| "loss": 2.0581, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.06128943543000825, | |
| "grad_norm": 0.22941261529922485, | |
| "learning_rate": 0.0006122448979591838, | |
| "loss": 2.1631, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.062075197422700665, | |
| "grad_norm": 0.1646764874458313, | |
| "learning_rate": 0.0006200941915227629, | |
| "loss": 2.142, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.06286095941539308, | |
| "grad_norm": 0.2238229662179947, | |
| "learning_rate": 0.0006279434850863422, | |
| "loss": 2.1254, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06364672140808549, | |
| "grad_norm": 0.19127151370048523, | |
| "learning_rate": 0.0006357927786499215, | |
| "loss": 2.1019, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.06443248340077791, | |
| "grad_norm": 0.19444602727890015, | |
| "learning_rate": 0.0006436420722135008, | |
| "loss": 2.0932, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.06521824539347032, | |
| "grad_norm": 0.20162132382392883, | |
| "learning_rate": 0.0006514913657770801, | |
| "loss": 2.0709, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.06600400738616273, | |
| "grad_norm": 0.24843716621398926, | |
| "learning_rate": 0.0006593406593406593, | |
| "loss": 2.0435, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.06678976937885514, | |
| "grad_norm": 0.19505095481872559, | |
| "learning_rate": 0.0006671899529042387, | |
| "loss": 2.1971, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.06757553137154756, | |
| "grad_norm": 0.16956081986427307, | |
| "learning_rate": 0.0006750392464678179, | |
| "loss": 2.0889, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.06836129336423998, | |
| "grad_norm": 0.16742552816867828, | |
| "learning_rate": 0.0006828885400313972, | |
| "loss": 2.0928, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.06914705535693239, | |
| "grad_norm": 0.20322011411190033, | |
| "learning_rate": 0.0006907378335949764, | |
| "loss": 2.0823, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.0699328173496248, | |
| "grad_norm": 0.20075753331184387, | |
| "learning_rate": 0.0006985871271585558, | |
| "loss": 2.0312, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.0707185793423172, | |
| "grad_norm": 0.26476067304611206, | |
| "learning_rate": 0.000706436420722135, | |
| "loss": 2.1047, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07150434133500963, | |
| "grad_norm": 0.1607220470905304, | |
| "learning_rate": 0.0007142857142857143, | |
| "loss": 2.0503, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.07229010332770204, | |
| "grad_norm": 0.25718483328819275, | |
| "learning_rate": 0.0007221350078492935, | |
| "loss": 2.0814, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.07307586532039445, | |
| "grad_norm": 0.19512560963630676, | |
| "learning_rate": 0.0007299843014128729, | |
| "loss": 2.0276, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.07386162731308686, | |
| "grad_norm": 0.5019248127937317, | |
| "learning_rate": 0.0007378335949764521, | |
| "loss": 1.9824, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07464738930577929, | |
| "grad_norm": 0.21145927906036377, | |
| "learning_rate": 0.0007456828885400314, | |
| "loss": 2.0159, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.0754331512984717, | |
| "grad_norm": 0.19788451492786407, | |
| "learning_rate": 0.0007535321821036107, | |
| "loss": 2.1007, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.0762189132911641, | |
| "grad_norm": 0.1825549602508545, | |
| "learning_rate": 0.00076138147566719, | |
| "loss": 2.0288, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.07700467528385652, | |
| "grad_norm": 0.1614513248205185, | |
| "learning_rate": 0.0007692307692307693, | |
| "loss": 2.0782, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.07779043727654893, | |
| "grad_norm": 0.15320807695388794, | |
| "learning_rate": 0.0007770800627943485, | |
| "loss": 1.9095, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.07857619926924135, | |
| "grad_norm": 0.18029426038265228, | |
| "learning_rate": 0.0007849293563579278, | |
| "loss": 2.0567, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07936196126193376, | |
| "grad_norm": 0.23825803399085999, | |
| "learning_rate": 0.0007927786499215071, | |
| "loss": 1.9964, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.08014772325462617, | |
| "grad_norm": 0.18158216774463654, | |
| "learning_rate": 0.0008006279434850864, | |
| "loss": 1.9442, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08093348524731858, | |
| "grad_norm": 0.22512096166610718, | |
| "learning_rate": 0.0008084772370486656, | |
| "loss": 2.0562, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.081719247240011, | |
| "grad_norm": 0.19693829119205475, | |
| "learning_rate": 0.0008163265306122449, | |
| "loss": 2.0231, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.08250500923270342, | |
| "grad_norm": 0.17301593720912933, | |
| "learning_rate": 0.0008241758241758242, | |
| "loss": 1.9871, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.08329077122539583, | |
| "grad_norm": 0.22018562257289886, | |
| "learning_rate": 0.0008320251177394035, | |
| "loss": 2.0008, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.08407653321808824, | |
| "grad_norm": 0.23574061691761017, | |
| "learning_rate": 0.0008398744113029827, | |
| "loss": 1.9259, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.08486229521078066, | |
| "grad_norm": 0.16205668449401855, | |
| "learning_rate": 0.000847723704866562, | |
| "loss": 1.9598, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.08564805720347307, | |
| "grad_norm": 0.19473537802696228, | |
| "learning_rate": 0.0008555729984301414, | |
| "loss": 1.9231, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.08643381919616548, | |
| "grad_norm": 0.205925852060318, | |
| "learning_rate": 0.0008634222919937206, | |
| "loss": 1.9964, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.08721958118885789, | |
| "grad_norm": 0.27399733662605286, | |
| "learning_rate": 0.0008712715855572999, | |
| "loss": 1.9949, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.0880053431815503, | |
| "grad_norm": 0.18213149905204773, | |
| "learning_rate": 0.0008791208791208791, | |
| "loss": 1.9875, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.08879110517424273, | |
| "grad_norm": 0.27155083417892456, | |
| "learning_rate": 0.0008869701726844585, | |
| "loss": 1.9682, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.08957686716693514, | |
| "grad_norm": 0.163107231259346, | |
| "learning_rate": 0.0008948194662480377, | |
| "loss": 1.9654, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.09036262915962755, | |
| "grad_norm": 0.31603917479515076, | |
| "learning_rate": 0.000902668759811617, | |
| "loss": 1.9086, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.09114839115231996, | |
| "grad_norm": 0.3522120416164398, | |
| "learning_rate": 0.0009105180533751962, | |
| "loss": 2.0186, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.09193415314501238, | |
| "grad_norm": 0.16749052703380585, | |
| "learning_rate": 0.0009183673469387756, | |
| "loss": 1.9922, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.09271991513770479, | |
| "grad_norm": 0.18135912716388702, | |
| "learning_rate": 0.0009262166405023548, | |
| "loss": 2.018, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.0935056771303972, | |
| "grad_norm": 0.26006466150283813, | |
| "learning_rate": 0.0009340659340659341, | |
| "loss": 1.9165, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.09429143912308961, | |
| "grad_norm": 0.1768893599510193, | |
| "learning_rate": 0.0009419152276295133, | |
| "loss": 1.9371, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09507720111578202, | |
| "grad_norm": 0.2588590681552887, | |
| "learning_rate": 0.0009497645211930927, | |
| "loss": 1.9935, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.09586296310847445, | |
| "grad_norm": 0.20219220221042633, | |
| "learning_rate": 0.000957613814756672, | |
| "loss": 1.9188, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.09664872510116686, | |
| "grad_norm": 0.21311190724372864, | |
| "learning_rate": 0.0009654631083202512, | |
| "loss": 1.9969, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.09743448709385927, | |
| "grad_norm": 0.2351716160774231, | |
| "learning_rate": 0.0009733124018838305, | |
| "loss": 1.9984, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.09822024908655168, | |
| "grad_norm": 0.1859682947397232, | |
| "learning_rate": 0.0009811616954474097, | |
| "loss": 1.9381, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.0990060110792441, | |
| "grad_norm": 0.15183129906654358, | |
| "learning_rate": 0.000989010989010989, | |
| "loss": 1.9573, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.09979177307193651, | |
| "grad_norm": 0.25219467282295227, | |
| "learning_rate": 0.0009968602825745684, | |
| "loss": 1.9229, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.10057753506462892, | |
| "grad_norm": 0.1858813762664795, | |
| "learning_rate": 0.0009999993227024916, | |
| "loss": 1.9226, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.10136329705732133, | |
| "grad_norm": 0.16762517392635345, | |
| "learning_rate": 0.0009999951836688061, | |
| "loss": 1.9119, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.10214905905001376, | |
| "grad_norm": 0.28066954016685486, | |
| "learning_rate": 0.00099998728190894, | |
| "loss": 2.0073, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.10293482104270617, | |
| "grad_norm": 0.21832288801670074, | |
| "learning_rate": 0.0009999756174823573, | |
| "loss": 1.9843, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.10372058303539858, | |
| "grad_norm": 0.1832871437072754, | |
| "learning_rate": 0.0009999601904768399, | |
| "loss": 1.8482, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.10450634502809099, | |
| "grad_norm": 0.18006999790668488, | |
| "learning_rate": 0.0009999410010084833, | |
| "loss": 1.846, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.1052921070207834, | |
| "grad_norm": 0.18294790387153625, | |
| "learning_rate": 0.0009999180492216988, | |
| "loss": 1.9233, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.10607786901347582, | |
| "grad_norm": 0.17135338485240936, | |
| "learning_rate": 0.0009998913352892106, | |
| "loss": 1.8711, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.10686363100616823, | |
| "grad_norm": 0.1738177239894867, | |
| "learning_rate": 0.0009998608594120547, | |
| "loss": 1.872, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.10764939299886064, | |
| "grad_norm": 0.16996446251869202, | |
| "learning_rate": 0.0009998266218195786, | |
| "loss": 1.9671, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.10843515499155305, | |
| "grad_norm": 0.17967042326927185, | |
| "learning_rate": 0.000999788622769438, | |
| "loss": 1.8942, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.10922091698424548, | |
| "grad_norm": 0.2092641294002533, | |
| "learning_rate": 0.0009997468625475953, | |
| "loss": 1.919, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.11000667897693789, | |
| "grad_norm": 0.16578802466392517, | |
| "learning_rate": 0.0009997013414683184, | |
| "loss": 1.8921, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1107924409696303, | |
| "grad_norm": 0.15281111001968384, | |
| "learning_rate": 0.0009996520598741774, | |
| "loss": 1.9357, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.11157820296232271, | |
| "grad_norm": 0.16658322513103485, | |
| "learning_rate": 0.000999599018136042, | |
| "loss": 1.97, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.11236396495501512, | |
| "grad_norm": 0.18053396046161652, | |
| "learning_rate": 0.0009995422166530791, | |
| "loss": 1.8677, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.11314972694770754, | |
| "grad_norm": 0.1864767223596573, | |
| "learning_rate": 0.0009994816558527497, | |
| "loss": 1.8493, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.11393548894039995, | |
| "grad_norm": 0.27378034591674805, | |
| "learning_rate": 0.000999417336190806, | |
| "loss": 1.9325, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.11472125093309236, | |
| "grad_norm": 0.29733896255493164, | |
| "learning_rate": 0.0009993492581512864, | |
| "loss": 1.8759, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.11550701292578477, | |
| "grad_norm": 0.25246742367744446, | |
| "learning_rate": 0.0009992774222465147, | |
| "loss": 1.9363, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.1162927749184772, | |
| "grad_norm": 0.17535606026649475, | |
| "learning_rate": 0.0009992018290170932, | |
| "loss": 1.848, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.11707853691116961, | |
| "grad_norm": 0.16660931706428528, | |
| "learning_rate": 0.0009991224790319008, | |
| "loss": 1.8419, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.11786429890386202, | |
| "grad_norm": 0.18775136768817902, | |
| "learning_rate": 0.000999039372888088, | |
| "loss": 1.8606, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.11865006089655443, | |
| "grad_norm": 0.16369999945163727, | |
| "learning_rate": 0.000998952511211072, | |
| "loss": 1.9784, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.11943582288924685, | |
| "grad_norm": 0.18088851869106293, | |
| "learning_rate": 0.0009988618946545325, | |
| "loss": 1.8971, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.12022158488193926, | |
| "grad_norm": 0.15194383263587952, | |
| "learning_rate": 0.0009987675239004066, | |
| "loss": 1.8897, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.12100734687463167, | |
| "grad_norm": 0.16550414264202118, | |
| "learning_rate": 0.0009986693996588837, | |
| "loss": 1.9305, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.12179310886732408, | |
| "grad_norm": 0.1574241816997528, | |
| "learning_rate": 0.0009985675226684004, | |
| "loss": 1.905, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.1225788708600165, | |
| "grad_norm": 0.1789279580116272, | |
| "learning_rate": 0.0009984618936956344, | |
| "loss": 1.9603, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.12336463285270892, | |
| "grad_norm": 0.17177298665046692, | |
| "learning_rate": 0.0009983525135354994, | |
| "loss": 1.9059, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.12415039484540133, | |
| "grad_norm": 1.0995100736618042, | |
| "learning_rate": 0.000998239383011138, | |
| "loss": 1.8245, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.12493615683809374, | |
| "grad_norm": 0.1880197674036026, | |
| "learning_rate": 0.0009981225029739172, | |
| "loss": 1.8176, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.12572191883078615, | |
| "grad_norm": 0.21664464473724365, | |
| "learning_rate": 0.0009980018743034208, | |
| "loss": 1.8941, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12650768082347857, | |
| "grad_norm": 0.18087147176265717, | |
| "learning_rate": 0.0009978774979074422, | |
| "loss": 1.8696, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.12729344281617097, | |
| "grad_norm": 0.31465378403663635, | |
| "learning_rate": 0.0009977493747219794, | |
| "loss": 1.8643, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.1280792048088634, | |
| "grad_norm": 0.15885500609874725, | |
| "learning_rate": 0.000997617505711227, | |
| "loss": 1.8692, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.12886496680155582, | |
| "grad_norm": 0.19823002815246582, | |
| "learning_rate": 0.0009974818918675678, | |
| "loss": 1.8476, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.12965072879424822, | |
| "grad_norm": 0.33955878019332886, | |
| "learning_rate": 0.0009973425342115678, | |
| "loss": 1.8371, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.13043649078694064, | |
| "grad_norm": 0.15947683155536652, | |
| "learning_rate": 0.0009971994337919662, | |
| "loss": 1.8525, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.13122225277963304, | |
| "grad_norm": 0.22582250833511353, | |
| "learning_rate": 0.0009970525916856686, | |
| "loss": 1.8344, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.13200801477232546, | |
| "grad_norm": 0.16655315458774567, | |
| "learning_rate": 0.0009969020089977392, | |
| "loss": 1.8229, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.13279377676501788, | |
| "grad_norm": 0.22109279036521912, | |
| "learning_rate": 0.0009967476868613916, | |
| "loss": 1.8169, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.13357953875771028, | |
| "grad_norm": 0.23577183485031128, | |
| "learning_rate": 0.0009965896264379811, | |
| "loss": 1.8485, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1343653007504027, | |
| "grad_norm": 0.16854895651340485, | |
| "learning_rate": 0.000996427828916995, | |
| "loss": 1.8947, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.13515106274309513, | |
| "grad_norm": 0.2264672815799713, | |
| "learning_rate": 0.000996262295516045, | |
| "loss": 1.8915, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.13593682473578753, | |
| "grad_norm": 0.22935688495635986, | |
| "learning_rate": 0.0009960930274808563, | |
| "loss": 1.836, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.13672258672847995, | |
| "grad_norm": 0.15960639715194702, | |
| "learning_rate": 0.0009959200260852602, | |
| "loss": 1.8687, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.13750834872117235, | |
| "grad_norm": 0.17692284286022186, | |
| "learning_rate": 0.0009957432926311824, | |
| "loss": 1.8443, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.13829411071386477, | |
| "grad_norm": 0.190086230635643, | |
| "learning_rate": 0.0009955628284486356, | |
| "loss": 1.8876, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.1390798727065572, | |
| "grad_norm": 0.16075825691223145, | |
| "learning_rate": 0.000995378634895707, | |
| "loss": 1.8239, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.1398656346992496, | |
| "grad_norm": 0.16710132360458374, | |
| "learning_rate": 0.0009951907133585501, | |
| "loss": 1.8647, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.14065139669194202, | |
| "grad_norm": 0.16804201900959015, | |
| "learning_rate": 0.000994999065251373, | |
| "loss": 1.7629, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.1414371586846344, | |
| "grad_norm": 0.1883011907339096, | |
| "learning_rate": 0.0009948036920164282, | |
| "loss": 1.8401, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.14222292067732684, | |
| "grad_norm": 0.1541757583618164, | |
| "learning_rate": 0.0009946045951240016, | |
| "loss": 1.8263, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.14300868267001926, | |
| "grad_norm": 0.1607087254524231, | |
| "learning_rate": 0.000994401776072402, | |
| "loss": 1.8431, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.14379444466271166, | |
| "grad_norm": 0.155876025557518, | |
| "learning_rate": 0.0009941952363879496, | |
| "loss": 1.835, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.14458020665540408, | |
| "grad_norm": 0.1786189079284668, | |
| "learning_rate": 0.0009939849776249635, | |
| "loss": 1.8537, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.1453659686480965, | |
| "grad_norm": 0.1885564625263214, | |
| "learning_rate": 0.0009937710013657513, | |
| "loss": 1.8781, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.1461517306407889, | |
| "grad_norm": 0.20302869379520416, | |
| "learning_rate": 0.000993553309220597, | |
| "loss": 1.7989, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.14693749263348133, | |
| "grad_norm": 0.15116022527217865, | |
| "learning_rate": 0.0009933319028277475, | |
| "loss": 1.7587, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.14772325462617372, | |
| "grad_norm": 0.20895975828170776, | |
| "learning_rate": 0.0009931067838534029, | |
| "loss": 1.901, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.14850901661886615, | |
| "grad_norm": 0.19565752148628235, | |
| "learning_rate": 0.000992877953991701, | |
| "loss": 1.8388, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.14929477861155857, | |
| "grad_norm": 0.233185276389122, | |
| "learning_rate": 0.0009926454149647072, | |
| "loss": 1.8021, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.15008054060425097, | |
| "grad_norm": 0.19039976596832275, | |
| "learning_rate": 0.0009924091685223995, | |
| "loss": 1.8105, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.1508663025969434, | |
| "grad_norm": 0.15231332182884216, | |
| "learning_rate": 0.0009921692164426561, | |
| "loss": 1.8455, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.1516520645896358, | |
| "grad_norm": 0.16343708336353302, | |
| "learning_rate": 0.0009919255605312428, | |
| "loss": 1.9254, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.1524378265823282, | |
| "grad_norm": 0.19997046887874603, | |
| "learning_rate": 0.0009916782026217976, | |
| "loss": 1.7809, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.15322358857502064, | |
| "grad_norm": 0.15831786394119263, | |
| "learning_rate": 0.0009914271445758192, | |
| "loss": 1.8139, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.15400935056771303, | |
| "grad_norm": 0.23234544694423676, | |
| "learning_rate": 0.000991172388282651, | |
| "loss": 1.8541, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.15479511256040546, | |
| "grad_norm": 0.16235429048538208, | |
| "learning_rate": 0.0009909139356594678, | |
| "loss": 1.8377, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.15558087455309785, | |
| "grad_norm": 0.1860172301530838, | |
| "learning_rate": 0.000990651788651261, | |
| "loss": 1.8644, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.15636663654579028, | |
| "grad_norm": 0.18624064326286316, | |
| "learning_rate": 0.0009903859492308247, | |
| "loss": 1.8254, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.1571523985384827, | |
| "grad_norm": 0.2140810191631317, | |
| "learning_rate": 0.00099011641939874, | |
| "loss": 1.7965, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1579381605311751, | |
| "grad_norm": 0.17189514636993408, | |
| "learning_rate": 0.0009898432011833601, | |
| "loss": 1.7997, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.15872392252386752, | |
| "grad_norm": 0.16160257160663605, | |
| "learning_rate": 0.000989566296640796, | |
| "loss": 1.7915, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.15950968451655995, | |
| "grad_norm": 0.15080223977565765, | |
| "learning_rate": 0.0009892857078548994, | |
| "loss": 1.8035, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.16029544650925234, | |
| "grad_norm": 0.20072196424007416, | |
| "learning_rate": 0.0009890014369372483, | |
| "loss": 1.7741, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.16108120850194477, | |
| "grad_norm": 0.16227015852928162, | |
| "learning_rate": 0.0009887134860271302, | |
| "loss": 1.7972, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.16186697049463716, | |
| "grad_norm": 0.19584600627422333, | |
| "learning_rate": 0.0009884218572915272, | |
| "loss": 1.7683, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.1626527324873296, | |
| "grad_norm": 0.2638975977897644, | |
| "learning_rate": 0.0009881265529250985, | |
| "loss": 1.8308, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.163438494480022, | |
| "grad_norm": 0.15571711957454681, | |
| "learning_rate": 0.0009878275751501644, | |
| "loss": 1.8231, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.1642242564727144, | |
| "grad_norm": 0.2547709047794342, | |
| "learning_rate": 0.0009875249262166898, | |
| "loss": 1.7788, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.16501001846540683, | |
| "grad_norm": 0.2403130978345871, | |
| "learning_rate": 0.0009872186084022663, | |
| "loss": 1.8067, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.16579578045809923, | |
| "grad_norm": 0.16266390681266785, | |
| "learning_rate": 0.0009869086240120967, | |
| "loss": 1.8301, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.16658154245079165, | |
| "grad_norm": 0.18285565078258514, | |
| "learning_rate": 0.0009865949753789759, | |
| "loss": 1.7974, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.16736730444348408, | |
| "grad_norm": 0.1687275618314743, | |
| "learning_rate": 0.0009862776648632745, | |
| "loss": 1.7562, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.16815306643617647, | |
| "grad_norm": 0.1526845097541809, | |
| "learning_rate": 0.000985956694852921, | |
| "loss": 1.7842, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.1689388284288689, | |
| "grad_norm": 0.22343283891677856, | |
| "learning_rate": 0.000985632067763383, | |
| "loss": 1.8555, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.16972459042156132, | |
| "grad_norm": 0.15908358991146088, | |
| "learning_rate": 0.0009853037860376496, | |
| "loss": 1.7357, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.17051035241425372, | |
| "grad_norm": 0.19639401137828827, | |
| "learning_rate": 0.0009849718521462133, | |
| "loss": 1.7991, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.17129611440694614, | |
| "grad_norm": 0.23364387452602386, | |
| "learning_rate": 0.0009846362685870505, | |
| "loss": 1.8042, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.17208187639963854, | |
| "grad_norm": 0.1824749857187271, | |
| "learning_rate": 0.0009842970378856042, | |
| "loss": 1.8007, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.17286763839233096, | |
| "grad_norm": 0.17124874889850616, | |
| "learning_rate": 0.000983954162594763, | |
| "loss": 1.8384, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.1736534003850234, | |
| "grad_norm": 0.16191472113132477, | |
| "learning_rate": 0.0009836076452948434, | |
| "loss": 1.826, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.17443916237771578, | |
| "grad_norm": 0.315621942281723, | |
| "learning_rate": 0.0009832574885935703, | |
| "loss": 1.7842, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.1752249243704082, | |
| "grad_norm": 0.18572475016117096, | |
| "learning_rate": 0.0009829036951260566, | |
| "loss": 1.7614, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.1760106863631006, | |
| "grad_norm": 0.1333106905221939, | |
| "learning_rate": 0.0009825462675547834, | |
| "loss": 1.7805, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.17679644835579303, | |
| "grad_norm": 0.1603097766637802, | |
| "learning_rate": 0.0009821852085695813, | |
| "loss": 1.7832, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.17758221034848545, | |
| "grad_norm": 0.21313336491584778, | |
| "learning_rate": 0.0009818205208876082, | |
| "loss": 1.8232, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.17836797234117785, | |
| "grad_norm": 0.23421534895896912, | |
| "learning_rate": 0.0009814522072533309, | |
| "loss": 1.7626, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.17915373433387027, | |
| "grad_norm": 0.16554996371269226, | |
| "learning_rate": 0.0009810802704385022, | |
| "loss": 1.8293, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.1799394963265627, | |
| "grad_norm": 0.1454770267009735, | |
| "learning_rate": 0.0009807047132421422, | |
| "loss": 1.9289, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.1807252583192551, | |
| "grad_norm": 0.21001407504081726, | |
| "learning_rate": 0.0009803255384905158, | |
| "loss": 1.7769, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.18151102031194752, | |
| "grad_norm": 0.2452252209186554, | |
| "learning_rate": 0.000979942749037112, | |
| "loss": 1.7492, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.18229678230463991, | |
| "grad_norm": 0.22114233672618866, | |
| "learning_rate": 0.0009795563477626224, | |
| "loss": 1.8087, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.18308254429733234, | |
| "grad_norm": 0.1857612133026123, | |
| "learning_rate": 0.0009791663375749195, | |
| "loss": 1.7559, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.18386830629002476, | |
| "grad_norm": 0.19770672917366028, | |
| "learning_rate": 0.0009787727214090344, | |
| "loss": 1.7924, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.18465406828271716, | |
| "grad_norm": 0.1833062618970871, | |
| "learning_rate": 0.0009783755022271356, | |
| "loss": 1.8486, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.18543983027540958, | |
| "grad_norm": 0.2029699981212616, | |
| "learning_rate": 0.0009779746830185056, | |
| "loss": 1.809, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.18622559226810198, | |
| "grad_norm": 0.1918937712907791, | |
| "learning_rate": 0.0009775702667995196, | |
| "loss": 1.7214, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.1870113542607944, | |
| "grad_norm": 0.15816162526607513, | |
| "learning_rate": 0.000977162256613622, | |
| "loss": 1.7447, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.18779711625348683, | |
| "grad_norm": 0.17059017717838287, | |
| "learning_rate": 0.0009767506555313033, | |
| "loss": 1.7853, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.18858287824617923, | |
| "grad_norm": 0.15515488386154175, | |
| "learning_rate": 0.0009763354666500778, | |
| "loss": 1.8019, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.18936864023887165, | |
| "grad_norm": 0.17095063626766205, | |
| "learning_rate": 0.0009759166930944596, | |
| "loss": 1.7827, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.19015440223156405, | |
| "grad_norm": 0.20768775045871735, | |
| "learning_rate": 0.0009754943380159398, | |
| "loss": 1.7911, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.19094016422425647, | |
| "grad_norm": 0.2060774266719818, | |
| "learning_rate": 0.0009750684045929618, | |
| "loss": 1.7821, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.1917259262169489, | |
| "grad_norm": 0.1782405525445938, | |
| "learning_rate": 0.0009746388960308982, | |
| "loss": 1.7665, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.1925116882096413, | |
| "grad_norm": 0.17600089311599731, | |
| "learning_rate": 0.0009742058155620266, | |
| "loss": 1.7827, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.19329745020233371, | |
| "grad_norm": 0.14907889068126678, | |
| "learning_rate": 0.0009737691664455045, | |
| "loss": 1.7102, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.19408321219502614, | |
| "grad_norm": 0.16940507292747498, | |
| "learning_rate": 0.0009733289519673459, | |
| "loss": 1.7302, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.19486897418771854, | |
| "grad_norm": 0.17307011783123016, | |
| "learning_rate": 0.0009728851754403955, | |
| "loss": 1.8605, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.19565473618041096, | |
| "grad_norm": 0.1481655091047287, | |
| "learning_rate": 0.0009724378402043049, | |
| "loss": 1.8172, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.19644049817310336, | |
| "grad_norm": 0.16137413680553436, | |
| "learning_rate": 0.0009719869496255065, | |
| "loss": 1.831, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.19722626016579578, | |
| "grad_norm": 0.14303524792194366, | |
| "learning_rate": 0.0009715325070971883, | |
| "loss": 1.7464, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.1980120221584882, | |
| "grad_norm": 0.14343352615833282, | |
| "learning_rate": 0.0009710745160392692, | |
| "loss": 1.7807, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.1987977841511806, | |
| "grad_norm": 0.14091046154499054, | |
| "learning_rate": 0.000970612979898372, | |
| "loss": 1.8111, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.19958354614387303, | |
| "grad_norm": 0.22847269475460052, | |
| "learning_rate": 0.0009701479021477986, | |
| "loss": 1.8072, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.20036930813656542, | |
| "grad_norm": 0.1330152153968811, | |
| "learning_rate": 0.000969679286287503, | |
| "loss": 1.7495, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.20115507012925785, | |
| "grad_norm": 0.16856741905212402, | |
| "learning_rate": 0.0009692071358440656, | |
| "loss": 1.7604, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.20194083212195027, | |
| "grad_norm": 0.16036243736743927, | |
| "learning_rate": 0.0009687314543706662, | |
| "loss": 1.7445, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.20272659411464267, | |
| "grad_norm": 0.1447385549545288, | |
| "learning_rate": 0.0009682522454470577, | |
| "loss": 1.7842, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.2035123561073351, | |
| "grad_norm": 0.19771301746368408, | |
| "learning_rate": 0.0009677695126795387, | |
| "loss": 1.7434, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.20429811810002751, | |
| "grad_norm": 0.17068088054656982, | |
| "learning_rate": 0.0009672832597009267, | |
| "loss": 1.7717, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2050838800927199, | |
| "grad_norm": 0.17894063889980316, | |
| "learning_rate": 0.0009667934901705304, | |
| "loss": 1.6901, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.20586964208541234, | |
| "grad_norm": 0.12928830087184906, | |
| "learning_rate": 0.0009663002077741228, | |
| "loss": 1.7405, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.20665540407810473, | |
| "grad_norm": 0.18357087671756744, | |
| "learning_rate": 0.0009658034162239126, | |
| "loss": 1.748, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.20744116607079716, | |
| "grad_norm": 0.17646510899066925, | |
| "learning_rate": 0.000965303119258517, | |
| "loss": 1.7418, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.20822692806348958, | |
| "grad_norm": 0.14562882483005524, | |
| "learning_rate": 0.0009647993206429336, | |
| "loss": 1.734, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.20901269005618198, | |
| "grad_norm": 0.16555768251419067, | |
| "learning_rate": 0.0009642920241685109, | |
| "loss": 1.7481, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.2097984520488744, | |
| "grad_norm": 0.22527766227722168, | |
| "learning_rate": 0.0009637812336529214, | |
| "loss": 1.797, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.2105842140415668, | |
| "grad_norm": 0.2324640154838562, | |
| "learning_rate": 0.0009632669529401317, | |
| "loss": 1.7871, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.21136997603425922, | |
| "grad_norm": 0.18207474052906036, | |
| "learning_rate": 0.0009627491859003744, | |
| "loss": 1.7247, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.21215573802695165, | |
| "grad_norm": 0.21197597682476044, | |
| "learning_rate": 0.0009622279364301177, | |
| "loss": 1.7338, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.21294150001964404, | |
| "grad_norm": 0.15248093008995056, | |
| "learning_rate": 0.0009617032084520378, | |
| "loss": 1.6968, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.21372726201233647, | |
| "grad_norm": 0.15904445946216583, | |
| "learning_rate": 0.000961175005914988, | |
| "loss": 1.7187, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.2145130240050289, | |
| "grad_norm": 0.1640489548444748, | |
| "learning_rate": 0.0009606433327939697, | |
| "loss": 1.7054, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.2152987859977213, | |
| "grad_norm": 0.20775249600410461, | |
| "learning_rate": 0.0009601081930901024, | |
| "loss": 1.7597, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.2160845479904137, | |
| "grad_norm": 0.22332137823104858, | |
| "learning_rate": 0.0009595695908305927, | |
| "loss": 1.7071, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.2168703099831061, | |
| "grad_norm": 0.2273789495229721, | |
| "learning_rate": 0.0009590275300687057, | |
| "loss": 1.7413, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.21765607197579853, | |
| "grad_norm": 0.17828406393527985, | |
| "learning_rate": 0.0009584820148837331, | |
| "loss": 1.7059, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.21844183396849096, | |
| "grad_norm": 0.19548800587654114, | |
| "learning_rate": 0.0009579330493809629, | |
| "loss": 1.7752, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.21922759596118335, | |
| "grad_norm": 0.16092374920845032, | |
| "learning_rate": 0.0009573806376916486, | |
| "loss": 1.7438, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.22001335795387578, | |
| "grad_norm": 0.1342301070690155, | |
| "learning_rate": 0.0009568247839729782, | |
| "loss": 1.751, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.22079911994656817, | |
| "grad_norm": 0.1362721025943756, | |
| "learning_rate": 0.0009562654924080425, | |
| "loss": 1.717, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.2215848819392606, | |
| "grad_norm": 0.16024941205978394, | |
| "learning_rate": 0.0009557027672058043, | |
| "loss": 1.8112, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.22237064393195302, | |
| "grad_norm": 0.154706671833992, | |
| "learning_rate": 0.000955136612601066, | |
| "loss": 1.738, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.22315640592464542, | |
| "grad_norm": 0.1728077530860901, | |
| "learning_rate": 0.0009545670328544382, | |
| "loss": 1.7967, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.22394216791733784, | |
| "grad_norm": 0.15964308381080627, | |
| "learning_rate": 0.0009539940322523072, | |
| "loss": 1.8126, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.22472792991003024, | |
| "grad_norm": 0.1784270852804184, | |
| "learning_rate": 0.0009534176151068035, | |
| "loss": 1.7388, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.22551369190272266, | |
| "grad_norm": 0.18767036497592926, | |
| "learning_rate": 0.0009528377857557686, | |
| "loss": 1.6965, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.2262994538954151, | |
| "grad_norm": 0.21508271992206573, | |
| "learning_rate": 0.0009522545485627227, | |
| "loss": 1.7143, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.22708521588810748, | |
| "grad_norm": 0.30984237790107727, | |
| "learning_rate": 0.0009516679079168318, | |
| "loss": 1.6847, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.2278709778807999, | |
| "grad_norm": 0.12825290858745575, | |
| "learning_rate": 0.0009510778682328746, | |
| "loss": 1.749, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.22865673987349233, | |
| "grad_norm": 0.23394402861595154, | |
| "learning_rate": 0.0009504844339512095, | |
| "loss": 1.7347, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.22944250186618473, | |
| "grad_norm": 0.24663372337818146, | |
| "learning_rate": 0.0009498876095377409, | |
| "loss": 1.6935, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.23022826385887715, | |
| "grad_norm": 0.23490461707115173, | |
| "learning_rate": 0.0009492873994838858, | |
| "loss": 1.804, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.23101402585156955, | |
| "grad_norm": 0.20233745872974396, | |
| "learning_rate": 0.0009486838083065396, | |
| "loss": 1.7856, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.23179978784426197, | |
| "grad_norm": 0.18699239194393158, | |
| "learning_rate": 0.0009480768405480432, | |
| "loss": 1.6904, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.2325855498369544, | |
| "grad_norm": 0.27135419845581055, | |
| "learning_rate": 0.0009474665007761471, | |
| "loss": 1.755, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.2333713118296468, | |
| "grad_norm": 0.1718074530363083, | |
| "learning_rate": 0.0009468527935839787, | |
| "loss": 1.7456, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.23415707382233922, | |
| "grad_norm": 0.161013662815094, | |
| "learning_rate": 0.0009462357235900065, | |
| "loss": 1.7511, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.23494283581503161, | |
| "grad_norm": 0.20107895135879517, | |
| "learning_rate": 0.0009456152954380063, | |
| "loss": 1.7084, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.23572859780772404, | |
| "grad_norm": 0.14512185752391815, | |
| "learning_rate": 0.0009449915137970255, | |
| "loss": 1.7557, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.23651435980041646, | |
| "grad_norm": 0.1591528356075287, | |
| "learning_rate": 0.0009443643833613483, | |
| "loss": 1.7474, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.23730012179310886, | |
| "grad_norm": 0.22338952124118805, | |
| "learning_rate": 0.0009437339088504603, | |
| "loss": 1.7563, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.23808588378580128, | |
| "grad_norm": 0.1846311241388321, | |
| "learning_rate": 0.0009431000950090131, | |
| "loss": 1.7591, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.2388716457784937, | |
| "grad_norm": 0.1189197227358818, | |
| "learning_rate": 0.0009424629466067889, | |
| "loss": 1.7469, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.2396574077711861, | |
| "grad_norm": 0.19045637547969818, | |
| "learning_rate": 0.0009418224684386633, | |
| "loss": 1.6978, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.24044316976387853, | |
| "grad_norm": 0.1384202241897583, | |
| "learning_rate": 0.0009411786653245712, | |
| "loss": 1.7447, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.24122893175657092, | |
| "grad_norm": 0.19370493292808533, | |
| "learning_rate": 0.0009405315421094684, | |
| "loss": 1.777, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.24201469374926335, | |
| "grad_norm": 0.14141830801963806, | |
| "learning_rate": 0.0009398811036632973, | |
| "loss": 1.7378, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.24280045574195577, | |
| "grad_norm": 0.18611302971839905, | |
| "learning_rate": 0.000939227354880948, | |
| "loss": 1.7318, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.24358621773464817, | |
| "grad_norm": 0.18367739021778107, | |
| "learning_rate": 0.0009385703006822237, | |
| "loss": 1.7881, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.2443719797273406, | |
| "grad_norm": 0.17989496886730194, | |
| "learning_rate": 0.0009379099460118017, | |
| "loss": 1.7075, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.245157741720033, | |
| "grad_norm": 0.16730940341949463, | |
| "learning_rate": 0.0009372462958391978, | |
| "loss": 1.7174, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.24594350371272541, | |
| "grad_norm": 0.16663284599781036, | |
| "learning_rate": 0.0009365793551587278, | |
| "loss": 1.7412, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.24672926570541784, | |
| "grad_norm": 0.1993977576494217, | |
| "learning_rate": 0.0009359091289894702, | |
| "loss": 1.7547, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.24751502769811023, | |
| "grad_norm": 0.14924155175685883, | |
| "learning_rate": 0.000935235622375229, | |
| "loss": 1.6906, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.24830078969080266, | |
| "grad_norm": 0.1745729148387909, | |
| "learning_rate": 0.0009345588403844945, | |
| "loss": 1.6876, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.24908655168349506, | |
| "grad_norm": 0.17542210221290588, | |
| "learning_rate": 0.000933878788110407, | |
| "loss": 1.6807, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.24987231367618748, | |
| "grad_norm": 0.18530936539173126, | |
| "learning_rate": 0.0009331954706707162, | |
| "loss": 1.7146, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.2506580756688799, | |
| "grad_norm": 0.14683561027050018, | |
| "learning_rate": 0.0009325088932077448, | |
| "loss": 1.707, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.2514438376615723, | |
| "grad_norm": 0.16001202166080475, | |
| "learning_rate": 0.0009318190608883485, | |
| "loss": 1.7857, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.2522295996542647, | |
| "grad_norm": 0.16465690732002258, | |
| "learning_rate": 0.0009311259789038775, | |
| "loss": 1.6935, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.25301536164695715, | |
| "grad_norm": 0.20848460495471954, | |
| "learning_rate": 0.0009304296524701377, | |
| "loss": 1.7694, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.2538011236396496, | |
| "grad_norm": 0.14489056169986725, | |
| "learning_rate": 0.0009297300868273506, | |
| "loss": 1.7444, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.25458688563234194, | |
| "grad_norm": 0.14249326288700104, | |
| "learning_rate": 0.0009290272872401153, | |
| "loss": 1.7361, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.25537264762503437, | |
| "grad_norm": 0.15388713777065277, | |
| "learning_rate": 0.0009283212589973671, | |
| "loss": 1.7088, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.2561584096177268, | |
| "grad_norm": 0.21898041665554047, | |
| "learning_rate": 0.0009276120074123395, | |
| "loss": 1.792, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.2569441716104192, | |
| "grad_norm": 0.1465713083744049, | |
| "learning_rate": 0.0009268995378225228, | |
| "loss": 1.7808, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.25772993360311164, | |
| "grad_norm": 0.13897457718849182, | |
| "learning_rate": 0.0009261838555896245, | |
| "loss": 1.702, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.258515695595804, | |
| "grad_norm": 0.18026679754257202, | |
| "learning_rate": 0.000925464966099529, | |
| "loss": 1.7441, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.25930145758849643, | |
| "grad_norm": 0.13608209788799286, | |
| "learning_rate": 0.0009247428747622573, | |
| "loss": 1.6581, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.26008721958118886, | |
| "grad_norm": 0.21667583286762238, | |
| "learning_rate": 0.0009240175870119252, | |
| "loss": 1.7058, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.2608729815738813, | |
| "grad_norm": 0.19037890434265137, | |
| "learning_rate": 0.0009232891083067043, | |
| "loss": 1.7079, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.2616587435665737, | |
| "grad_norm": 0.14540046453475952, | |
| "learning_rate": 0.0009225574441287787, | |
| "loss": 1.7886, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.2624445055592661, | |
| "grad_norm": 0.15847225487232208, | |
| "learning_rate": 0.0009218225999843057, | |
| "loss": 1.7691, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.2632302675519585, | |
| "grad_norm": 0.1281496286392212, | |
| "learning_rate": 0.0009210845814033729, | |
| "loss": 1.7192, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.2640160295446509, | |
| "grad_norm": 0.14975865185260773, | |
| "learning_rate": 0.0009203433939399577, | |
| "loss": 1.6939, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.26480179153734335, | |
| "grad_norm": 0.23517081141471863, | |
| "learning_rate": 0.0009195990431718846, | |
| "loss": 1.7496, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.26558755353003577, | |
| "grad_norm": 0.24029028415679932, | |
| "learning_rate": 0.000918851534700784, | |
| "loss": 1.7212, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.26637331552272814, | |
| "grad_norm": 0.1643204241991043, | |
| "learning_rate": 0.0009181008741520494, | |
| "loss": 1.6901, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.26715907751542056, | |
| "grad_norm": 0.14907053112983704, | |
| "learning_rate": 0.0009173470671747952, | |
| "loss": 1.7005, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.267944839508113, | |
| "grad_norm": 0.14824387431144714, | |
| "learning_rate": 0.0009165901194418147, | |
| "loss": 1.7242, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.2687306015008054, | |
| "grad_norm": 0.15933027863502502, | |
| "learning_rate": 0.0009158300366495371, | |
| "loss": 1.7147, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.26951636349349783, | |
| "grad_norm": 0.15938429534435272, | |
| "learning_rate": 0.0009150668245179839, | |
| "loss": 1.755, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.27030212548619026, | |
| "grad_norm": 0.1357879489660263, | |
| "learning_rate": 0.0009143004887907273, | |
| "loss": 1.7154, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.2710878874788826, | |
| "grad_norm": 0.17757509648799896, | |
| "learning_rate": 0.0009135310352348458, | |
| "loss": 1.7224, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.27187364947157505, | |
| "grad_norm": 0.1572904735803604, | |
| "learning_rate": 0.0009127584696408813, | |
| "loss": 1.7091, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.2726594114642675, | |
| "grad_norm": 0.13565635681152344, | |
| "learning_rate": 0.0009119827978227952, | |
| "loss": 1.6385, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.2734451734569599, | |
| "grad_norm": 0.17699536681175232, | |
| "learning_rate": 0.0009112040256179253, | |
| "loss": 1.7315, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.2742309354496523, | |
| "grad_norm": 0.13635475933551788, | |
| "learning_rate": 0.0009104221588869407, | |
| "loss": 1.7882, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.2750166974423447, | |
| "grad_norm": 0.1719839721918106, | |
| "learning_rate": 0.0009096372035137995, | |
| "loss": 1.7376, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.2758024594350371, | |
| "grad_norm": 0.16986599564552307, | |
| "learning_rate": 0.0009088491654057025, | |
| "loss": 1.7442, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.27658822142772954, | |
| "grad_norm": 0.1694546341896057, | |
| "learning_rate": 0.0009080580504930503, | |
| "loss": 1.733, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.27737398342042197, | |
| "grad_norm": 0.16824652254581451, | |
| "learning_rate": 0.0009072638647293976, | |
| "loss": 1.726, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.2781597454131144, | |
| "grad_norm": 0.15378861129283905, | |
| "learning_rate": 0.0009064666140914093, | |
| "loss": 1.7501, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.27894550740580676, | |
| "grad_norm": 0.17473557591438293, | |
| "learning_rate": 0.0009056663045788147, | |
| "loss": 1.6769, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.2797312693984992, | |
| "grad_norm": 0.17376935482025146, | |
| "learning_rate": 0.000904862942214363, | |
| "loss": 1.6894, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.2805170313911916, | |
| "grad_norm": 0.1903456747531891, | |
| "learning_rate": 0.0009040565330437778, | |
| "loss": 1.6576, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.28130279338388403, | |
| "grad_norm": 0.21947069466114044, | |
| "learning_rate": 0.0009032470831357112, | |
| "loss": 1.7025, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.28208855537657646, | |
| "grad_norm": 0.12956298887729645, | |
| "learning_rate": 0.0009024345985816988, | |
| "loss": 1.7224, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.2828743173692688, | |
| "grad_norm": 0.17501172423362732, | |
| "learning_rate": 0.0009016190854961128, | |
| "loss": 1.7486, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.28366007936196125, | |
| "grad_norm": 0.13125917315483093, | |
| "learning_rate": 0.0009008005500161177, | |
| "loss": 1.7292, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.2844458413546537, | |
| "grad_norm": 0.13544058799743652, | |
| "learning_rate": 0.0008999789983016224, | |
| "loss": 1.7354, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.2852316033473461, | |
| "grad_norm": 0.18308532238006592, | |
| "learning_rate": 0.0008991544365352349, | |
| "loss": 1.6718, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.2860173653400385, | |
| "grad_norm": 0.1633603572845459, | |
| "learning_rate": 0.000898326870922215, | |
| "loss": 1.7, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.2868031273327309, | |
| "grad_norm": 0.1754382848739624, | |
| "learning_rate": 0.0008974963076904284, | |
| "loss": 1.7266, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.2875888893254233, | |
| "grad_norm": 0.2322840392589569, | |
| "learning_rate": 0.0008966627530902993, | |
| "loss": 1.6203, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.28837465131811574, | |
| "grad_norm": 0.1650850623846054, | |
| "learning_rate": 0.0008958262133947638, | |
| "loss": 1.7018, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.28916041331080816, | |
| "grad_norm": 0.18278241157531738, | |
| "learning_rate": 0.0008949866948992219, | |
| "loss": 1.7729, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.2899461753035006, | |
| "grad_norm": 0.1643027514219284, | |
| "learning_rate": 0.0008941442039214911, | |
| "loss": 1.7301, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.290731937296193, | |
| "grad_norm": 0.18282204866409302, | |
| "learning_rate": 0.000893298746801758, | |
| "loss": 1.7888, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2915176992888854, | |
| "grad_norm": 0.16003000736236572, | |
| "learning_rate": 0.0008924503299025313, | |
| "loss": 1.7235, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.2923034612815778, | |
| "grad_norm": 0.16633334755897522, | |
| "learning_rate": 0.0008915989596085933, | |
| "loss": 1.7026, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.2930892232742702, | |
| "grad_norm": 0.29993849992752075, | |
| "learning_rate": 0.0008907446423269525, | |
| "loss": 1.7593, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.29387498526696265, | |
| "grad_norm": 0.15839068591594696, | |
| "learning_rate": 0.0008898873844867947, | |
| "loss": 1.6872, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.2946607472596551, | |
| "grad_norm": 0.21255196630954742, | |
| "learning_rate": 0.0008890271925394353, | |
| "loss": 1.8016, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.29544650925234744, | |
| "grad_norm": 0.1803053617477417, | |
| "learning_rate": 0.0008881640729582698, | |
| "loss": 1.7209, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.29623227124503987, | |
| "grad_norm": 0.20228391885757446, | |
| "learning_rate": 0.0008872980322387264, | |
| "loss": 1.6888, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.2970180332377323, | |
| "grad_norm": 0.1691243052482605, | |
| "learning_rate": 0.000886429076898216, | |
| "loss": 1.6535, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.2978037952304247, | |
| "grad_norm": 0.14024175703525543, | |
| "learning_rate": 0.0008855572134760834, | |
| "loss": 1.7665, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.29858955722311714, | |
| "grad_norm": 0.20091187953948975, | |
| "learning_rate": 0.0008846824485335587, | |
| "loss": 1.691, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.2993753192158095, | |
| "grad_norm": 0.12678097188472748, | |
| "learning_rate": 0.0008838047886537071, | |
| "loss": 1.6767, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.30016108120850193, | |
| "grad_norm": 0.14702141284942627, | |
| "learning_rate": 0.0008829242404413798, | |
| "loss": 1.6825, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.30094684320119436, | |
| "grad_norm": 0.17799334228038788, | |
| "learning_rate": 0.0008820408105231643, | |
| "loss": 1.7464, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.3017326051938868, | |
| "grad_norm": 0.1361367255449295, | |
| "learning_rate": 0.0008811545055473345, | |
| "loss": 1.665, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.3025183671865792, | |
| "grad_norm": 0.14883366227149963, | |
| "learning_rate": 0.0008802653321838003, | |
| "loss": 1.6717, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.3033041291792716, | |
| "grad_norm": 0.1452435553073883, | |
| "learning_rate": 0.0008793732971240582, | |
| "loss": 1.6707, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.304089891171964, | |
| "grad_norm": 0.14490064978599548, | |
| "learning_rate": 0.00087847840708114, | |
| "loss": 1.7017, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.3048756531646564, | |
| "grad_norm": 0.1717883199453354, | |
| "learning_rate": 0.0008775806687895631, | |
| "loss": 1.6796, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.30566141515734885, | |
| "grad_norm": 0.17518092691898346, | |
| "learning_rate": 0.0008766800890052793, | |
| "loss": 1.7223, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.30644717715004127, | |
| "grad_norm": 0.21119089424610138, | |
| "learning_rate": 0.0008757766745056241, | |
| "loss": 1.6164, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.30723293914273364, | |
| "grad_norm": 0.21860988438129425, | |
| "learning_rate": 0.0008748704320892658, | |
| "loss": 1.6693, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.30801870113542607, | |
| "grad_norm": 0.17688967287540436, | |
| "learning_rate": 0.000873961368576154, | |
| "loss": 1.6734, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.3088044631281185, | |
| "grad_norm": 0.15035995841026306, | |
| "learning_rate": 0.0008730494908074693, | |
| "loss": 1.6174, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.3095902251208109, | |
| "grad_norm": 0.131919726729393, | |
| "learning_rate": 0.00087213480564557, | |
| "loss": 1.7046, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.31037598711350334, | |
| "grad_norm": 0.16669556498527527, | |
| "learning_rate": 0.0008712173199739424, | |
| "loss": 1.6493, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.3111617491061957, | |
| "grad_norm": 0.14696942269802094, | |
| "learning_rate": 0.0008702970406971473, | |
| "loss": 1.6731, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.31194751109888813, | |
| "grad_norm": 0.12769675254821777, | |
| "learning_rate": 0.0008693739747407697, | |
| "loss": 1.665, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.31273327309158055, | |
| "grad_norm": 0.15159061551094055, | |
| "learning_rate": 0.000868448129051365, | |
| "loss": 1.6186, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.313519035084273, | |
| "grad_norm": 0.17533285915851593, | |
| "learning_rate": 0.0008675195105964083, | |
| "loss": 1.677, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.3143047970769654, | |
| "grad_norm": 0.19997474551200867, | |
| "learning_rate": 0.0008665881263642407, | |
| "loss": 1.6715, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3150905590696578, | |
| "grad_norm": 0.191118523478508, | |
| "learning_rate": 0.0008656539833640174, | |
| "loss": 1.6473, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.3158763210623502, | |
| "grad_norm": 0.15149670839309692, | |
| "learning_rate": 0.0008647170886256546, | |
| "loss": 1.6804, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.3166620830550426, | |
| "grad_norm": 0.14794859290122986, | |
| "learning_rate": 0.0008637774491997773, | |
| "loss": 1.7138, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.31744784504773504, | |
| "grad_norm": 0.1326620876789093, | |
| "learning_rate": 0.000862835072157665, | |
| "loss": 1.6957, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.31823360704042747, | |
| "grad_norm": 0.16334739327430725, | |
| "learning_rate": 0.0008618899645911997, | |
| "loss": 1.7162, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.3190193690331199, | |
| "grad_norm": 0.17086929082870483, | |
| "learning_rate": 0.0008609421336128121, | |
| "loss": 1.7613, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.31980513102581226, | |
| "grad_norm": 0.14249901473522186, | |
| "learning_rate": 0.0008599915863554273, | |
| "loss": 1.7397, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.3205908930185047, | |
| "grad_norm": 0.21984586119651794, | |
| "learning_rate": 0.0008590383299724127, | |
| "loss": 1.651, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.3213766550111971, | |
| "grad_norm": 0.12290716916322708, | |
| "learning_rate": 0.0008580823716375227, | |
| "loss": 1.6888, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.32216241700388953, | |
| "grad_norm": 0.13404521346092224, | |
| "learning_rate": 0.0008571237185448456, | |
| "loss": 1.7038, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.32294817899658196, | |
| "grad_norm": 0.24460573494434357, | |
| "learning_rate": 0.000856162377908749, | |
| "loss": 1.6997, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.3237339409892743, | |
| "grad_norm": 0.1533568948507309, | |
| "learning_rate": 0.000855198356963826, | |
| "loss": 1.6101, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.32451970298196675, | |
| "grad_norm": 0.14102007448673248, | |
| "learning_rate": 0.0008542316629648398, | |
| "loss": 1.6884, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.3253054649746592, | |
| "grad_norm": 0.17003892362117767, | |
| "learning_rate": 0.0008532623031866704, | |
| "loss": 1.7216, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.3260912269673516, | |
| "grad_norm": 0.17711204290390015, | |
| "learning_rate": 0.0008522902849242587, | |
| "loss": 1.637, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.326876988960044, | |
| "grad_norm": 0.1561163067817688, | |
| "learning_rate": 0.0008513156154925523, | |
| "loss": 1.7727, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.3276627509527364, | |
| "grad_norm": 0.1554763913154602, | |
| "learning_rate": 0.0008503383022264506, | |
| "loss": 1.6895, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.3284485129454288, | |
| "grad_norm": 0.16475364565849304, | |
| "learning_rate": 0.0008493583524807484, | |
| "loss": 1.6817, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.32923427493812124, | |
| "grad_norm": 0.15485644340515137, | |
| "learning_rate": 0.0008483757736300822, | |
| "loss": 1.7206, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.33002003693081366, | |
| "grad_norm": 0.15525245666503906, | |
| "learning_rate": 0.0008473905730688733, | |
| "loss": 1.6805, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.3308057989235061, | |
| "grad_norm": 0.17805235087871552, | |
| "learning_rate": 0.0008464027582112732, | |
| "loss": 1.7431, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.33159156091619846, | |
| "grad_norm": 0.16209563612937927, | |
| "learning_rate": 0.000845412336491107, | |
| "loss": 1.7098, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.3323773229088909, | |
| "grad_norm": 0.1910310685634613, | |
| "learning_rate": 0.0008444193153618182, | |
| "loss": 1.6679, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.3331630849015833, | |
| "grad_norm": 0.202640563249588, | |
| "learning_rate": 0.0008434237022964117, | |
| "loss": 1.7205, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.33394884689427573, | |
| "grad_norm": 0.3347494602203369, | |
| "learning_rate": 0.0008424255047873986, | |
| "loss": 1.7269, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.33473460888696815, | |
| "grad_norm": 0.1840268224477768, | |
| "learning_rate": 0.0008414247303467389, | |
| "loss": 1.6667, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.3355203708796605, | |
| "grad_norm": 0.17524029314517975, | |
| "learning_rate": 0.0008404213865057857, | |
| "loss": 1.7353, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.33630613287235295, | |
| "grad_norm": 0.15382514894008636, | |
| "learning_rate": 0.0008394154808152278, | |
| "loss": 1.6428, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.33709189486504537, | |
| "grad_norm": 0.2421923577785492, | |
| "learning_rate": 0.0008384070208450338, | |
| "loss": 1.7266, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.3378776568577378, | |
| "grad_norm": 0.16263365745544434, | |
| "learning_rate": 0.0008373960141843938, | |
| "loss": 1.7025, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.3386634188504302, | |
| "grad_norm": 0.17603865265846252, | |
| "learning_rate": 0.000836382468441664, | |
| "loss": 1.6223, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.33944918084312264, | |
| "grad_norm": 0.6742755770683289, | |
| "learning_rate": 0.0008353663912443079, | |
| "loss": 1.7571, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.340234942835815, | |
| "grad_norm": 0.16411545872688293, | |
| "learning_rate": 0.0008343477902388395, | |
| "loss": 1.7097, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.34102070482850744, | |
| "grad_norm": 0.1652558147907257, | |
| "learning_rate": 0.0008333266730907663, | |
| "loss": 1.7458, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.34180646682119986, | |
| "grad_norm": 0.1978401392698288, | |
| "learning_rate": 0.0008323030474845306, | |
| "loss": 1.6904, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.3425922288138923, | |
| "grad_norm": 0.17765507102012634, | |
| "learning_rate": 0.0008312769211234524, | |
| "loss": 1.6717, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.3433779908065847, | |
| "grad_norm": 0.1868458241224289, | |
| "learning_rate": 0.0008302483017296711, | |
| "loss": 1.7436, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.3441637527992771, | |
| "grad_norm": 0.15795573592185974, | |
| "learning_rate": 0.0008292171970440876, | |
| "loss": 1.7093, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.3449495147919695, | |
| "grad_norm": 0.14316695928573608, | |
| "learning_rate": 0.0008281836148263057, | |
| "loss": 1.6806, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.3457352767846619, | |
| "grad_norm": 0.17996078729629517, | |
| "learning_rate": 0.0008271475628545741, | |
| "loss": 1.6856, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.34652103877735435, | |
| "grad_norm": 0.13540180027484894, | |
| "learning_rate": 0.0008261090489257277, | |
| "loss": 1.7469, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.3473068007700468, | |
| "grad_norm": 0.22064046561717987, | |
| "learning_rate": 0.000825068080855129, | |
| "loss": 1.7472, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.34809256276273914, | |
| "grad_norm": 0.1410595029592514, | |
| "learning_rate": 0.0008240246664766087, | |
| "loss": 1.6685, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.34887832475543157, | |
| "grad_norm": 0.16196031868457794, | |
| "learning_rate": 0.0008229788136424081, | |
| "loss": 1.6643, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.349664086748124, | |
| "grad_norm": 0.13669522106647491, | |
| "learning_rate": 0.0008219305302231185, | |
| "loss": 1.6631, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.3504498487408164, | |
| "grad_norm": 0.15855997800827026, | |
| "learning_rate": 0.0008208798241076227, | |
| "loss": 1.6319, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.35123561073350884, | |
| "grad_norm": 0.13129590451717377, | |
| "learning_rate": 0.000819826703203036, | |
| "loss": 1.6641, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.3520213727262012, | |
| "grad_norm": 0.17638882994651794, | |
| "learning_rate": 0.0008187711754346456, | |
| "loss": 1.676, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.35280713471889363, | |
| "grad_norm": 0.17996013164520264, | |
| "learning_rate": 0.0008177132487458521, | |
| "loss": 1.6299, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.35359289671158606, | |
| "grad_norm": 0.18002741038799286, | |
| "learning_rate": 0.0008166529310981092, | |
| "loss": 1.6424, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3543786587042785, | |
| "grad_norm": 0.14587530493736267, | |
| "learning_rate": 0.0008155902304708634, | |
| "loss": 1.6697, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.3551644206969709, | |
| "grad_norm": 0.1491909921169281, | |
| "learning_rate": 0.000814525154861495, | |
| "loss": 1.6469, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.3559501826896633, | |
| "grad_norm": 0.16428177058696747, | |
| "learning_rate": 0.0008134577122852569, | |
| "loss": 1.7014, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.3567359446823557, | |
| "grad_norm": 0.12703630328178406, | |
| "learning_rate": 0.0008123879107752146, | |
| "loss": 1.6804, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.3575217066750481, | |
| "grad_norm": 0.22857950627803802, | |
| "learning_rate": 0.0008113157583821861, | |
| "loss": 1.6736, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.35830746866774055, | |
| "grad_norm": 0.144062340259552, | |
| "learning_rate": 0.0008102412631746807, | |
| "loss": 1.5896, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.35909323066043297, | |
| "grad_norm": 0.13251659274101257, | |
| "learning_rate": 0.000809164433238839, | |
| "loss": 1.6734, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.3598789926531254, | |
| "grad_norm": 0.20809276401996613, | |
| "learning_rate": 0.0008080852766783712, | |
| "loss": 1.7509, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.36066475464581776, | |
| "grad_norm": 0.20248106122016907, | |
| "learning_rate": 0.0008070038016144972, | |
| "loss": 1.6447, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.3614505166385102, | |
| "grad_norm": 0.2282155454158783, | |
| "learning_rate": 0.0008059200161858841, | |
| "loss": 1.6913, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.3622362786312026, | |
| "grad_norm": 0.15200570225715637, | |
| "learning_rate": 0.0008048339285485862, | |
| "loss": 1.6474, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.36302204062389504, | |
| "grad_norm": 0.16428659856319427, | |
| "learning_rate": 0.0008037455468759831, | |
| "loss": 1.6909, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.36380780261658746, | |
| "grad_norm": 0.1449277251958847, | |
| "learning_rate": 0.0008026548793587181, | |
| "loss": 1.6456, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.36459356460927983, | |
| "grad_norm": 0.12401038408279419, | |
| "learning_rate": 0.0008015619342046365, | |
| "loss": 1.6657, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.36537932660197225, | |
| "grad_norm": 0.18149249255657196, | |
| "learning_rate": 0.0008004667196387245, | |
| "loss": 1.6201, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.3661650885946647, | |
| "grad_norm": 0.1270531415939331, | |
| "learning_rate": 0.0007993692439030463, | |
| "loss": 1.7276, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.3669508505873571, | |
| "grad_norm": 0.11993835866451263, | |
| "learning_rate": 0.000798269515256683, | |
| "loss": 1.6833, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.3677366125800495, | |
| "grad_norm": 0.13166651129722595, | |
| "learning_rate": 0.0007971675419756698, | |
| "loss": 1.6036, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3685223745727419, | |
| "grad_norm": 0.18711446225643158, | |
| "learning_rate": 0.0007960633323529341, | |
| "loss": 1.6782, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.3693081365654343, | |
| "grad_norm": 0.1776147037744522, | |
| "learning_rate": 0.0007949568946982325, | |
| "loss": 1.6903, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.37009389855812674, | |
| "grad_norm": 0.14657479524612427, | |
| "learning_rate": 0.0007938482373380895, | |
| "loss": 1.7061, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.37087966055081917, | |
| "grad_norm": 0.1647639274597168, | |
| "learning_rate": 0.0007927373686157334, | |
| "loss": 1.6486, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.3716654225435116, | |
| "grad_norm": 0.13776181638240814, | |
| "learning_rate": 0.0007916242968910345, | |
| "loss": 1.5636, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.37245118453620396, | |
| "grad_norm": 0.1941072791814804, | |
| "learning_rate": 0.0007905090305404417, | |
| "loss": 1.638, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.3732369465288964, | |
| "grad_norm": 0.15426956117153168, | |
| "learning_rate": 0.0007893915779569194, | |
| "loss": 1.6899, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.3740227085215888, | |
| "grad_norm": 0.15892522037029266, | |
| "learning_rate": 0.0007882719475498851, | |
| "loss": 1.6904, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.37480847051428123, | |
| "grad_norm": 0.15704740583896637, | |
| "learning_rate": 0.0007871501477451451, | |
| "loss": 1.6176, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.37559423250697366, | |
| "grad_norm": 0.14114616811275482, | |
| "learning_rate": 0.0007860261869848318, | |
| "loss": 1.6511, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.376379994499666, | |
| "grad_norm": 0.1459956020116806, | |
| "learning_rate": 0.0007849000737273397, | |
| "loss": 1.6805, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.37716575649235845, | |
| "grad_norm": 0.14372305572032928, | |
| "learning_rate": 0.0007837718164472623, | |
| "loss": 1.6114, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3779515184850509, | |
| "grad_norm": 0.15054304897785187, | |
| "learning_rate": 0.0007826414236353276, | |
| "loss": 1.7266, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.3787372804777433, | |
| "grad_norm": 0.22728455066680908, | |
| "learning_rate": 0.000781508903798335, | |
| "loss": 1.7173, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.3795230424704357, | |
| "grad_norm": 0.15423768758773804, | |
| "learning_rate": 0.0007803742654590906, | |
| "loss": 1.63, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.3803088044631281, | |
| "grad_norm": 0.21122123301029205, | |
| "learning_rate": 0.0007792375171563433, | |
| "loss": 1.6727, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.3810945664558205, | |
| "grad_norm": 0.16940170526504517, | |
| "learning_rate": 0.0007780986674447208, | |
| "loss": 1.7145, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.38188032844851294, | |
| "grad_norm": 0.13977159559726715, | |
| "learning_rate": 0.0007769577248946649, | |
| "loss": 1.6329, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.38266609044120536, | |
| "grad_norm": 0.11926419287919998, | |
| "learning_rate": 0.000775814698092367, | |
| "loss": 1.7181, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.3834518524338978, | |
| "grad_norm": 0.1658172905445099, | |
| "learning_rate": 0.0007746695956397041, | |
| "loss": 1.5932, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.3842376144265902, | |
| "grad_norm": 0.11511421203613281, | |
| "learning_rate": 0.0007735224261541727, | |
| "loss": 1.6356, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.3850233764192826, | |
| "grad_norm": 0.15368768572807312, | |
| "learning_rate": 0.0007723731982688255, | |
| "loss": 1.6138, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.385809138411975, | |
| "grad_norm": 0.1516408920288086, | |
| "learning_rate": 0.0007712219206322056, | |
| "loss": 1.6715, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.38659490040466743, | |
| "grad_norm": 0.15503014624118805, | |
| "learning_rate": 0.0007700686019082812, | |
| "loss": 1.5931, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.38738066239735985, | |
| "grad_norm": 0.18041059374809265, | |
| "learning_rate": 0.0007689132507763812, | |
| "loss": 1.6274, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.3881664243900523, | |
| "grad_norm": 0.14432255923748016, | |
| "learning_rate": 0.0007677558759311291, | |
| "loss": 1.6361, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.38895218638274465, | |
| "grad_norm": 0.14074422419071198, | |
| "learning_rate": 0.0007665964860823784, | |
| "loss": 1.6276, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.38973794837543707, | |
| "grad_norm": 0.15996317565441132, | |
| "learning_rate": 0.0007654350899551458, | |
| "loss": 1.6351, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.3905237103681295, | |
| "grad_norm": 0.1606004387140274, | |
| "learning_rate": 0.0007642716962895472, | |
| "loss": 1.6978, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.3913094723608219, | |
| "grad_norm": 0.15753324329853058, | |
| "learning_rate": 0.0007631063138407301, | |
| "loss": 1.6589, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.39209523435351434, | |
| "grad_norm": 0.14514195919036865, | |
| "learning_rate": 0.0007619389513788093, | |
| "loss": 1.6412, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.3928809963462067, | |
| "grad_norm": 0.17876259982585907, | |
| "learning_rate": 0.0007607696176888002, | |
| "loss": 1.7265, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.39366675833889914, | |
| "grad_norm": 0.1770378202199936, | |
| "learning_rate": 0.000759598321570552, | |
| "loss": 1.6525, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.39445252033159156, | |
| "grad_norm": 0.13470305502414703, | |
| "learning_rate": 0.0007584250718386832, | |
| "loss": 1.5947, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.395238282324284, | |
| "grad_norm": 0.16835801303386688, | |
| "learning_rate": 0.0007572498773225136, | |
| "loss": 1.6635, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.3960240443169764, | |
| "grad_norm": 0.18890178203582764, | |
| "learning_rate": 0.0007560727468659988, | |
| "loss": 1.58, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.3968098063096688, | |
| "grad_norm": 0.1397354155778885, | |
| "learning_rate": 0.0007548936893276634, | |
| "loss": 1.6523, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.3975955683023612, | |
| "grad_norm": 0.1338605135679245, | |
| "learning_rate": 0.0007537127135805341, | |
| "loss": 1.6461, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.3983813302950536, | |
| "grad_norm": 0.1662890762090683, | |
| "learning_rate": 0.0007525298285120733, | |
| "loss": 1.6881, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.39916709228774605, | |
| "grad_norm": 0.14534525573253632, | |
| "learning_rate": 0.000751345043024112, | |
| "loss": 1.6404, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.3999528542804385, | |
| "grad_norm": 0.16143235564231873, | |
| "learning_rate": 0.0007501583660327827, | |
| "loss": 1.6824, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.40073861627313084, | |
| "grad_norm": 0.1411980539560318, | |
| "learning_rate": 0.0007489698064684527, | |
| "loss": 1.6082, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.40152437826582327, | |
| "grad_norm": 0.1654977947473526, | |
| "learning_rate": 0.0007477793732756564, | |
| "loss": 1.6116, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.4023101402585157, | |
| "grad_norm": 0.1615074872970581, | |
| "learning_rate": 0.0007465870754130286, | |
| "loss": 1.613, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.4030959022512081, | |
| "grad_norm": 0.14305217564105988, | |
| "learning_rate": 0.0007453929218532365, | |
| "loss": 1.6736, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.40388166424390054, | |
| "grad_norm": 0.13158391416072845, | |
| "learning_rate": 0.0007441969215829122, | |
| "loss": 1.6553, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.4046674262365929, | |
| "grad_norm": 0.30756956338882446, | |
| "learning_rate": 0.0007429990836025855, | |
| "loss": 1.6628, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.40545318822928533, | |
| "grad_norm": 0.127960205078125, | |
| "learning_rate": 0.0007417994169266159, | |
| "loss": 1.6054, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.40623895022197776, | |
| "grad_norm": 0.19319772720336914, | |
| "learning_rate": 0.000740597930583125, | |
| "loss": 1.631, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.4070247122146702, | |
| "grad_norm": 0.14012780785560608, | |
| "learning_rate": 0.0007393946336139278, | |
| "loss": 1.5878, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.4078104742073626, | |
| "grad_norm": 0.12992408871650696, | |
| "learning_rate": 0.0007381895350744656, | |
| "loss": 1.7209, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.40859623620005503, | |
| "grad_norm": 0.17100664973258972, | |
| "learning_rate": 0.0007369826440337378, | |
| "loss": 1.658, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.4093819981927474, | |
| "grad_norm": 0.13439995050430298, | |
| "learning_rate": 0.0007357739695742324, | |
| "loss": 1.6069, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.4101677601854398, | |
| "grad_norm": 0.15416887402534485, | |
| "learning_rate": 0.0007345635207918594, | |
| "loss": 1.7172, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.41095352217813225, | |
| "grad_norm": 0.1403576135635376, | |
| "learning_rate": 0.0007333513067958812, | |
| "loss": 1.7471, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.41173928417082467, | |
| "grad_norm": 0.1455819308757782, | |
| "learning_rate": 0.0007321373367088442, | |
| "loss": 1.6624, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.4125250461635171, | |
| "grad_norm": 0.17243732511997223, | |
| "learning_rate": 0.0007309216196665105, | |
| "loss": 1.5789, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.41331080815620946, | |
| "grad_norm": 0.16030173003673553, | |
| "learning_rate": 0.0007297041648177889, | |
| "loss": 1.7102, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.4140965701489019, | |
| "grad_norm": 0.14231261610984802, | |
| "learning_rate": 0.0007284849813246662, | |
| "loss": 1.707, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.4148823321415943, | |
| "grad_norm": 0.14213086664676666, | |
| "learning_rate": 0.000727264078362138, | |
| "loss": 1.6524, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.41566809413428674, | |
| "grad_norm": 0.16257204115390778, | |
| "learning_rate": 0.0007260414651181399, | |
| "loss": 1.6361, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.41645385612697916, | |
| "grad_norm": 0.1615426391363144, | |
| "learning_rate": 0.0007248171507934785, | |
| "loss": 1.6094, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.41723961811967153, | |
| "grad_norm": 0.16172373294830322, | |
| "learning_rate": 0.0007235911446017612, | |
| "loss": 1.6127, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.41802538011236395, | |
| "grad_norm": 0.22370944917201996, | |
| "learning_rate": 0.0007223634557693286, | |
| "loss": 1.6398, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.4188111421050564, | |
| "grad_norm": 0.1495995670557022, | |
| "learning_rate": 0.0007211340935351831, | |
| "loss": 1.6818, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.4195969040977488, | |
| "grad_norm": 0.13977926969528198, | |
| "learning_rate": 0.0007199030671509209, | |
| "loss": 1.7055, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.4203826660904412, | |
| "grad_norm": 0.15699009597301483, | |
| "learning_rate": 0.0007186703858806617, | |
| "loss": 1.6536, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.4211684280831336, | |
| "grad_norm": 0.148645281791687, | |
| "learning_rate": 0.0007174360590009787, | |
| "loss": 1.5893, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.421954190075826, | |
| "grad_norm": 0.14828044176101685, | |
| "learning_rate": 0.0007162000958008301, | |
| "loss": 1.6163, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.42273995206851844, | |
| "grad_norm": 0.15765506029129028, | |
| "learning_rate": 0.0007149625055814872, | |
| "loss": 1.6033, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.42352571406121087, | |
| "grad_norm": 0.18456675112247467, | |
| "learning_rate": 0.0007137232976564663, | |
| "loss": 1.6514, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.4243114760539033, | |
| "grad_norm": 0.16911190748214722, | |
| "learning_rate": 0.0007124824813514571, | |
| "loss": 1.6484, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.42509723804659566, | |
| "grad_norm": 0.1826726347208023, | |
| "learning_rate": 0.0007112400660042537, | |
| "loss": 1.6445, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.4258830000392881, | |
| "grad_norm": 0.14041639864444733, | |
| "learning_rate": 0.0007099960609646838, | |
| "loss": 1.6464, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.4266687620319805, | |
| "grad_norm": 0.12943454086780548, | |
| "learning_rate": 0.000708750475594538, | |
| "loss": 1.6154, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.42745452402467293, | |
| "grad_norm": 0.15895868837833405, | |
| "learning_rate": 0.0007075033192675, | |
| "loss": 1.5816, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.42824028601736536, | |
| "grad_norm": 0.17042267322540283, | |
| "learning_rate": 0.0007062546013690759, | |
| "loss": 1.5853, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.4290260480100578, | |
| "grad_norm": 0.20396976172924042, | |
| "learning_rate": 0.0007050043312965232, | |
| "loss": 1.6317, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.42981181000275015, | |
| "grad_norm": 0.18434785306453705, | |
| "learning_rate": 0.0007037525184587803, | |
| "loss": 1.6722, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.4305975719954426, | |
| "grad_norm": 0.12577559053897858, | |
| "learning_rate": 0.000702499172276396, | |
| "loss": 1.7135, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.431383333988135, | |
| "grad_norm": 0.20810461044311523, | |
| "learning_rate": 0.0007012443021814581, | |
| "loss": 1.7251, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.4321690959808274, | |
| "grad_norm": 0.22052597999572754, | |
| "learning_rate": 0.0006999879176175226, | |
| "loss": 1.6398, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.43295485797351985, | |
| "grad_norm": 0.1631423681974411, | |
| "learning_rate": 0.0006987300280395427, | |
| "loss": 1.6847, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.4337406199662122, | |
| "grad_norm": 0.166092187166214, | |
| "learning_rate": 0.0006974706429137978, | |
| "loss": 1.6606, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.43452638195890464, | |
| "grad_norm": 0.16192929446697235, | |
| "learning_rate": 0.0006962097717178221, | |
| "loss": 1.6697, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.43531214395159706, | |
| "grad_norm": 0.11831378191709518, | |
| "learning_rate": 0.0006949474239403328, | |
| "loss": 1.5835, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.4360979059442895, | |
| "grad_norm": 0.15122415125370026, | |
| "learning_rate": 0.0006936836090811598, | |
| "loss": 1.6469, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.4368836679369819, | |
| "grad_norm": 0.18945452570915222, | |
| "learning_rate": 0.000692418336651173, | |
| "loss": 1.5554, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.4376694299296743, | |
| "grad_norm": 0.1292644888162613, | |
| "learning_rate": 0.0006911516161722115, | |
| "loss": 1.6987, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.4384551919223667, | |
| "grad_norm": 0.16247059404850006, | |
| "learning_rate": 0.000689883457177012, | |
| "loss": 1.6198, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.43924095391505913, | |
| "grad_norm": 0.1748296171426773, | |
| "learning_rate": 0.0006886138692091362, | |
| "loss": 1.691, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.44002671590775155, | |
| "grad_norm": 0.12950074672698975, | |
| "learning_rate": 0.0006873428618229003, | |
| "loss": 1.6407, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.440812477900444, | |
| "grad_norm": 0.17089992761611938, | |
| "learning_rate": 0.0006860704445833015, | |
| "loss": 1.641, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.44159823989313635, | |
| "grad_norm": 0.1477111577987671, | |
| "learning_rate": 0.0006847966270659478, | |
| "loss": 1.678, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.44238400188582877, | |
| "grad_norm": 0.14656804502010345, | |
| "learning_rate": 0.0006835214188569843, | |
| "loss": 1.6195, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.4431697638785212, | |
| "grad_norm": 0.15418322384357452, | |
| "learning_rate": 0.0006822448295530221, | |
| "loss": 1.6286, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.4439555258712136, | |
| "grad_norm": 0.13586872816085815, | |
| "learning_rate": 0.000680966868761066, | |
| "loss": 1.6822, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.44474128786390604, | |
| "grad_norm": 0.14607511460781097, | |
| "learning_rate": 0.0006796875460984414, | |
| "loss": 1.6614, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.4455270498565984, | |
| "grad_norm": 0.15407826006412506, | |
| "learning_rate": 0.0006784068711927229, | |
| "loss": 1.6135, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.44631281184929084, | |
| "grad_norm": 0.1422751098871231, | |
| "learning_rate": 0.0006771248536816613, | |
| "loss": 1.6104, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.44709857384198326, | |
| "grad_norm": 0.1250545233488083, | |
| "learning_rate": 0.0006758415032131112, | |
| "loss": 1.6461, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.4478843358346757, | |
| "grad_norm": 0.1620786488056183, | |
| "learning_rate": 0.0006745568294449587, | |
| "loss": 1.6642, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.4486700978273681, | |
| "grad_norm": 0.2060784548521042, | |
| "learning_rate": 0.0006732708420450477, | |
| "loss": 1.666, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.4494558598200605, | |
| "grad_norm": 0.1400783210992813, | |
| "learning_rate": 0.0006719835506911088, | |
| "loss": 1.6238, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.4502416218127529, | |
| "grad_norm": 0.1502895951271057, | |
| "learning_rate": 0.0006706949650706849, | |
| "loss": 1.5815, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.4510273838054453, | |
| "grad_norm": 0.16864760220050812, | |
| "learning_rate": 0.0006694050948810592, | |
| "loss": 1.6644, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.45181314579813775, | |
| "grad_norm": 0.14266374707221985, | |
| "learning_rate": 0.0006681139498291815, | |
| "loss": 1.6552, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.4525989077908302, | |
| "grad_norm": 0.25216013193130493, | |
| "learning_rate": 0.0006668215396315964, | |
| "loss": 1.5835, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.4533846697835226, | |
| "grad_norm": 0.15556307137012482, | |
| "learning_rate": 0.0006655278740143689, | |
| "loss": 1.6315, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.45417043177621497, | |
| "grad_norm": 0.14055636525154114, | |
| "learning_rate": 0.0006642329627130115, | |
| "loss": 1.6428, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.4549561937689074, | |
| "grad_norm": 0.16647887229919434, | |
| "learning_rate": 0.0006629368154724116, | |
| "loss": 1.6541, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.4557419557615998, | |
| "grad_norm": 0.17561818659305573, | |
| "learning_rate": 0.0006616394420467575, | |
| "loss": 1.6565, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.45652771775429224, | |
| "grad_norm": 0.1490733027458191, | |
| "learning_rate": 0.000660340852199465, | |
| "loss": 1.6201, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.45731347974698466, | |
| "grad_norm": 0.16749203205108643, | |
| "learning_rate": 0.0006590410557031044, | |
| "loss": 1.6306, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.45809924173967703, | |
| "grad_norm": 0.12608392536640167, | |
| "learning_rate": 0.0006577400623393266, | |
| "loss": 1.6271, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.45888500373236946, | |
| "grad_norm": 0.1351223587989807, | |
| "learning_rate": 0.0006564378818987893, | |
| "loss": 1.6072, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.4596707657250619, | |
| "grad_norm": 0.14112618565559387, | |
| "learning_rate": 0.0006551345241810837, | |
| "loss": 1.6559, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.4604565277177543, | |
| "grad_norm": 0.12047524750232697, | |
| "learning_rate": 0.000653829998994661, | |
| "loss": 1.6756, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.46124228971044673, | |
| "grad_norm": 0.16356752812862396, | |
| "learning_rate": 0.0006525243161567576, | |
| "loss": 1.6884, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.4620280517031391, | |
| "grad_norm": 0.14315475523471832, | |
| "learning_rate": 0.0006512174854933224, | |
| "loss": 1.6488, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.4628138136958315, | |
| "grad_norm": 0.12611393630504608, | |
| "learning_rate": 0.0006499095168389419, | |
| "loss": 1.6481, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.46359957568852395, | |
| "grad_norm": 0.12579138576984406, | |
| "learning_rate": 0.0006486004200367669, | |
| "loss": 1.6586, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.46438533768121637, | |
| "grad_norm": 0.18551719188690186, | |
| "learning_rate": 0.0006472902049384377, | |
| "loss": 1.5887, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.4651710996739088, | |
| "grad_norm": 0.15424126386642456, | |
| "learning_rate": 0.0006459788814040105, | |
| "loss": 1.6598, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.46595686166660116, | |
| "grad_norm": 0.1597795933485031, | |
| "learning_rate": 0.0006446664593018834, | |
| "loss": 1.6753, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.4667426236592936, | |
| "grad_norm": 0.14304637908935547, | |
| "learning_rate": 0.0006433529485087214, | |
| "loss": 1.6558, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.467528385651986, | |
| "grad_norm": 0.15329056978225708, | |
| "learning_rate": 0.0006420383589093827, | |
| "loss": 1.6326, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.46831414764467844, | |
| "grad_norm": 0.1391008198261261, | |
| "learning_rate": 0.000640722700396844, | |
| "loss": 1.6108, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.46909990963737086, | |
| "grad_norm": 0.14664943516254425, | |
| "learning_rate": 0.0006394059828721261, | |
| "loss": 1.6063, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.46988567163006323, | |
| "grad_norm": 0.22576557099819183, | |
| "learning_rate": 0.0006380882162442196, | |
| "loss": 1.6035, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.47067143362275565, | |
| "grad_norm": 0.1459265500307083, | |
| "learning_rate": 0.0006367694104300097, | |
| "loss": 1.6023, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.4714571956154481, | |
| "grad_norm": 0.12152886390686035, | |
| "learning_rate": 0.0006354495753542027, | |
| "loss": 1.5558, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4722429576081405, | |
| "grad_norm": 0.13670937716960907, | |
| "learning_rate": 0.0006341287209492498, | |
| "loss": 1.5578, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.4730287196008329, | |
| "grad_norm": 0.14993344247341156, | |
| "learning_rate": 0.000632806857155274, | |
| "loss": 1.636, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.4738144815935253, | |
| "grad_norm": 0.20564016699790955, | |
| "learning_rate": 0.000631483993919994, | |
| "loss": 1.6322, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.4746002435862177, | |
| "grad_norm": 0.1213807612657547, | |
| "learning_rate": 0.0006301601411986502, | |
| "loss": 1.6361, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.47538600557891014, | |
| "grad_norm": 0.12629488110542297, | |
| "learning_rate": 0.0006288353089539289, | |
| "loss": 1.6376, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.47617176757160257, | |
| "grad_norm": 0.14332091808319092, | |
| "learning_rate": 0.0006275095071558881, | |
| "loss": 1.5986, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.476957529564295, | |
| "grad_norm": 0.16367210447788239, | |
| "learning_rate": 0.0006261827457818822, | |
| "loss": 1.6127, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.4777432915569874, | |
| "grad_norm": 0.1383843868970871, | |
| "learning_rate": 0.0006248550348164868, | |
| "loss": 1.5363, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.4785290535496798, | |
| "grad_norm": 0.14265769720077515, | |
| "learning_rate": 0.0006235263842514242, | |
| "loss": 1.5952, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.4793148155423722, | |
| "grad_norm": 0.13620123267173767, | |
| "learning_rate": 0.0006221968040854867, | |
| "loss": 1.5788, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.48010057753506463, | |
| "grad_norm": 0.17570005357265472, | |
| "learning_rate": 0.0006208663043244631, | |
| "loss": 1.6889, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.48088633952775706, | |
| "grad_norm": 0.18797026574611664, | |
| "learning_rate": 0.0006195348949810625, | |
| "loss": 1.7221, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.4816721015204495, | |
| "grad_norm": 0.14126017689704895, | |
| "learning_rate": 0.0006182025860748389, | |
| "loss": 1.5377, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.48245786351314185, | |
| "grad_norm": 0.18811370432376862, | |
| "learning_rate": 0.0006168693876321163, | |
| "loss": 1.6591, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.4832436255058343, | |
| "grad_norm": 0.20044410228729248, | |
| "learning_rate": 0.0006155353096859124, | |
| "loss": 1.6215, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.4840293874985267, | |
| "grad_norm": 0.13966962695121765, | |
| "learning_rate": 0.0006142003622758641, | |
| "loss": 1.6331, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.4848151494912191, | |
| "grad_norm": 0.1432633399963379, | |
| "learning_rate": 0.0006128645554481512, | |
| "loss": 1.6516, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.48560091148391155, | |
| "grad_norm": 0.1459830105304718, | |
| "learning_rate": 0.0006115278992554214, | |
| "loss": 1.5967, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.4863866734766039, | |
| "grad_norm": 0.1282690018415451, | |
| "learning_rate": 0.000610190403756714, | |
| "loss": 1.6412, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.48717243546929634, | |
| "grad_norm": 0.1409655064344406, | |
| "learning_rate": 0.0006088520790173843, | |
| "loss": 1.6607, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.48795819746198876, | |
| "grad_norm": 0.1343081146478653, | |
| "learning_rate": 0.0006075129351090291, | |
| "loss": 1.6391, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.4887439594546812, | |
| "grad_norm": 0.13972026109695435, | |
| "learning_rate": 0.0006061729821094085, | |
| "loss": 1.5868, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.4895297214473736, | |
| "grad_norm": 0.14654341340065002, | |
| "learning_rate": 0.0006048322301023723, | |
| "loss": 1.612, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.490315483440066, | |
| "grad_norm": 0.17553551495075226, | |
| "learning_rate": 0.0006034906891777831, | |
| "loss": 1.613, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.4911012454327584, | |
| "grad_norm": 0.18809862434864044, | |
| "learning_rate": 0.0006021483694314406, | |
| "loss": 1.6353, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.49188700742545083, | |
| "grad_norm": 0.17778019607067108, | |
| "learning_rate": 0.0006008052809650052, | |
| "loss": 1.6269, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.49267276941814325, | |
| "grad_norm": 0.13343100249767303, | |
| "learning_rate": 0.0005994614338859224, | |
| "loss": 1.6214, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.4934585314108357, | |
| "grad_norm": 0.13967198133468628, | |
| "learning_rate": 0.0005981168383073472, | |
| "loss": 1.6184, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.49424429340352805, | |
| "grad_norm": 0.12899260222911835, | |
| "learning_rate": 0.0005967715043480667, | |
| "loss": 1.5902, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.49503005539622047, | |
| "grad_norm": 0.1432379186153412, | |
| "learning_rate": 0.0005954254421324253, | |
| "loss": 1.6336, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.4958158173889129, | |
| "grad_norm": 0.15734805166721344, | |
| "learning_rate": 0.0005940786617902477, | |
| "loss": 1.6614, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.4966015793816053, | |
| "grad_norm": 0.17404969036579132, | |
| "learning_rate": 0.0005927311734567624, | |
| "loss": 1.6685, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.49738734137429774, | |
| "grad_norm": 0.21907399594783783, | |
| "learning_rate": 0.0005913829872725269, | |
| "loss": 1.5632, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.4981731033669901, | |
| "grad_norm": 0.1369909644126892, | |
| "learning_rate": 0.0005900341133833491, | |
| "loss": 1.623, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.49895886535968254, | |
| "grad_norm": 0.14243030548095703, | |
| "learning_rate": 0.0005886845619402138, | |
| "loss": 1.5829, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.49974462735237496, | |
| "grad_norm": 0.1607036292552948, | |
| "learning_rate": 0.0005873343430992034, | |
| "loss": 1.5537, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.5005303893450673, | |
| "grad_norm": 0.12607838213443756, | |
| "learning_rate": 0.0005859834670214236, | |
| "loss": 1.6441, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.5013161513377598, | |
| "grad_norm": 0.13709622621536255, | |
| "learning_rate": 0.0005846319438729257, | |
| "loss": 1.627, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.5021019133304522, | |
| "grad_norm": 0.1329515129327774, | |
| "learning_rate": 0.000583279783824631, | |
| "loss": 1.6167, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.5028876753231446, | |
| "grad_norm": 0.15467658638954163, | |
| "learning_rate": 0.0005819269970522532, | |
| "loss": 1.6389, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.503673437315837, | |
| "grad_norm": 0.19440630078315735, | |
| "learning_rate": 0.0005805735937362231, | |
| "loss": 1.5974, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.5044591993085294, | |
| "grad_norm": 0.1676415205001831, | |
| "learning_rate": 0.0005792195840616108, | |
| "loss": 1.6102, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5052449613012219, | |
| "grad_norm": 0.19758261740207672, | |
| "learning_rate": 0.0005778649782180497, | |
| "loss": 1.6061, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.5060307232939143, | |
| "grad_norm": 0.15707242488861084, | |
| "learning_rate": 0.00057650978639966, | |
| "loss": 1.5979, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.5068164852866067, | |
| "grad_norm": 0.15218037366867065, | |
| "learning_rate": 0.0005751540188049713, | |
| "loss": 1.5923, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.5076022472792991, | |
| "grad_norm": 0.149359792470932, | |
| "learning_rate": 0.000573797685636846, | |
| "loss": 1.5835, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5083880092719915, | |
| "grad_norm": 0.18229952454566956, | |
| "learning_rate": 0.0005724407971024037, | |
| "loss": 1.6207, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.5091737712646839, | |
| "grad_norm": 0.16412632167339325, | |
| "learning_rate": 0.0005710833634129424, | |
| "loss": 1.6654, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5099595332573763, | |
| "grad_norm": 0.15745171904563904, | |
| "learning_rate": 0.0005697253947838631, | |
| "loss": 1.543, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.5107452952500687, | |
| "grad_norm": 0.13623344898223877, | |
| "learning_rate": 0.0005683669014345924, | |
| "loss": 1.582, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5115310572427612, | |
| "grad_norm": 0.1499263346195221, | |
| "learning_rate": 0.0005670078935885058, | |
| "loss": 1.5902, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.5123168192354536, | |
| "grad_norm": 0.14522770047187805, | |
| "learning_rate": 0.0005656483814728508, | |
| "loss": 1.6872, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.513102581228146, | |
| "grad_norm": 0.1918184757232666, | |
| "learning_rate": 0.0005642883753186693, | |
| "loss": 1.6782, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.5138883432208384, | |
| "grad_norm": 0.18836189806461334, | |
| "learning_rate": 0.0005629278853607218, | |
| "loss": 1.6462, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5146741052135309, | |
| "grad_norm": 0.12512125074863434, | |
| "learning_rate": 0.000561566921837409, | |
| "loss": 1.6126, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.5154598672062233, | |
| "grad_norm": 0.14574217796325684, | |
| "learning_rate": 0.0005602054949906957, | |
| "loss": 1.6821, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5162456291989157, | |
| "grad_norm": 0.17824433743953705, | |
| "learning_rate": 0.0005588436150660336, | |
| "loss": 1.62, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.517031391191608, | |
| "grad_norm": 0.21210236847400665, | |
| "learning_rate": 0.000557481292312284, | |
| "loss": 1.6722, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.5178171531843004, | |
| "grad_norm": 0.169542133808136, | |
| "learning_rate": 0.0005561185369816404, | |
| "loss": 1.5686, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.5186029151769929, | |
| "grad_norm": 0.15672409534454346, | |
| "learning_rate": 0.0005547553593295521, | |
| "loss": 1.6275, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5193886771696853, | |
| "grad_norm": 0.1227111890912056, | |
| "learning_rate": 0.0005533917696146464, | |
| "loss": 1.6133, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.5201744391623777, | |
| "grad_norm": 0.12742657959461212, | |
| "learning_rate": 0.0005520277780986514, | |
| "loss": 1.6471, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.5209602011550701, | |
| "grad_norm": 1.2764840126037598, | |
| "learning_rate": 0.0005506633950463195, | |
| "loss": 1.6583, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.5217459631477626, | |
| "grad_norm": 0.26217857003211975, | |
| "learning_rate": 0.0005492986307253489, | |
| "loss": 1.5793, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.522531725140455, | |
| "grad_norm": 0.2518414258956909, | |
| "learning_rate": 0.0005479334954063076, | |
| "loss": 1.6404, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.5233174871331474, | |
| "grad_norm": 0.14766822755336761, | |
| "learning_rate": 0.0005465679993625552, | |
| "loss": 1.6244, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5241032491258398, | |
| "grad_norm": 0.15620769560337067, | |
| "learning_rate": 0.0005452021528701663, | |
| "loss": 1.5799, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.5248890111185321, | |
| "grad_norm": 0.1620067059993744, | |
| "learning_rate": 0.0005438359662078528, | |
| "loss": 1.6273, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.5256747731112246, | |
| "grad_norm": 0.13544130325317383, | |
| "learning_rate": 0.0005424694496568859, | |
| "loss": 1.5757, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.526460535103917, | |
| "grad_norm": 0.13699087500572205, | |
| "learning_rate": 0.0005411026135010203, | |
| "loss": 1.7186, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5272462970966094, | |
| "grad_norm": 0.1648073047399521, | |
| "learning_rate": 0.0005397354680264151, | |
| "loss": 1.5614, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.5280320590893018, | |
| "grad_norm": 0.15802352130413055, | |
| "learning_rate": 0.0005383680235215579, | |
| "loss": 1.7054, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5288178210819943, | |
| "grad_norm": 0.16547876596450806, | |
| "learning_rate": 0.0005370002902771861, | |
| "loss": 1.6021, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.5296035830746867, | |
| "grad_norm": 0.19628289341926575, | |
| "learning_rate": 0.0005356322785862102, | |
| "loss": 1.6354, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.5303893450673791, | |
| "grad_norm": 0.18181757628917694, | |
| "learning_rate": 0.0005342639987436363, | |
| "loss": 1.6311, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.5311751070600715, | |
| "grad_norm": 0.13773776590824127, | |
| "learning_rate": 0.0005328954610464881, | |
| "loss": 1.6258, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.531960869052764, | |
| "grad_norm": 0.16505779325962067, | |
| "learning_rate": 0.0005315266757937305, | |
| "loss": 1.6317, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.5327466310454563, | |
| "grad_norm": 0.18392467498779297, | |
| "learning_rate": 0.0005301576532861905, | |
| "loss": 1.5434, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5335323930381487, | |
| "grad_norm": 0.4494565725326538, | |
| "learning_rate": 0.0005287884038264813, | |
| "loss": 1.6195, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.5343181550308411, | |
| "grad_norm": 0.11773408949375153, | |
| "learning_rate": 0.0005274189377189235, | |
| "loss": 1.6681, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.5351039170235335, | |
| "grad_norm": 0.12879732251167297, | |
| "learning_rate": 0.0005260492652694687, | |
| "loss": 1.576, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.535889679016226, | |
| "grad_norm": 0.18784938752651215, | |
| "learning_rate": 0.0005246793967856206, | |
| "loss": 1.6229, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.5366754410089184, | |
| "grad_norm": 0.17286457121372223, | |
| "learning_rate": 0.0005233093425763586, | |
| "loss": 1.5689, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.5374612030016108, | |
| "grad_norm": 0.14577491581439972, | |
| "learning_rate": 0.0005219391129520597, | |
| "loss": 1.563, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.5382469649943032, | |
| "grad_norm": 0.11635435372591019, | |
| "learning_rate": 0.0005205687182244211, | |
| "loss": 1.5521, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.5390327269869957, | |
| "grad_norm": 0.17694814503192902, | |
| "learning_rate": 0.0005191981687063822, | |
| "loss": 1.6428, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.5398184889796881, | |
| "grad_norm": 0.13338413834571838, | |
| "learning_rate": 0.0005178274747120477, | |
| "loss": 1.5941, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.5406042509723805, | |
| "grad_norm": 0.16258469223976135, | |
| "learning_rate": 0.0005164566465566092, | |
| "loss": 1.5991, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5413900129650728, | |
| "grad_norm": 0.14882458746433258, | |
| "learning_rate": 0.0005150856945562681, | |
| "loss": 1.5826, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.5421757749577653, | |
| "grad_norm": 0.15069830417633057, | |
| "learning_rate": 0.0005137146290281574, | |
| "loss": 1.6133, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5429615369504577, | |
| "grad_norm": 0.16154706478118896, | |
| "learning_rate": 0.0005123434602902655, | |
| "loss": 1.6148, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.5437472989431501, | |
| "grad_norm": 0.14857438206672668, | |
| "learning_rate": 0.0005109721986613561, | |
| "loss": 1.6026, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5445330609358425, | |
| "grad_norm": 0.180572509765625, | |
| "learning_rate": 0.0005096008544608931, | |
| "loss": 1.6101, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.545318822928535, | |
| "grad_norm": 0.2038494050502777, | |
| "learning_rate": 0.0005082294380089613, | |
| "loss": 1.5987, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.5461045849212274, | |
| "grad_norm": 0.15037231147289276, | |
| "learning_rate": 0.000506857959626189, | |
| "loss": 1.5964, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.5468903469139198, | |
| "grad_norm": 0.12556269764900208, | |
| "learning_rate": 0.0005054864296336714, | |
| "loss": 1.5537, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5476761089066122, | |
| "grad_norm": 0.1907692849636078, | |
| "learning_rate": 0.0005041148583528912, | |
| "loss": 1.6699, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 0.5484618708993046, | |
| "grad_norm": 0.15658941864967346, | |
| "learning_rate": 0.0005027432561056421, | |
| "loss": 1.6159, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.549247632891997, | |
| "grad_norm": 0.1611129492521286, | |
| "learning_rate": 0.0005013716332139509, | |
| "loss": 1.6474, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.5500333948846894, | |
| "grad_norm": 0.17045730352401733, | |
| "learning_rate": 0.0005, | |
| "loss": 1.5635, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5508191568773818, | |
| "grad_norm": 0.1496753841638565, | |
| "learning_rate": 0.0004986283667860492, | |
| "loss": 1.679, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 0.5516049188700742, | |
| "grad_norm": 0.13515827059745789, | |
| "learning_rate": 0.0004972567438943581, | |
| "loss": 1.6049, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.5523906808627667, | |
| "grad_norm": 0.12685181200504303, | |
| "learning_rate": 0.0004958851416471089, | |
| "loss": 1.6869, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 0.5531764428554591, | |
| "grad_norm": 0.14106754958629608, | |
| "learning_rate": 0.0004945135703663286, | |
| "loss": 1.6025, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.5539622048481515, | |
| "grad_norm": 0.1331792026758194, | |
| "learning_rate": 0.000493142040373811, | |
| "loss": 1.6071, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.5547479668408439, | |
| "grad_norm": 0.14535002410411835, | |
| "learning_rate": 0.0004917705619910389, | |
| "loss": 1.656, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.5555337288335364, | |
| "grad_norm": 0.12771806120872498, | |
| "learning_rate": 0.0004903991455391071, | |
| "loss": 1.5305, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.5563194908262288, | |
| "grad_norm": 0.15101516246795654, | |
| "learning_rate": 0.0004890278013386439, | |
| "loss": 1.6659, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5571052528189212, | |
| "grad_norm": 0.13331681489944458, | |
| "learning_rate": 0.00048765653970973463, | |
| "loss": 1.6045, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 0.5578910148116135, | |
| "grad_norm": 0.11692263185977936, | |
| "learning_rate": 0.00048628537097184253, | |
| "loss": 1.5341, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5586767768043059, | |
| "grad_norm": 0.12343940883874893, | |
| "learning_rate": 0.00048491430544373213, | |
| "loss": 1.5635, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.5594625387969984, | |
| "grad_norm": 0.17898155748844147, | |
| "learning_rate": 0.00048354335344339084, | |
| "loss": 1.5985, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.5602483007896908, | |
| "grad_norm": 0.14213447272777557, | |
| "learning_rate": 0.0004821725252879523, | |
| "loss": 1.6057, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 0.5610340627823832, | |
| "grad_norm": 0.12153229862451553, | |
| "learning_rate": 0.0004808018312936178, | |
| "loss": 1.6717, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.5618198247750756, | |
| "grad_norm": 0.12813471257686615, | |
| "learning_rate": 0.00047943128177557905, | |
| "loss": 1.6458, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.5626055867677681, | |
| "grad_norm": 0.15701644122600555, | |
| "learning_rate": 0.00047806088704794046, | |
| "loss": 1.6205, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.5633913487604605, | |
| "grad_norm": 0.1378682404756546, | |
| "learning_rate": 0.00047669065742364145, | |
| "loss": 1.643, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.5641771107531529, | |
| "grad_norm": 0.14088793098926544, | |
| "learning_rate": 0.0004753206032143795, | |
| "loss": 1.7, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.5649628727458453, | |
| "grad_norm": 0.15259869396686554, | |
| "learning_rate": 0.0004739507347305314, | |
| "loss": 1.6119, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 0.5657486347385376, | |
| "grad_norm": 0.13838313519954681, | |
| "learning_rate": 0.0004725810622810765, | |
| "loss": 1.609, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5665343967312301, | |
| "grad_norm": 0.19349806010723114, | |
| "learning_rate": 0.00047121159617351884, | |
| "loss": 1.5298, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 0.5673201587239225, | |
| "grad_norm": 0.14952600002288818, | |
| "learning_rate": 0.00046984234671380956, | |
| "loss": 1.5832, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.5681059207166149, | |
| "grad_norm": 0.1487891525030136, | |
| "learning_rate": 0.00046847332420626964, | |
| "loss": 1.58, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.5688916827093073, | |
| "grad_norm": 0.12130414694547653, | |
| "learning_rate": 0.00046710453895351195, | |
| "loss": 1.6952, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.5696774447019998, | |
| "grad_norm": 0.13457755744457245, | |
| "learning_rate": 0.00046573600125636395, | |
| "loss": 1.5882, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.5704632066946922, | |
| "grad_norm": 0.11782009154558182, | |
| "learning_rate": 0.0004643677214137898, | |
| "loss": 1.606, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.5712489686873846, | |
| "grad_norm": 0.17926670610904694, | |
| "learning_rate": 0.00046299970972281396, | |
| "loss": 1.6567, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 0.572034730680077, | |
| "grad_norm": 0.14689800143241882, | |
| "learning_rate": 0.0004616319764784421, | |
| "loss": 1.5803, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.5728204926727695, | |
| "grad_norm": 0.17201411724090576, | |
| "learning_rate": 0.0004602645319735849, | |
| "loss": 1.5905, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.5736062546654618, | |
| "grad_norm": 0.15453019738197327, | |
| "learning_rate": 0.00045889738649897984, | |
| "loss": 1.613, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.5743920166581542, | |
| "grad_norm": 0.19608797132968903, | |
| "learning_rate": 0.00045753055034311406, | |
| "loss": 1.5823, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 0.5751777786508466, | |
| "grad_norm": 0.13336700201034546, | |
| "learning_rate": 0.00045616403379214725, | |
| "loss": 1.633, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.575963540643539, | |
| "grad_norm": 0.1499382108449936, | |
| "learning_rate": 0.0004547978471298336, | |
| "loss": 1.5769, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 0.5767493026362315, | |
| "grad_norm": 0.1264503449201584, | |
| "learning_rate": 0.00045343200063744483, | |
| "loss": 1.581, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.5775350646289239, | |
| "grad_norm": 0.15569479763507843, | |
| "learning_rate": 0.00045206650459369256, | |
| "loss": 1.5404, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.5783208266216163, | |
| "grad_norm": 0.12474947422742844, | |
| "learning_rate": 0.00045070136927465124, | |
| "loss": 1.6176, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.5791065886143087, | |
| "grad_norm": 0.12202292680740356, | |
| "learning_rate": 0.0004493366049536806, | |
| "loss": 1.649, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 0.5798923506070012, | |
| "grad_norm": 0.1440856158733368, | |
| "learning_rate": 0.00044797222190134863, | |
| "loss": 1.6267, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.5806781125996936, | |
| "grad_norm": 0.14805102348327637, | |
| "learning_rate": 0.00044660823038535373, | |
| "loss": 1.568, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 0.581463874592386, | |
| "grad_norm": 0.16469161212444305, | |
| "learning_rate": 0.0004452446406704478, | |
| "loss": 1.6439, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.5822496365850783, | |
| "grad_norm": 0.1361335963010788, | |
| "learning_rate": 0.0004438814630183595, | |
| "loss": 1.5802, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.5830353985777708, | |
| "grad_norm": 0.14041323959827423, | |
| "learning_rate": 0.0004425187076877161, | |
| "loss": 1.6259, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.5838211605704632, | |
| "grad_norm": 0.173885777592659, | |
| "learning_rate": 0.00044115638493396643, | |
| "loss": 1.6512, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 0.5846069225631556, | |
| "grad_norm": 0.13919731974601746, | |
| "learning_rate": 0.0004397945050093044, | |
| "loss": 1.582, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.585392684555848, | |
| "grad_norm": 0.13981099426746368, | |
| "learning_rate": 0.0004384330781625911, | |
| "loss": 1.5468, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.5861784465485405, | |
| "grad_norm": 0.1628427505493164, | |
| "learning_rate": 0.0004370721146392783, | |
| "loss": 1.6533, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.5869642085412329, | |
| "grad_norm": 0.1454862356185913, | |
| "learning_rate": 0.00043571162468133073, | |
| "loss": 1.611, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.5877499705339253, | |
| "grad_norm": 0.127084419131279, | |
| "learning_rate": 0.0004343516185271494, | |
| "loss": 1.5406, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.5885357325266177, | |
| "grad_norm": 0.12855832278728485, | |
| "learning_rate": 0.00043299210641149435, | |
| "loss": 1.5789, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 0.5893214945193102, | |
| "grad_norm": 0.12993024289608002, | |
| "learning_rate": 0.0004316330985654077, | |
| "loss": 1.6774, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.5901072565120025, | |
| "grad_norm": 0.13693836331367493, | |
| "learning_rate": 0.000430274605216137, | |
| "loss": 1.6085, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 0.5908930185046949, | |
| "grad_norm": 0.1504506915807724, | |
| "learning_rate": 0.0004289166365870577, | |
| "loss": 1.5573, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.5916787804973873, | |
| "grad_norm": 0.17335031926631927, | |
| "learning_rate": 0.00042755920289759634, | |
| "loss": 1.626, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.5924645424900797, | |
| "grad_norm": 0.13067643344402313, | |
| "learning_rate": 0.0004262023143631538, | |
| "loss": 1.5954, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.5932503044827722, | |
| "grad_norm": 0.15987832844257355, | |
| "learning_rate": 0.0004248459811950288, | |
| "loss": 1.6007, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.5940360664754646, | |
| "grad_norm": 0.240760937333107, | |
| "learning_rate": 0.00042349021360034005, | |
| "loss": 1.6095, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.594821828468157, | |
| "grad_norm": 0.19699476659297943, | |
| "learning_rate": 0.0004221350217819504, | |
| "loss": 1.6421, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 0.5956075904608494, | |
| "grad_norm": 0.13763698935508728, | |
| "learning_rate": 0.00042078041593838946, | |
| "loss": 1.5438, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.5963933524535419, | |
| "grad_norm": 0.13714967668056488, | |
| "learning_rate": 0.00041942640626377694, | |
| "loss": 1.5082, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.5971791144462343, | |
| "grad_norm": 0.1486494392156601, | |
| "learning_rate": 0.0004180730029477468, | |
| "loss": 1.6616, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.5979648764389266, | |
| "grad_norm": 0.14766061305999756, | |
| "learning_rate": 0.00041672021617536914, | |
| "loss": 1.6168, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 0.598750638431619, | |
| "grad_norm": 0.13385340571403503, | |
| "learning_rate": 0.00041536805612707434, | |
| "loss": 1.5723, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.5995364004243114, | |
| "grad_norm": 0.17751716077327728, | |
| "learning_rate": 0.00041401653297857654, | |
| "loss": 1.5451, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 0.6003221624170039, | |
| "grad_norm": 0.14219363033771515, | |
| "learning_rate": 0.0004126656569007966, | |
| "loss": 1.6025, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.6011079244096963, | |
| "grad_norm": 0.13877145946025848, | |
| "learning_rate": 0.0004113154380597863, | |
| "loss": 1.6305, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.6018936864023887, | |
| "grad_norm": 0.1523716002702713, | |
| "learning_rate": 0.0004099658866166509, | |
| "loss": 1.5868, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.6026794483950811, | |
| "grad_norm": 0.13320079445838928, | |
| "learning_rate": 0.00040861701272747343, | |
| "loss": 1.581, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 0.6034652103877736, | |
| "grad_norm": 0.13998959958553314, | |
| "learning_rate": 0.00040726882654323757, | |
| "loss": 1.6086, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.604250972380466, | |
| "grad_norm": 0.1749476045370102, | |
| "learning_rate": 0.00040592133820975247, | |
| "loss": 1.5971, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 0.6050367343731584, | |
| "grad_norm": 0.14331944286823273, | |
| "learning_rate": 0.00040457455786757466, | |
| "loss": 1.6309, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.6058224963658508, | |
| "grad_norm": 0.1268499195575714, | |
| "learning_rate": 0.00040322849565193334, | |
| "loss": 1.6688, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 0.6066082583585432, | |
| "grad_norm": 0.1139456257224083, | |
| "learning_rate": 0.00040188316169265293, | |
| "loss": 1.5303, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.6073940203512356, | |
| "grad_norm": 0.1379457414150238, | |
| "learning_rate": 0.0004005385661140775, | |
| "loss": 1.643, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 0.608179782343928, | |
| "grad_norm": 0.15720854699611664, | |
| "learning_rate": 0.0003991947190349949, | |
| "loss": 1.6458, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.6089655443366204, | |
| "grad_norm": 0.13279718160629272, | |
| "learning_rate": 0.00039785163056855956, | |
| "loss": 1.6413, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.6097513063293128, | |
| "grad_norm": 0.16527603566646576, | |
| "learning_rate": 0.000396509310822217, | |
| "loss": 1.6411, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6105370683220053, | |
| "grad_norm": 0.11751680821180344, | |
| "learning_rate": 0.00039516776989762783, | |
| "loss": 1.5949, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.6113228303146977, | |
| "grad_norm": 0.1423860639333725, | |
| "learning_rate": 0.0003938270178905916, | |
| "loss": 1.5761, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.6121085923073901, | |
| "grad_norm": 0.13618935644626617, | |
| "learning_rate": 0.0003924870648909711, | |
| "loss": 1.6124, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 0.6128943543000825, | |
| "grad_norm": 0.16317065060138702, | |
| "learning_rate": 0.00039114792098261564, | |
| "loss": 1.5755, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.613680116292775, | |
| "grad_norm": 0.13522252440452576, | |
| "learning_rate": 0.0003898095962432862, | |
| "loss": 1.6334, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 0.6144658782854673, | |
| "grad_norm": 0.1667720079421997, | |
| "learning_rate": 0.00038847210074457854, | |
| "loss": 1.5734, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6152516402781597, | |
| "grad_norm": 0.14179572463035583, | |
| "learning_rate": 0.0003871354445518487, | |
| "loss": 1.5733, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.6160374022708521, | |
| "grad_norm": 0.13804243505001068, | |
| "learning_rate": 0.00038579963772413596, | |
| "loss": 1.6097, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.6168231642635446, | |
| "grad_norm": 0.14240044355392456, | |
| "learning_rate": 0.0003844646903140877, | |
| "loss": 1.6263, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.617608926256237, | |
| "grad_norm": 0.15428559482097626, | |
| "learning_rate": 0.0003831306123678838, | |
| "loss": 1.6567, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6183946882489294, | |
| "grad_norm": 0.16212952136993408, | |
| "learning_rate": 0.00038179741392516104, | |
| "loss": 1.6562, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 0.6191804502416218, | |
| "grad_norm": 0.1477675437927246, | |
| "learning_rate": 0.00038046510501893747, | |
| "loss": 1.5078, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.6199662122343143, | |
| "grad_norm": 0.2077953964471817, | |
| "learning_rate": 0.00037913369567553696, | |
| "loss": 1.5593, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 0.6207519742270067, | |
| "grad_norm": 0.14560724794864655, | |
| "learning_rate": 0.00037780319591451347, | |
| "loss": 1.5988, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6215377362196991, | |
| "grad_norm": 0.15922938287258148, | |
| "learning_rate": 0.0003764736157485761, | |
| "loss": 1.585, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 0.6223234982123914, | |
| "grad_norm": 0.15636377036571503, | |
| "learning_rate": 0.0003751449651835131, | |
| "loss": 1.6747, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6231092602050838, | |
| "grad_norm": 0.17764806747436523, | |
| "learning_rate": 0.00037381725421811783, | |
| "loss": 1.6357, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 0.6238950221977763, | |
| "grad_norm": 0.18865743279457092, | |
| "learning_rate": 0.000372490492844112, | |
| "loss": 1.5983, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.6246807841904687, | |
| "grad_norm": 0.25516217947006226, | |
| "learning_rate": 0.0003711646910460713, | |
| "loss": 1.6183, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.6254665461831611, | |
| "grad_norm": 0.20700369775295258, | |
| "learning_rate": 0.00036983985880134987, | |
| "loss": 1.5452, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.6262523081758535, | |
| "grad_norm": 0.16521629691123962, | |
| "learning_rate": 0.0003685160060800059, | |
| "loss": 1.5794, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 0.627038070168546, | |
| "grad_norm": 0.13135258853435516, | |
| "learning_rate": 0.00036719314284472604, | |
| "loss": 1.6298, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.6278238321612384, | |
| "grad_norm": 0.13091085851192474, | |
| "learning_rate": 0.00036587127905075037, | |
| "loss": 1.553, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 0.6286095941539308, | |
| "grad_norm": 0.11903288215398788, | |
| "learning_rate": 0.0003645504246457976, | |
| "loss": 1.617, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6293953561466232, | |
| "grad_norm": 0.21342940628528595, | |
| "learning_rate": 0.00036323058956999024, | |
| "loss": 1.6411, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 0.6301811181393157, | |
| "grad_norm": 0.15517936646938324, | |
| "learning_rate": 0.00036191178375578046, | |
| "loss": 1.5769, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.630966880132008, | |
| "grad_norm": 0.15112268924713135, | |
| "learning_rate": 0.00036059401712787393, | |
| "loss": 1.6215, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 0.6317526421247004, | |
| "grad_norm": 0.12736016511917114, | |
| "learning_rate": 0.000359277299603156, | |
| "loss": 1.5587, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.6325384041173928, | |
| "grad_norm": 0.11462604254484177, | |
| "learning_rate": 0.0003579616410906174, | |
| "loss": 1.6249, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.6333241661100852, | |
| "grad_norm": 0.12737910449504852, | |
| "learning_rate": 0.00035664705149127856, | |
| "loss": 1.6519, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.6341099281027777, | |
| "grad_norm": 0.13220828771591187, | |
| "learning_rate": 0.00035533354069811665, | |
| "loss": 1.5816, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 0.6348956900954701, | |
| "grad_norm": 0.18805982172489166, | |
| "learning_rate": 0.00035402111859598963, | |
| "loss": 1.5487, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.6356814520881625, | |
| "grad_norm": 0.1409517526626587, | |
| "learning_rate": 0.0003527097950615625, | |
| "loss": 1.6414, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 0.6364672140808549, | |
| "grad_norm": 0.12021984159946442, | |
| "learning_rate": 0.0003513995799632332, | |
| "loss": 1.5564, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.6372529760735474, | |
| "grad_norm": 0.12560012936592102, | |
| "learning_rate": 0.00035009048316105815, | |
| "loss": 1.6305, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 0.6380387380662398, | |
| "grad_norm": 0.11739975959062576, | |
| "learning_rate": 0.00034878251450667767, | |
| "loss": 1.6374, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.6388245000589321, | |
| "grad_norm": 0.1290743499994278, | |
| "learning_rate": 0.00034747568384324253, | |
| "loss": 1.5489, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 0.6396102620516245, | |
| "grad_norm": 0.1409534215927124, | |
| "learning_rate": 0.00034617000100533923, | |
| "loss": 1.5066, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.640396024044317, | |
| "grad_norm": 0.11737538129091263, | |
| "learning_rate": 0.00034486547581891625, | |
| "loss": 1.6869, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.6411817860370094, | |
| "grad_norm": 0.1106194406747818, | |
| "learning_rate": 0.00034356211810121083, | |
| "loss": 1.6172, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.6419675480297018, | |
| "grad_norm": 0.14032632112503052, | |
| "learning_rate": 0.0003422599376606735, | |
| "loss": 1.6105, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 0.6427533100223942, | |
| "grad_norm": 0.14203083515167236, | |
| "learning_rate": 0.00034095894429689554, | |
| "loss": 1.5882, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.6435390720150866, | |
| "grad_norm": 0.13212530314922333, | |
| "learning_rate": 0.00033965914780053496, | |
| "loss": 1.6069, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.6443248340077791, | |
| "grad_norm": 0.12459713220596313, | |
| "learning_rate": 0.0003383605579532425, | |
| "loss": 1.6644, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.6451105960004715, | |
| "grad_norm": 0.1313340663909912, | |
| "learning_rate": 0.0003370631845275883, | |
| "loss": 1.5869, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 0.6458963579931639, | |
| "grad_norm": 0.13839416205883026, | |
| "learning_rate": 0.00033576703728698855, | |
| "loss": 1.5567, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.6466821199858562, | |
| "grad_norm": 0.11876852810382843, | |
| "learning_rate": 0.00033447212598563126, | |
| "loss": 1.6101, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 0.6474678819785487, | |
| "grad_norm": 0.13081587851047516, | |
| "learning_rate": 0.0003331784603684035, | |
| "loss": 1.5729, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.6482536439712411, | |
| "grad_norm": 0.15403269231319427, | |
| "learning_rate": 0.0003318860501708184, | |
| "loss": 1.5955, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.6490394059639335, | |
| "grad_norm": 0.1455393135547638, | |
| "learning_rate": 0.0003305949051189409, | |
| "loss": 1.5778, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.6498251679566259, | |
| "grad_norm": 0.13471262156963348, | |
| "learning_rate": 0.00032930503492931514, | |
| "loss": 1.5595, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 0.6506109299493184, | |
| "grad_norm": 0.15387079119682312, | |
| "learning_rate": 0.0003280164493088912, | |
| "loss": 1.5661, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.6513966919420108, | |
| "grad_norm": 0.15407784283161163, | |
| "learning_rate": 0.00032672915795495223, | |
| "loss": 1.5295, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 0.6521824539347032, | |
| "grad_norm": 0.13979849219322205, | |
| "learning_rate": 0.0003254431705550414, | |
| "loss": 1.5533, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6529682159273956, | |
| "grad_norm": 0.15691180527210236, | |
| "learning_rate": 0.0003241584967868888, | |
| "loss": 1.5126, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.653753977920088, | |
| "grad_norm": 0.1667742133140564, | |
| "learning_rate": 0.0003228751463183388, | |
| "loss": 1.5099, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.6545397399127805, | |
| "grad_norm": 0.1104639545083046, | |
| "learning_rate": 0.00032159312880727724, | |
| "loss": 1.5985, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 0.6553255019054728, | |
| "grad_norm": 0.11984073370695114, | |
| "learning_rate": 0.00032031245390155854, | |
| "loss": 1.6355, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.6561112638981652, | |
| "grad_norm": 0.14178301393985748, | |
| "learning_rate": 0.0003190331312389341, | |
| "loss": 1.6044, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.6568970258908576, | |
| "grad_norm": 0.13143818080425262, | |
| "learning_rate": 0.0003177551704469779, | |
| "loss": 1.6314, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.6576827878835501, | |
| "grad_norm": 0.12936004996299744, | |
| "learning_rate": 0.00031647858114301585, | |
| "loss": 1.5972, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.6584685498762425, | |
| "grad_norm": 0.13757792115211487, | |
| "learning_rate": 0.00031520337293405236, | |
| "loss": 1.647, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.6592543118689349, | |
| "grad_norm": 0.12552618980407715, | |
| "learning_rate": 0.00031392955541669843, | |
| "loss": 1.6214, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 0.6600400738616273, | |
| "grad_norm": 0.14673569798469543, | |
| "learning_rate": 0.0003126571381770998, | |
| "loss": 1.6282, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.6608258358543198, | |
| "grad_norm": 0.13930362462997437, | |
| "learning_rate": 0.0003113861307908638, | |
| "loss": 1.6094, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 0.6616115978470122, | |
| "grad_norm": 0.15072229504585266, | |
| "learning_rate": 0.00031011654282298815, | |
| "loss": 1.5527, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.6623973598397046, | |
| "grad_norm": 0.18607932329177856, | |
| "learning_rate": 0.0003088483838277885, | |
| "loss": 1.6465, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 0.6631831218323969, | |
| "grad_norm": 0.17094473540782928, | |
| "learning_rate": 0.0003075816633488271, | |
| "loss": 1.5388, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.6639688838250893, | |
| "grad_norm": 0.12629753351211548, | |
| "learning_rate": 0.0003063163909188403, | |
| "loss": 1.5959, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.6647546458177818, | |
| "grad_norm": 0.15863820910453796, | |
| "learning_rate": 0.0003050525760596673, | |
| "loss": 1.6186, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.6655404078104742, | |
| "grad_norm": 0.16218873858451843, | |
| "learning_rate": 0.00030379022828217806, | |
| "loss": 1.5959, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 0.6663261698031666, | |
| "grad_norm": 0.12031345069408417, | |
| "learning_rate": 0.0003025293570862021, | |
| "loss": 1.6435, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.667111931795859, | |
| "grad_norm": 0.13391123712062836, | |
| "learning_rate": 0.0003012699719604573, | |
| "loss": 1.5797, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 0.6678976937885515, | |
| "grad_norm": 0.1528289020061493, | |
| "learning_rate": 0.0003000120823824775, | |
| "loss": 1.5509, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.6686834557812439, | |
| "grad_norm": 0.13060513138771057, | |
| "learning_rate": 0.00029875569781854204, | |
| "loss": 1.5022, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 0.6694692177739363, | |
| "grad_norm": 0.12339182198047638, | |
| "learning_rate": 0.0002975008277236041, | |
| "loss": 1.5875, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.6702549797666287, | |
| "grad_norm": 0.13095946609973907, | |
| "learning_rate": 0.00029624748154121974, | |
| "loss": 1.507, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 0.671040741759321, | |
| "grad_norm": 0.1417776495218277, | |
| "learning_rate": 0.0002949956687034769, | |
| "loss": 1.5078, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.6718265037520135, | |
| "grad_norm": 0.1152493953704834, | |
| "learning_rate": 0.0002937453986309242, | |
| "loss": 1.5462, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.6726122657447059, | |
| "grad_norm": 0.11423389613628387, | |
| "learning_rate": 0.00029249668073250015, | |
| "loss": 1.5613, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.6733980277373983, | |
| "grad_norm": 0.15614832937717438, | |
| "learning_rate": 0.000291249524405462, | |
| "loss": 1.5854, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 0.6741837897300907, | |
| "grad_norm": 0.11558407545089722, | |
| "learning_rate": 0.0002900039390353164, | |
| "loss": 1.5662, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.6749695517227832, | |
| "grad_norm": 0.11645932495594025, | |
| "learning_rate": 0.00028875993399574634, | |
| "loss": 1.532, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 0.6757553137154756, | |
| "grad_norm": 0.18881000578403473, | |
| "learning_rate": 0.00028751751864854316, | |
| "loss": 1.5883, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.676541075708168, | |
| "grad_norm": 0.12830057740211487, | |
| "learning_rate": 0.0002862767023435339, | |
| "loss": 1.7049, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.6773268377008604, | |
| "grad_norm": 0.16305623948574066, | |
| "learning_rate": 0.0002850374944185128, | |
| "loss": 1.5801, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.6781125996935529, | |
| "grad_norm": 0.13387204706668854, | |
| "learning_rate": 0.0002837999041991699, | |
| "loss": 1.5547, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 0.6788983616862453, | |
| "grad_norm": 0.14606694877147675, | |
| "learning_rate": 0.00028256394099902127, | |
| "loss": 1.5591, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.6796841236789376, | |
| "grad_norm": 0.1575789600610733, | |
| "learning_rate": 0.0002813296141193384, | |
| "loss": 1.5756, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.68046988567163, | |
| "grad_norm": 0.17807775735855103, | |
| "learning_rate": 0.00028009693284907926, | |
| "loss": 1.6095, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.6812556476643225, | |
| "grad_norm": 0.1488974243402481, | |
| "learning_rate": 0.00027886590646481705, | |
| "loss": 1.5749, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 0.6820414096570149, | |
| "grad_norm": 0.14755655825138092, | |
| "learning_rate": 0.00027763654423067143, | |
| "loss": 1.5712, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.6828271716497073, | |
| "grad_norm": 0.1276608258485794, | |
| "learning_rate": 0.0002764088553982388, | |
| "loss": 1.5585, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 0.6836129336423997, | |
| "grad_norm": 0.18238218128681183, | |
| "learning_rate": 0.00027518284920652157, | |
| "loss": 1.6127, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.6843986956350921, | |
| "grad_norm": 0.10505139082670212, | |
| "learning_rate": 0.00027395853488186, | |
| "loss": 1.6235, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 0.6851844576277846, | |
| "grad_norm": 0.14508095383644104, | |
| "learning_rate": 0.0002727359216378621, | |
| "loss": 1.5853, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.685970219620477, | |
| "grad_norm": 0.1283094584941864, | |
| "learning_rate": 0.0002715150186753339, | |
| "loss": 1.5724, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 0.6867559816131694, | |
| "grad_norm": 0.13297708332538605, | |
| "learning_rate": 0.00027029583518221135, | |
| "loss": 1.6299, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.6875417436058617, | |
| "grad_norm": 0.1485053300857544, | |
| "learning_rate": 0.0002690783803334897, | |
| "loss": 1.5592, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.6883275055985542, | |
| "grad_norm": 0.14514178037643433, | |
| "learning_rate": 0.00026786266329115595, | |
| "loss": 1.6329, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.6891132675912466, | |
| "grad_norm": 0.12720349431037903, | |
| "learning_rate": 0.0002666486932041188, | |
| "loss": 1.5256, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 0.689899029583939, | |
| "grad_norm": 0.14566215872764587, | |
| "learning_rate": 0.0002654364792081407, | |
| "loss": 1.57, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.6906847915766314, | |
| "grad_norm": 0.1420929729938507, | |
| "learning_rate": 0.00026422603042576765, | |
| "loss": 1.5619, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 0.6914705535693239, | |
| "grad_norm": 0.15932297706604004, | |
| "learning_rate": 0.0002630173559662624, | |
| "loss": 1.5294, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.6922563155620163, | |
| "grad_norm": 0.10718350857496262, | |
| "learning_rate": 0.0002618104649255344, | |
| "loss": 1.5715, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 0.6930420775547087, | |
| "grad_norm": 0.1438807249069214, | |
| "learning_rate": 0.0002606053663860722, | |
| "loss": 1.5939, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.6938278395474011, | |
| "grad_norm": 0.1529790461063385, | |
| "learning_rate": 0.0002594020694168753, | |
| "loss": 1.5308, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 0.6946136015400936, | |
| "grad_norm": 0.1177016943693161, | |
| "learning_rate": 0.0002582005830733841, | |
| "loss": 1.4905, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.695399363532786, | |
| "grad_norm": 0.14389681816101074, | |
| "learning_rate": 0.0002570009163974145, | |
| "loss": 1.5644, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.6961851255254783, | |
| "grad_norm": 0.13227079808712006, | |
| "learning_rate": 0.00025580307841708785, | |
| "loss": 1.5765, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.6969708875181707, | |
| "grad_norm": 0.14656752347946167, | |
| "learning_rate": 0.00025460707814676365, | |
| "loss": 1.5437, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 0.6977566495108631, | |
| "grad_norm": 0.14903421700000763, | |
| "learning_rate": 0.00025341292458697134, | |
| "loss": 1.6126, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.6985424115035556, | |
| "grad_norm": 0.13473910093307495, | |
| "learning_rate": 0.00025222062672434364, | |
| "loss": 1.5527, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 0.699328173496248, | |
| "grad_norm": 0.1363900601863861, | |
| "learning_rate": 0.0002510301935315474, | |
| "loss": 1.6206, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.7001139354889404, | |
| "grad_norm": 0.140158548951149, | |
| "learning_rate": 0.00024984163396721737, | |
| "loss": 1.5959, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.7008996974816328, | |
| "grad_norm": 0.13826651871204376, | |
| "learning_rate": 0.0002486549569758882, | |
| "loss": 1.5772, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.7016854594743253, | |
| "grad_norm": 0.13384497165679932, | |
| "learning_rate": 0.0002474701714879268, | |
| "loss": 1.5658, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 0.7024712214670177, | |
| "grad_norm": 0.14882899820804596, | |
| "learning_rate": 0.0002462872864194661, | |
| "loss": 1.56, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.7032569834597101, | |
| "grad_norm": 0.1305229365825653, | |
| "learning_rate": 0.0002451063106723366, | |
| "loss": 1.5813, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.7040427454524024, | |
| "grad_norm": 0.1169796884059906, | |
| "learning_rate": 0.00024392725313400127, | |
| "loss": 1.6154, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.7048285074450948, | |
| "grad_norm": 0.2215767800807953, | |
| "learning_rate": 0.00024275012267748646, | |
| "loss": 1.5724, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 0.7056142694377873, | |
| "grad_norm": 0.12483309954404831, | |
| "learning_rate": 0.000241574928161317, | |
| "loss": 1.5052, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.7064000314304797, | |
| "grad_norm": 0.16134008765220642, | |
| "learning_rate": 0.0002404016784294481, | |
| "loss": 1.5418, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 0.7071857934231721, | |
| "grad_norm": 0.13525766134262085, | |
| "learning_rate": 0.00023923038231119992, | |
| "loss": 1.5272, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7079715554158645, | |
| "grad_norm": 0.13309256732463837, | |
| "learning_rate": 0.0002380610486211907, | |
| "loss": 1.5805, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 0.708757317408557, | |
| "grad_norm": 0.14293818175792694, | |
| "learning_rate": 0.00023689368615926988, | |
| "loss": 1.5433, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7095430794012494, | |
| "grad_norm": 0.18992508947849274, | |
| "learning_rate": 0.0002357283037104529, | |
| "loss": 1.5574, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.7103288413939418, | |
| "grad_norm": 0.1394491195678711, | |
| "learning_rate": 0.00023456491004485413, | |
| "loss": 1.5914, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.7111146033866342, | |
| "grad_norm": 0.10986452549695969, | |
| "learning_rate": 0.0002334035139176216, | |
| "loss": 1.543, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.7119003653793265, | |
| "grad_norm": 0.1387847661972046, | |
| "learning_rate": 0.00023224412406887098, | |
| "loss": 1.4953, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.712686127372019, | |
| "grad_norm": 0.13508880138397217, | |
| "learning_rate": 0.00023108674922361894, | |
| "loss": 1.5373, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 0.7134718893647114, | |
| "grad_norm": 0.14578190445899963, | |
| "learning_rate": 0.0002299313980917191, | |
| "loss": 1.5914, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7142576513574038, | |
| "grad_norm": 0.15806812047958374, | |
| "learning_rate": 0.0002287780793677945, | |
| "loss": 1.5966, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 0.7150434133500962, | |
| "grad_norm": 0.14135576784610748, | |
| "learning_rate": 0.00022762680173117456, | |
| "loss": 1.5991, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7158291753427887, | |
| "grad_norm": 0.11881235241889954, | |
| "learning_rate": 0.00022647757384582735, | |
| "loss": 1.5315, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 0.7166149373354811, | |
| "grad_norm": 0.14931359887123108, | |
| "learning_rate": 0.00022533040436029613, | |
| "loss": 1.6535, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.7174006993281735, | |
| "grad_norm": 0.14255623519420624, | |
| "learning_rate": 0.000224185301907633, | |
| "loss": 1.5585, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 0.7181864613208659, | |
| "grad_norm": 0.16669470071792603, | |
| "learning_rate": 0.00022304227510533514, | |
| "loss": 1.5423, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.7189722233135584, | |
| "grad_norm": 0.16331365704536438, | |
| "learning_rate": 0.0002219013325552794, | |
| "loss": 1.5725, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.7197579853062508, | |
| "grad_norm": 0.14862756431102753, | |
| "learning_rate": 0.0002207624828436568, | |
| "loss": 1.5734, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.7205437472989431, | |
| "grad_norm": 0.13208940625190735, | |
| "learning_rate": 0.00021962573454090966, | |
| "loss": 1.5893, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 0.7213295092916355, | |
| "grad_norm": 0.11863264441490173, | |
| "learning_rate": 0.0002184910962016649, | |
| "loss": 1.5707, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.722115271284328, | |
| "grad_norm": 0.11269424110651016, | |
| "learning_rate": 0.00021735857636467237, | |
| "loss": 1.5872, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 0.7229010332770204, | |
| "grad_norm": 0.12873615324497223, | |
| "learning_rate": 0.00021622818355273766, | |
| "loss": 1.6142, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7236867952697128, | |
| "grad_norm": 0.13957516849040985, | |
| "learning_rate": 0.00021509992627266033, | |
| "loss": 1.5326, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 0.7244725572624052, | |
| "grad_norm": 0.13987894356250763, | |
| "learning_rate": 0.00021397381301516822, | |
| "loss": 1.5646, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.7252583192550976, | |
| "grad_norm": 0.12960903346538544, | |
| "learning_rate": 0.00021284985225485486, | |
| "loss": 1.5412, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 0.7260440812477901, | |
| "grad_norm": 0.14691345393657684, | |
| "learning_rate": 0.000211728052450115, | |
| "loss": 1.6413, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.7268298432404825, | |
| "grad_norm": 0.17346711456775665, | |
| "learning_rate": 0.00021060842204308062, | |
| "loss": 1.5836, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.7276156052331749, | |
| "grad_norm": 0.11949850618839264, | |
| "learning_rate": 0.00020949096945955859, | |
| "loss": 1.5568, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.7284013672258672, | |
| "grad_norm": 0.16478979587554932, | |
| "learning_rate": 0.00020837570310896543, | |
| "loss": 1.5923, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 0.7291871292185597, | |
| "grad_norm": 0.16687503457069397, | |
| "learning_rate": 0.0002072626313842666, | |
| "loss": 1.5882, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.7299728912112521, | |
| "grad_norm": 0.13315603137016296, | |
| "learning_rate": 0.0002061517626619105, | |
| "loss": 1.5726, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 0.7307586532039445, | |
| "grad_norm": 0.16446514427661896, | |
| "learning_rate": 0.00020504310530176757, | |
| "loss": 1.5051, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7315444151966369, | |
| "grad_norm": 0.14502394199371338, | |
| "learning_rate": 0.0002039366676470661, | |
| "loss": 1.528, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 0.7323301771893294, | |
| "grad_norm": 0.14425790309906006, | |
| "learning_rate": 0.0002028324580243302, | |
| "loss": 1.5396, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.7331159391820218, | |
| "grad_norm": 0.14922231435775757, | |
| "learning_rate": 0.00020173048474331705, | |
| "loss": 1.5766, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 0.7339017011747142, | |
| "grad_norm": 0.1422736942768097, | |
| "learning_rate": 0.0002006307560969537, | |
| "loss": 1.5823, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.7346874631674066, | |
| "grad_norm": 0.1642303317785263, | |
| "learning_rate": 0.00019953328036127565, | |
| "loss": 1.583, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.735473225160099, | |
| "grad_norm": 0.1379195898771286, | |
| "learning_rate": 0.00019843806579536354, | |
| "loss": 1.5402, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.7362589871527914, | |
| "grad_norm": 0.14833813905715942, | |
| "learning_rate": 0.00019734512064128196, | |
| "loss": 1.5992, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 0.7370447491454838, | |
| "grad_norm": 0.13516221940517426, | |
| "learning_rate": 0.00019625445312401697, | |
| "loss": 1.5905, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.7378305111381762, | |
| "grad_norm": 0.1159839853644371, | |
| "learning_rate": 0.0001951660714514138, | |
| "loss": 1.5938, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 0.7386162731308686, | |
| "grad_norm": 0.12499277293682098, | |
| "learning_rate": 0.00019407998381411603, | |
| "loss": 1.5649, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.7394020351235611, | |
| "grad_norm": 0.12141992896795273, | |
| "learning_rate": 0.0001929961983855027, | |
| "loss": 1.5515, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 0.7401877971162535, | |
| "grad_norm": 0.13888101279735565, | |
| "learning_rate": 0.00019191472332162873, | |
| "loss": 1.5339, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.7409735591089459, | |
| "grad_norm": 0.1663873940706253, | |
| "learning_rate": 0.000190835566761161, | |
| "loss": 1.5977, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 0.7417593211016383, | |
| "grad_norm": 0.15261366963386536, | |
| "learning_rate": 0.00018975873682531942, | |
| "loss": 1.5611, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.7425450830943308, | |
| "grad_norm": 0.12267375737428665, | |
| "learning_rate": 0.000188684241617814, | |
| "loss": 1.5463, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.7433308450870232, | |
| "grad_norm": 0.1139058992266655, | |
| "learning_rate": 0.0001876120892247854, | |
| "loss": 1.5242, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.7441166070797156, | |
| "grad_norm": 0.13267825543880463, | |
| "learning_rate": 0.00018654228771474324, | |
| "loss": 1.5563, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 0.7449023690724079, | |
| "grad_norm": 0.10687974095344543, | |
| "learning_rate": 0.00018547484513850506, | |
| "loss": 1.5425, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.7456881310651003, | |
| "grad_norm": 0.13263630867004395, | |
| "learning_rate": 0.00018440976952913674, | |
| "loss": 1.5726, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 0.7464738930577928, | |
| "grad_norm": 0.16454148292541504, | |
| "learning_rate": 0.00018334706890189102, | |
| "loss": 1.6489, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.7472596550504852, | |
| "grad_norm": 0.12462660670280457, | |
| "learning_rate": 0.00018228675125414796, | |
| "loss": 1.5117, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 0.7480454170431776, | |
| "grad_norm": 0.13918079435825348, | |
| "learning_rate": 0.00018122882456535438, | |
| "loss": 1.5798, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.74883117903587, | |
| "grad_norm": 0.12236585468053818, | |
| "learning_rate": 0.00018017329679696415, | |
| "loss": 1.5931, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 0.7496169410285625, | |
| "grad_norm": 0.15193891525268555, | |
| "learning_rate": 0.00017912017589237728, | |
| "loss": 1.5679, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.7504027030212549, | |
| "grad_norm": 0.1298915594816208, | |
| "learning_rate": 0.00017806946977688148, | |
| "loss": 1.6734, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.7511884650139473, | |
| "grad_norm": 0.12884870171546936, | |
| "learning_rate": 0.00017702118635759195, | |
| "loss": 1.5389, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.7519742270066397, | |
| "grad_norm": 0.12693621218204498, | |
| "learning_rate": 0.00017597533352339123, | |
| "loss": 1.6321, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 0.752759988999332, | |
| "grad_norm": 0.15435655415058136, | |
| "learning_rate": 0.0001749319191448712, | |
| "loss": 1.5707, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.7535457509920245, | |
| "grad_norm": 0.12875737249851227, | |
| "learning_rate": 0.00017389095107427237, | |
| "loss": 1.4873, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 0.7543315129847169, | |
| "grad_norm": 0.11512191593647003, | |
| "learning_rate": 0.00017285243714542593, | |
| "loss": 1.5503, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.7551172749774093, | |
| "grad_norm": 0.1425315886735916, | |
| "learning_rate": 0.0001718163851736943, | |
| "loss": 1.673, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 0.7559030369701017, | |
| "grad_norm": 0.1565292924642563, | |
| "learning_rate": 0.00017078280295591253, | |
| "loss": 1.5589, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.7566887989627942, | |
| "grad_norm": 0.12783244252204895, | |
| "learning_rate": 0.00016975169827032887, | |
| "loss": 1.5735, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 0.7574745609554866, | |
| "grad_norm": 0.17071731388568878, | |
| "learning_rate": 0.0001687230788765477, | |
| "loss": 1.5362, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.758260322948179, | |
| "grad_norm": 0.13069270551204681, | |
| "learning_rate": 0.00016769695251546947, | |
| "loss": 1.5619, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 0.7590460849408714, | |
| "grad_norm": 0.16330239176750183, | |
| "learning_rate": 0.0001666733269092337, | |
| "loss": 1.573, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.7598318469335639, | |
| "grad_norm": 0.1309252232313156, | |
| "learning_rate": 0.00016565220976116058, | |
| "loss": 1.6176, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 0.7606176089262562, | |
| "grad_norm": 0.14916756749153137, | |
| "learning_rate": 0.00016463360875569223, | |
| "loss": 1.5321, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.7614033709189486, | |
| "grad_norm": 0.1265818029642105, | |
| "learning_rate": 0.00016361753155833596, | |
| "loss": 1.6256, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 0.762189132911641, | |
| "grad_norm": 0.17490801215171814, | |
| "learning_rate": 0.0001626039858156062, | |
| "loss": 1.5349, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.7629748949043335, | |
| "grad_norm": 0.14255455136299133, | |
| "learning_rate": 0.0001615929791549663, | |
| "loss": 1.5207, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 0.7637606568970259, | |
| "grad_norm": 0.13435468077659607, | |
| "learning_rate": 0.00016058451918477223, | |
| "loss": 1.5818, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.7645464188897183, | |
| "grad_norm": 0.14869236946105957, | |
| "learning_rate": 0.00015957861349421437, | |
| "loss": 1.5473, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 0.7653321808824107, | |
| "grad_norm": 0.1301611363887787, | |
| "learning_rate": 0.00015857526965326108, | |
| "loss": 1.5311, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.7661179428751032, | |
| "grad_norm": 0.1497713327407837, | |
| "learning_rate": 0.00015757449521260143, | |
| "loss": 1.6037, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.7669037048677956, | |
| "grad_norm": 0.12209343910217285, | |
| "learning_rate": 0.00015657629770358838, | |
| "loss": 1.6328, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.767689466860488, | |
| "grad_norm": 0.13366752862930298, | |
| "learning_rate": 0.00015558068463818192, | |
| "loss": 1.6426, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 0.7684752288531804, | |
| "grad_norm": 0.1502147614955902, | |
| "learning_rate": 0.00015458766350889315, | |
| "loss": 1.5698, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.7692609908458727, | |
| "grad_norm": 0.13302424550056458, | |
| "learning_rate": 0.00015359724178872693, | |
| "loss": 1.5695, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 0.7700467528385652, | |
| "grad_norm": 0.1549658626317978, | |
| "learning_rate": 0.00015260942693112674, | |
| "loss": 1.6054, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.7708325148312576, | |
| "grad_norm": 0.15308880805969238, | |
| "learning_rate": 0.00015162422636991795, | |
| "loss": 1.5842, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 0.77161827682395, | |
| "grad_norm": 0.18410451710224152, | |
| "learning_rate": 0.00015064164751925165, | |
| "loss": 1.5244, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.7724040388166424, | |
| "grad_norm": 0.1323505938053131, | |
| "learning_rate": 0.0001496616977735496, | |
| "loss": 1.5401, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 0.7731898008093349, | |
| "grad_norm": 0.1685735434293747, | |
| "learning_rate": 0.00014868438450744753, | |
| "loss": 1.6259, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.7739755628020273, | |
| "grad_norm": 0.12531651556491852, | |
| "learning_rate": 0.00014770971507574137, | |
| "loss": 1.5469, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 0.7747613247947197, | |
| "grad_norm": 0.13574694097042084, | |
| "learning_rate": 0.00014673769681332967, | |
| "loss": 1.5455, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.7755470867874121, | |
| "grad_norm": 0.14784350991249084, | |
| "learning_rate": 0.00014576833703516034, | |
| "loss": 1.5476, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 0.7763328487801046, | |
| "grad_norm": 0.1291940063238144, | |
| "learning_rate": 0.0001448016430361741, | |
| "loss": 1.5855, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.7771186107727969, | |
| "grad_norm": 0.13253316283226013, | |
| "learning_rate": 0.00014383762209125095, | |
| "loss": 1.5273, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 0.7779043727654893, | |
| "grad_norm": 0.12694372236728668, | |
| "learning_rate": 0.00014287628145515452, | |
| "loss": 1.5332, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.7786901347581817, | |
| "grad_norm": 0.12803617119789124, | |
| "learning_rate": 0.00014191762836247734, | |
| "loss": 1.5241, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 0.7794758967508741, | |
| "grad_norm": 0.14353320002555847, | |
| "learning_rate": 0.00014096167002758747, | |
| "loss": 1.6188, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.7802616587435666, | |
| "grad_norm": 0.12240661680698395, | |
| "learning_rate": 0.00014000841364457267, | |
| "loss": 1.5319, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 0.781047420736259, | |
| "grad_norm": 0.11028309911489487, | |
| "learning_rate": 0.00013905786638718803, | |
| "loss": 1.5787, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.7818331827289514, | |
| "grad_norm": 0.12622590363025665, | |
| "learning_rate": 0.00013811003540880023, | |
| "loss": 1.5253, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 0.7826189447216438, | |
| "grad_norm": 0.1264607012271881, | |
| "learning_rate": 0.00013716492784233508, | |
| "loss": 1.5129, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.7834047067143363, | |
| "grad_norm": 0.12151068449020386, | |
| "learning_rate": 0.00013622255080022279, | |
| "loss": 1.6467, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 0.7841904687070287, | |
| "grad_norm": 0.15833619236946106, | |
| "learning_rate": 0.0001352829113743453, | |
| "loss": 1.5941, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.784976230699721, | |
| "grad_norm": 0.13726761937141418, | |
| "learning_rate": 0.00013434601663598272, | |
| "loss": 1.5248, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 0.7857619926924134, | |
| "grad_norm": 0.13140997290611267, | |
| "learning_rate": 0.00013341187363575936, | |
| "loss": 1.6044, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.7865477546851058, | |
| "grad_norm": 0.12105604261159897, | |
| "learning_rate": 0.0001324804894035918, | |
| "loss": 1.5391, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 0.7873335166777983, | |
| "grad_norm": 0.13176581263542175, | |
| "learning_rate": 0.00013155187094863498, | |
| "loss": 1.5185, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.7881192786704907, | |
| "grad_norm": 0.16288596391677856, | |
| "learning_rate": 0.00013062602525923034, | |
| "loss": 1.5822, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 0.7889050406631831, | |
| "grad_norm": 0.14374810457229614, | |
| "learning_rate": 0.00012970295930285274, | |
| "loss": 1.5183, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.7896908026558755, | |
| "grad_norm": 0.13987670838832855, | |
| "learning_rate": 0.00012878268002605776, | |
| "loss": 1.5824, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.790476564648568, | |
| "grad_norm": 0.1393355280160904, | |
| "learning_rate": 0.0001278651943544301, | |
| "loss": 1.5475, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.7912623266412604, | |
| "grad_norm": 0.14403095841407776, | |
| "learning_rate": 0.0001269505091925307, | |
| "loss": 1.5717, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 0.7920480886339528, | |
| "grad_norm": 0.1370304971933365, | |
| "learning_rate": 0.00012603863142384596, | |
| "loss": 1.5685, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.7928338506266452, | |
| "grad_norm": 0.12681813538074493, | |
| "learning_rate": 0.0001251295679107343, | |
| "loss": 1.5349, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 0.7936196126193376, | |
| "grad_norm": 0.12516118586063385, | |
| "learning_rate": 0.0001242233254943761, | |
| "loss": 1.5502, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.79440537461203, | |
| "grad_norm": 0.10329622775316238, | |
| "learning_rate": 0.00012331991099472084, | |
| "loss": 1.5565, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 0.7951911366047224, | |
| "grad_norm": 0.13856732845306396, | |
| "learning_rate": 0.00012241933121043692, | |
| "loss": 1.6043, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.7959768985974148, | |
| "grad_norm": 0.11811645328998566, | |
| "learning_rate": 0.00012152159291886012, | |
| "loss": 1.5341, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 0.7967626605901073, | |
| "grad_norm": 0.1346331089735031, | |
| "learning_rate": 0.0001206267028759419, | |
| "loss": 1.5075, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.7975484225827997, | |
| "grad_norm": 0.12564124166965485, | |
| "learning_rate": 0.00011973466781619984, | |
| "loss": 1.5282, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.7983341845754921, | |
| "grad_norm": 0.11750493198633194, | |
| "learning_rate": 0.00011884549445266552, | |
| "loss": 1.5096, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.7991199465681845, | |
| "grad_norm": 0.13426090776920319, | |
| "learning_rate": 0.00011795918947683576, | |
| "loss": 1.5373, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 0.799905708560877, | |
| "grad_norm": 0.14929060637950897, | |
| "learning_rate": 0.00011707575955862021, | |
| "loss": 1.5751, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.8006914705535694, | |
| "grad_norm": 0.13136523962020874, | |
| "learning_rate": 0.00011619521134629301, | |
| "loss": 1.5116, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 0.8014772325462617, | |
| "grad_norm": 0.14692793786525726, | |
| "learning_rate": 0.00011531755146644135, | |
| "loss": 1.5606, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.8022629945389541, | |
| "grad_norm": 0.1299007385969162, | |
| "learning_rate": 0.00011444278652391659, | |
| "loss": 1.5613, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 0.8030487565316465, | |
| "grad_norm": 0.1310184895992279, | |
| "learning_rate": 0.00011357092310178412, | |
| "loss": 1.5169, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.803834518524339, | |
| "grad_norm": 0.16118641197681427, | |
| "learning_rate": 0.00011270196776127362, | |
| "loss": 1.5392, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 0.8046202805170314, | |
| "grad_norm": 0.13983090221881866, | |
| "learning_rate": 0.00011183592704173029, | |
| "loss": 1.6329, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.8054060425097238, | |
| "grad_norm": 0.14741620421409607, | |
| "learning_rate": 0.00011097280746056482, | |
| "loss": 1.5803, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.8061918045024162, | |
| "grad_norm": 0.1304011195898056, | |
| "learning_rate": 0.00011011261551320528, | |
| "loss": 1.5256, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.8069775664951087, | |
| "grad_norm": 0.14894512295722961, | |
| "learning_rate": 0.00010925535767304751, | |
| "loss": 1.5956, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 0.8077633284878011, | |
| "grad_norm": 0.11353661864995956, | |
| "learning_rate": 0.00010840104039140681, | |
| "loss": 1.6066, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.8085490904804935, | |
| "grad_norm": 0.13712039589881897, | |
| "learning_rate": 0.0001075496700974688, | |
| "loss": 1.5676, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 0.8093348524731858, | |
| "grad_norm": 0.12836557626724243, | |
| "learning_rate": 0.00010670125319824203, | |
| "loss": 1.535, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.8101206144658782, | |
| "grad_norm": 0.12600398063659668, | |
| "learning_rate": 0.00010585579607850903, | |
| "loss": 1.6436, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 0.8109063764585707, | |
| "grad_norm": 0.11498034000396729, | |
| "learning_rate": 0.00010501330510077812, | |
| "loss": 1.5237, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8116921384512631, | |
| "grad_norm": 0.1327584683895111, | |
| "learning_rate": 0.0001041737866052363, | |
| "loss": 1.4921, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 0.8124779004439555, | |
| "grad_norm": 0.1261293739080429, | |
| "learning_rate": 0.0001033372469097007, | |
| "loss": 1.4974, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.8132636624366479, | |
| "grad_norm": 0.1574053317308426, | |
| "learning_rate": 0.00010250369230957163, | |
| "loss": 1.5147, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.8140494244293404, | |
| "grad_norm": 0.13025632500648499, | |
| "learning_rate": 0.00010167312907778514, | |
| "loss": 1.5261, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8148351864220328, | |
| "grad_norm": 0.13743074238300323, | |
| "learning_rate": 0.00010084556346476526, | |
| "loss": 1.546, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 0.8156209484147252, | |
| "grad_norm": 0.14771868288516998, | |
| "learning_rate": 0.0001000210016983777, | |
| "loss": 1.6203, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.8164067104074176, | |
| "grad_norm": 0.12153328210115433, | |
| "learning_rate": 9.919944998388236e-05, | |
| "loss": 1.492, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 0.8171924724001101, | |
| "grad_norm": 0.12602929770946503, | |
| "learning_rate": 9.83809145038872e-05, | |
| "loss": 1.5288, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8179782343928024, | |
| "grad_norm": 0.11900759488344193, | |
| "learning_rate": 9.756540141830134e-05, | |
| "loss": 1.498, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 0.8187639963854948, | |
| "grad_norm": 0.1492881178855896, | |
| "learning_rate": 9.675291686428883e-05, | |
| "loss": 1.5234, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8195497583781872, | |
| "grad_norm": 0.1102166399359703, | |
| "learning_rate": 9.59434669562222e-05, | |
| "loss": 1.5694, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 0.8203355203708796, | |
| "grad_norm": 0.12211363017559052, | |
| "learning_rate": 9.513705778563692e-05, | |
| "loss": 1.5243, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.8211212823635721, | |
| "grad_norm": 0.12994244694709778, | |
| "learning_rate": 9.433369542118537e-05, | |
| "loss": 1.5714, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 0.8219070443562645, | |
| "grad_norm": 0.126256063580513, | |
| "learning_rate": 9.353338590859078e-05, | |
| "loss": 1.5297, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.8226928063489569, | |
| "grad_norm": 0.15575683116912842, | |
| "learning_rate": 9.273613527060254e-05, | |
| "loss": 1.5624, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 0.8234785683416493, | |
| "grad_norm": 0.14137138426303864, | |
| "learning_rate": 9.194194950694984e-05, | |
| "loss": 1.5444, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.8242643303343418, | |
| "grad_norm": 0.15421825647354126, | |
| "learning_rate": 9.115083459429751e-05, | |
| "loss": 1.6643, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 0.8250500923270342, | |
| "grad_norm": 0.13579821586608887, | |
| "learning_rate": 9.03627964862005e-05, | |
| "loss": 1.5539, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.8258358543197265, | |
| "grad_norm": 0.10899634659290314, | |
| "learning_rate": 8.957784111305927e-05, | |
| "loss": 1.4881, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 0.8266216163124189, | |
| "grad_norm": 0.126531720161438, | |
| "learning_rate": 8.879597438207481e-05, | |
| "loss": 1.5383, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.8274073783051114, | |
| "grad_norm": 0.18092769384384155, | |
| "learning_rate": 8.801720217720488e-05, | |
| "loss": 1.5975, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 0.8281931402978038, | |
| "grad_norm": 0.16945770382881165, | |
| "learning_rate": 8.724153035911875e-05, | |
| "loss": 1.559, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.8289789022904962, | |
| "grad_norm": 0.1362890601158142, | |
| "learning_rate": 8.646896476515414e-05, | |
| "loss": 1.5776, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 0.8297646642831886, | |
| "grad_norm": 0.1530720740556717, | |
| "learning_rate": 8.569951120927272e-05, | |
| "loss": 1.5631, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.830550426275881, | |
| "grad_norm": 0.14527654647827148, | |
| "learning_rate": 8.493317548201607e-05, | |
| "loss": 1.5862, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 0.8313361882685735, | |
| "grad_norm": 0.12368170917034149, | |
| "learning_rate": 8.416996335046295e-05, | |
| "loss": 1.5584, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.8321219502612659, | |
| "grad_norm": 0.12269091606140137, | |
| "learning_rate": 8.340988055818522e-05, | |
| "loss": 1.5286, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 0.8329077122539583, | |
| "grad_norm": 0.11459631472826004, | |
| "learning_rate": 8.265293282520492e-05, | |
| "loss": 1.589, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.8336934742466506, | |
| "grad_norm": 0.17323504388332367, | |
| "learning_rate": 8.189912584795072e-05, | |
| "loss": 1.5524, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 0.8344792362393431, | |
| "grad_norm": 0.12731066346168518, | |
| "learning_rate": 8.11484652992161e-05, | |
| "loss": 1.4795, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.8352649982320355, | |
| "grad_norm": 0.12014425545930862, | |
| "learning_rate": 8.040095682811539e-05, | |
| "loss": 1.5776, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 0.8360507602247279, | |
| "grad_norm": 0.13413500785827637, | |
| "learning_rate": 7.965660606004232e-05, | |
| "loss": 1.5533, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.8368365222174203, | |
| "grad_norm": 0.11268212646245956, | |
| "learning_rate": 7.891541859662715e-05, | |
| "loss": 1.5351, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.8376222842101128, | |
| "grad_norm": 0.1282140612602234, | |
| "learning_rate": 7.81774000156944e-05, | |
| "loss": 1.5849, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.8384080462028052, | |
| "grad_norm": 0.14709092676639557, | |
| "learning_rate": 7.74425558712214e-05, | |
| "loss": 1.4882, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 0.8391938081954976, | |
| "grad_norm": 0.13272453844547272, | |
| "learning_rate": 7.671089169329581e-05, | |
| "loss": 1.5516, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.83997957018819, | |
| "grad_norm": 0.14306382834911346, | |
| "learning_rate": 7.598241298807479e-05, | |
| "loss": 1.5861, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 0.8407653321808825, | |
| "grad_norm": 0.17192484438419342, | |
| "learning_rate": 7.525712523774292e-05, | |
| "loss": 1.5499, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.8415510941735749, | |
| "grad_norm": 0.11474210023880005, | |
| "learning_rate": 7.453503390047106e-05, | |
| "loss": 1.5965, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 0.8423368561662672, | |
| "grad_norm": 0.14394626021385193, | |
| "learning_rate": 7.381614441037554e-05, | |
| "loss": 1.6175, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.8431226181589596, | |
| "grad_norm": 0.1367376148700714, | |
| "learning_rate": 7.310046217747717e-05, | |
| "loss": 1.5275, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 0.843908380151652, | |
| "grad_norm": 0.12612248957157135, | |
| "learning_rate": 7.238799258766048e-05, | |
| "loss": 1.5517, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.8446941421443445, | |
| "grad_norm": 0.12763731181621552, | |
| "learning_rate": 7.167874100263283e-05, | |
| "loss": 1.6903, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.8454799041370369, | |
| "grad_norm": 0.13539037108421326, | |
| "learning_rate": 7.097271275988482e-05, | |
| "loss": 1.599, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.8462656661297293, | |
| "grad_norm": 0.1278766244649887, | |
| "learning_rate": 7.026991317264941e-05, | |
| "loss": 1.5837, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 0.8470514281224217, | |
| "grad_norm": 0.14802652597427368, | |
| "learning_rate": 6.95703475298624e-05, | |
| "loss": 1.5944, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.8478371901151142, | |
| "grad_norm": 0.14598144590854645, | |
| "learning_rate": 6.887402109612262e-05, | |
| "loss": 1.5986, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 0.8486229521078066, | |
| "grad_norm": 0.17217661440372467, | |
| "learning_rate": 6.818093911165163e-05, | |
| "loss": 1.6023, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.849408714100499, | |
| "grad_norm": 0.11938291043043137, | |
| "learning_rate": 6.74911067922554e-05, | |
| "loss": 1.5861, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 0.8501944760931913, | |
| "grad_norm": 0.13088202476501465, | |
| "learning_rate": 6.680452932928382e-05, | |
| "loss": 1.5966, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.8509802380858837, | |
| "grad_norm": 0.1346057802438736, | |
| "learning_rate": 6.612121188959319e-05, | |
| "loss": 1.5539, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 0.8517660000785762, | |
| "grad_norm": 0.18001966178417206, | |
| "learning_rate": 6.544115961550545e-05, | |
| "loss": 1.4906, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.8525517620712686, | |
| "grad_norm": 0.12191446870565414, | |
| "learning_rate": 6.476437762477117e-05, | |
| "loss": 1.5589, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 0.853337524063961, | |
| "grad_norm": 0.1333189755678177, | |
| "learning_rate": 6.409087101052979e-05, | |
| "loss": 1.5569, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.8541232860566534, | |
| "grad_norm": 0.12484955042600632, | |
| "learning_rate": 6.342064484127224e-05, | |
| "loss": 1.5843, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 0.8549090480493459, | |
| "grad_norm": 0.13213855028152466, | |
| "learning_rate": 6.275370416080223e-05, | |
| "loss": 1.6526, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.8556948100420383, | |
| "grad_norm": 0.1528891623020172, | |
| "learning_rate": 6.209005398819828e-05, | |
| "loss": 1.5803, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 0.8564805720347307, | |
| "grad_norm": 0.10992589592933655, | |
| "learning_rate": 6.142969931777648e-05, | |
| "loss": 1.5425, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.8572663340274231, | |
| "grad_norm": 0.1314469575881958, | |
| "learning_rate": 6.077264511905195e-05, | |
| "loss": 1.5244, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 0.8580520960201156, | |
| "grad_norm": 0.1289691925048828, | |
| "learning_rate": 6.0118896336702814e-05, | |
| "loss": 1.5982, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.8588378580128079, | |
| "grad_norm": 0.13314056396484375, | |
| "learning_rate": 5.946845789053146e-05, | |
| "loss": 1.5215, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 0.8596236200055003, | |
| "grad_norm": 0.11456876993179321, | |
| "learning_rate": 5.882133467542888e-05, | |
| "loss": 1.5819, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.8604093819981927, | |
| "grad_norm": 0.11907348781824112, | |
| "learning_rate": 5.8177531561336596e-05, | |
| "loss": 1.5479, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.8611951439908851, | |
| "grad_norm": 0.12371833622455597, | |
| "learning_rate": 5.753705339321108e-05, | |
| "loss": 1.5858, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.8619809059835776, | |
| "grad_norm": 0.1354624330997467, | |
| "learning_rate": 5.689990499098685e-05, | |
| "loss": 1.4952, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 0.86276666797627, | |
| "grad_norm": 0.13625556230545044, | |
| "learning_rate": 5.626609114953973e-05, | |
| "loss": 1.5004, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.8635524299689624, | |
| "grad_norm": 0.14950565993785858, | |
| "learning_rate": 5.563561663865191e-05, | |
| "loss": 1.6327, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 0.8643381919616548, | |
| "grad_norm": 0.1316632628440857, | |
| "learning_rate": 5.5008486202974494e-05, | |
| "loss": 1.6422, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8651239539543473, | |
| "grad_norm": 0.11560774594545364, | |
| "learning_rate": 5.4384704561993735e-05, | |
| "loss": 1.4734, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 0.8659097159470397, | |
| "grad_norm": 0.13010354340076447, | |
| "learning_rate": 5.376427640999354e-05, | |
| "loss": 1.6256, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.866695477939732, | |
| "grad_norm": 0.13881151378154755, | |
| "learning_rate": 5.3147206416021385e-05, | |
| "loss": 1.5763, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 0.8674812399324244, | |
| "grad_norm": 0.11300093680620193, | |
| "learning_rate": 5.253349922385298e-05, | |
| "loss": 1.506, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.8682670019251169, | |
| "grad_norm": 0.15863415598869324, | |
| "learning_rate": 5.19231594519568e-05, | |
| "loss": 1.535, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 0.8690527639178093, | |
| "grad_norm": 0.12128844112157822, | |
| "learning_rate": 5.13161916934603e-05, | |
| "loss": 1.5609, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.8698385259105017, | |
| "grad_norm": 0.13314326107501984, | |
| "learning_rate": 5.07126005161142e-05, | |
| "loss": 1.521, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 0.8706242879031941, | |
| "grad_norm": 0.11809918284416199, | |
| "learning_rate": 5.011239046225913e-05, | |
| "loss": 1.5759, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.8714100498958866, | |
| "grad_norm": 0.13252602517604828, | |
| "learning_rate": 4.9515566048790485e-05, | |
| "loss": 1.588, | |
| "step": 5545 | |
| }, | |
| { | |
| "epoch": 0.872195811888579, | |
| "grad_norm": 0.14337563514709473, | |
| "learning_rate": 4.8922131767125345e-05, | |
| "loss": 1.6085, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.8729815738812714, | |
| "grad_norm": 0.1566237211227417, | |
| "learning_rate": 4.833209208316824e-05, | |
| "loss": 1.5998, | |
| "step": 5555 | |
| }, | |
| { | |
| "epoch": 0.8737673358739638, | |
| "grad_norm": 0.14884966611862183, | |
| "learning_rate": 4.774545143727732e-05, | |
| "loss": 1.592, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.8745530978666561, | |
| "grad_norm": 0.13440923392772675, | |
| "learning_rate": 4.716221424423145e-05, | |
| "loss": 1.535, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 0.8753388598593486, | |
| "grad_norm": 0.14891453087329865, | |
| "learning_rate": 4.658238489319655e-05, | |
| "loss": 1.5672, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.876124621852041, | |
| "grad_norm": 0.13791170716285706, | |
| "learning_rate": 4.600596774769283e-05, | |
| "loss": 1.5814, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 0.8769103838447334, | |
| "grad_norm": 0.13698305189609528, | |
| "learning_rate": 4.543296714556189e-05, | |
| "loss": 1.5656, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.8776961458374258, | |
| "grad_norm": 0.12566827237606049, | |
| "learning_rate": 4.4863387398934086e-05, | |
| "loss": 1.5333, | |
| "step": 5585 | |
| }, | |
| { | |
| "epoch": 0.8784819078301183, | |
| "grad_norm": 0.17175309360027313, | |
| "learning_rate": 4.429723279419573e-05, | |
| "loss": 1.5809, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.8792676698228107, | |
| "grad_norm": 0.11969798803329468, | |
| "learning_rate": 4.3734507591957464e-05, | |
| "loss": 1.5349, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 0.8800534318155031, | |
| "grad_norm": 0.13722212612628937, | |
| "learning_rate": 4.31752160270219e-05, | |
| "loss": 1.5832, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.8808391938081955, | |
| "grad_norm": 0.14006294310092926, | |
| "learning_rate": 4.261936230835145e-05, | |
| "loss": 1.5148, | |
| "step": 5605 | |
| }, | |
| { | |
| "epoch": 0.881624955800888, | |
| "grad_norm": 0.14070965349674225, | |
| "learning_rate": 4.20669506190372e-05, | |
| "loss": 1.5549, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.8824107177935804, | |
| "grad_norm": 0.14081864058971405, | |
| "learning_rate": 4.151798511626698e-05, | |
| "loss": 1.548, | |
| "step": 5615 | |
| }, | |
| { | |
| "epoch": 0.8831964797862727, | |
| "grad_norm": 0.1195095032453537, | |
| "learning_rate": 4.097246993129428e-05, | |
| "loss": 1.6033, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.8839822417789651, | |
| "grad_norm": 0.1440693438053131, | |
| "learning_rate": 4.043040916940727e-05, | |
| "loss": 1.5329, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.8847680037716575, | |
| "grad_norm": 0.12744715809822083, | |
| "learning_rate": 3.989180690989774e-05, | |
| "loss": 1.6177, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.88555376576435, | |
| "grad_norm": 0.17127704620361328, | |
| "learning_rate": 3.935666720603026e-05, | |
| "loss": 1.5456, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 0.8863395277570424, | |
| "grad_norm": 0.11687444895505905, | |
| "learning_rate": 3.8824994085012054e-05, | |
| "loss": 1.5794, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.8871252897497348, | |
| "grad_norm": 0.12178084254264832, | |
| "learning_rate": 3.829679154796228e-05, | |
| "loss": 1.554, | |
| "step": 5645 | |
| }, | |
| { | |
| "epoch": 0.8879110517424272, | |
| "grad_norm": 0.13413095474243164, | |
| "learning_rate": 3.777206356988239e-05, | |
| "loss": 1.5518, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.8886968137351197, | |
| "grad_norm": 0.1272350400686264, | |
| "learning_rate": 3.725081409962583e-05, | |
| "loss": 1.5592, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 0.8894825757278121, | |
| "grad_norm": 0.1570257842540741, | |
| "learning_rate": 3.67330470598683e-05, | |
| "loss": 1.6286, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.8902683377205045, | |
| "grad_norm": 0.12927798926830292, | |
| "learning_rate": 3.62187663470786e-05, | |
| "loss": 1.5626, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 0.8910540997131968, | |
| "grad_norm": 0.13783001899719238, | |
| "learning_rate": 3.570797583148916e-05, | |
| "loss": 1.5048, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.8918398617058892, | |
| "grad_norm": 0.13481375575065613, | |
| "learning_rate": 3.52006793570665e-05, | |
| "loss": 1.5288, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 0.8926256236985817, | |
| "grad_norm": 0.15029197931289673, | |
| "learning_rate": 3.4696880741482974e-05, | |
| "loss": 1.549, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.8934113856912741, | |
| "grad_norm": 0.14143696427345276, | |
| "learning_rate": 3.419658377608748e-05, | |
| "loss": 1.5579, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 0.8941971476839665, | |
| "grad_norm": 0.11702083051204681, | |
| "learning_rate": 3.369979222587727e-05, | |
| "loss": 1.579, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.8949829096766589, | |
| "grad_norm": 0.15968121588230133, | |
| "learning_rate": 3.320650982946954e-05, | |
| "loss": 1.6591, | |
| "step": 5695 | |
| }, | |
| { | |
| "epoch": 0.8957686716693514, | |
| "grad_norm": 0.13863512873649597, | |
| "learning_rate": 3.271674029907334e-05, | |
| "loss": 1.5756, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.8965544336620438, | |
| "grad_norm": 0.11312685161828995, | |
| "learning_rate": 3.223048732046124e-05, | |
| "loss": 1.5231, | |
| "step": 5705 | |
| }, | |
| { | |
| "epoch": 0.8973401956547362, | |
| "grad_norm": 0.13332636654376984, | |
| "learning_rate": 3.1747754552942324e-05, | |
| "loss": 1.6331, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.8981259576474286, | |
| "grad_norm": 0.1321624368429184, | |
| "learning_rate": 3.126854562933379e-05, | |
| "loss": 1.5423, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 0.898911719640121, | |
| "grad_norm": 0.12722861766815186, | |
| "learning_rate": 3.079286415593441e-05, | |
| "loss": 1.5334, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.8996974816328134, | |
| "grad_norm": 0.12315835803747177, | |
| "learning_rate": 3.0320713712497062e-05, | |
| "loss": 1.6664, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 0.9004832436255058, | |
| "grad_norm": 0.20989854633808136, | |
| "learning_rate": 2.98520978522015e-05, | |
| "loss": 1.6118, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.9012690056181982, | |
| "grad_norm": 0.12240595370531082, | |
| "learning_rate": 2.938702010162797e-05, | |
| "loss": 1.541, | |
| "step": 5735 | |
| }, | |
| { | |
| "epoch": 0.9020547676108907, | |
| "grad_norm": 0.1253134161233902, | |
| "learning_rate": 2.8925483960730803e-05, | |
| "loss": 1.5571, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.9028405296035831, | |
| "grad_norm": 0.12602074444293976, | |
| "learning_rate": 2.84674929028117e-05, | |
| "loss": 1.6147, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 0.9036262915962755, | |
| "grad_norm": 0.14237385988235474, | |
| "learning_rate": 2.801305037449353e-05, | |
| "loss": 1.5694, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.9044120535889679, | |
| "grad_norm": 0.13480807840824127, | |
| "learning_rate": 2.7562159795695097e-05, | |
| "loss": 1.5912, | |
| "step": 5755 | |
| }, | |
| { | |
| "epoch": 0.9051978155816603, | |
| "grad_norm": 0.13586966693401337, | |
| "learning_rate": 2.7114824559604513e-05, | |
| "loss": 1.552, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.9059835775743528, | |
| "grad_norm": 0.11657349020242691, | |
| "learning_rate": 2.6671048032654188e-05, | |
| "loss": 1.6195, | |
| "step": 5765 | |
| }, | |
| { | |
| "epoch": 0.9067693395670452, | |
| "grad_norm": 0.13299603760242462, | |
| "learning_rate": 2.623083355449557e-05, | |
| "loss": 1.525, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.9075551015597375, | |
| "grad_norm": 0.15020130574703217, | |
| "learning_rate": 2.5794184437973433e-05, | |
| "loss": 1.5987, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 0.9083408635524299, | |
| "grad_norm": 0.12535901367664337, | |
| "learning_rate": 2.536110396910174e-05, | |
| "loss": 1.671, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.9091266255451224, | |
| "grad_norm": 0.13589003682136536, | |
| "learning_rate": 2.4931595407038098e-05, | |
| "loss": 1.6108, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 0.9099123875378148, | |
| "grad_norm": 0.14382244646549225, | |
| "learning_rate": 2.450566198406018e-05, | |
| "loss": 1.5919, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.9106981495305072, | |
| "grad_norm": 0.12787427008152008, | |
| "learning_rate": 2.4083306905540335e-05, | |
| "loss": 1.5341, | |
| "step": 5795 | |
| }, | |
| { | |
| "epoch": 0.9114839115231996, | |
| "grad_norm": 0.12778525054454803, | |
| "learning_rate": 2.36645333499223e-05, | |
| "loss": 1.5093, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.912269673515892, | |
| "grad_norm": 0.14423970878124237, | |
| "learning_rate": 2.3249344468696755e-05, | |
| "loss": 1.5012, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 0.9130554355085845, | |
| "grad_norm": 0.1335286796092987, | |
| "learning_rate": 2.2837743386378007e-05, | |
| "loss": 1.529, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9138411975012769, | |
| "grad_norm": 0.12632855772972107, | |
| "learning_rate": 2.2429733200480308e-05, | |
| "loss": 1.5543, | |
| "step": 5815 | |
| }, | |
| { | |
| "epoch": 0.9146269594939693, | |
| "grad_norm": 0.11702293157577515, | |
| "learning_rate": 2.2025316981494347e-05, | |
| "loss": 1.6215, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9154127214866616, | |
| "grad_norm": 0.13728240132331848, | |
| "learning_rate": 2.1624497772864514e-05, | |
| "loss": 1.551, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 0.9161984834793541, | |
| "grad_norm": 0.1302066147327423, | |
| "learning_rate": 2.1227278590965572e-05, | |
| "loss": 1.5828, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9169842454720465, | |
| "grad_norm": 0.1424468457698822, | |
| "learning_rate": 2.0833662425080557e-05, | |
| "loss": 1.6033, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 0.9177700074647389, | |
| "grad_norm": 0.15224626660346985, | |
| "learning_rate": 2.0443652237377596e-05, | |
| "loss": 1.6329, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.9185557694574313, | |
| "grad_norm": 0.1158602312207222, | |
| "learning_rate": 2.0057250962887964e-05, | |
| "loss": 1.5337, | |
| "step": 5845 | |
| }, | |
| { | |
| "epoch": 0.9193415314501238, | |
| "grad_norm": 0.145168736577034, | |
| "learning_rate": 1.9674461509484266e-05, | |
| "loss": 1.5735, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9201272934428162, | |
| "grad_norm": 0.14250454306602478, | |
| "learning_rate": 1.92952867578578e-05, | |
| "loss": 1.5675, | |
| "step": 5855 | |
| }, | |
| { | |
| "epoch": 0.9209130554355086, | |
| "grad_norm": 0.1341368407011032, | |
| "learning_rate": 1.891972956149779e-05, | |
| "loss": 1.6223, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.921698817428201, | |
| "grad_norm": 0.12028190493583679, | |
| "learning_rate": 1.854779274666918e-05, | |
| "loss": 1.5513, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 0.9224845794208935, | |
| "grad_norm": 0.13998378813266754, | |
| "learning_rate": 1.8179479112391704e-05, | |
| "loss": 1.5942, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.9232703414135858, | |
| "grad_norm": 0.1285070776939392, | |
| "learning_rate": 1.781479143041875e-05, | |
| "loss": 1.5804, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 0.9240561034062782, | |
| "grad_norm": 0.10047586262226105, | |
| "learning_rate": 1.7453732445216586e-05, | |
| "loss": 1.5314, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.9248418653989706, | |
| "grad_norm": 0.1180456280708313, | |
| "learning_rate": 1.7096304873943535e-05, | |
| "loss": 1.5613, | |
| "step": 5885 | |
| }, | |
| { | |
| "epoch": 0.925627627391663, | |
| "grad_norm": 0.1225186139345169, | |
| "learning_rate": 1.6742511406429682e-05, | |
| "loss": 1.5305, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.9264133893843555, | |
| "grad_norm": 0.13830195367336273, | |
| "learning_rate": 1.639235470515654e-05, | |
| "loss": 1.5563, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 0.9271991513770479, | |
| "grad_norm": 0.17313812673091888, | |
| "learning_rate": 1.6045837405237075e-05, | |
| "loss": 1.4795, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.9279849133697403, | |
| "grad_norm": 0.12217851728200912, | |
| "learning_rate": 1.570296211439587e-05, | |
| "loss": 1.5596, | |
| "step": 5905 | |
| }, | |
| { | |
| "epoch": 0.9287706753624327, | |
| "grad_norm": 0.11904492974281311, | |
| "learning_rate": 1.5363731412949444e-05, | |
| "loss": 1.6062, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.9295564373551252, | |
| "grad_norm": 0.13460589945316315, | |
| "learning_rate": 1.5028147853786867e-05, | |
| "loss": 1.5486, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 0.9303421993478176, | |
| "grad_norm": 0.10764899104833603, | |
| "learning_rate": 1.4696213962350492e-05, | |
| "loss": 1.5923, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.93112796134051, | |
| "grad_norm": 0.12785036861896515, | |
| "learning_rate": 1.4367932236617142e-05, | |
| "loss": 1.6108, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 0.9319137233332023, | |
| "grad_norm": 0.11170931160449982, | |
| "learning_rate": 1.4043305147079077e-05, | |
| "loss": 1.4704, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.9326994853258948, | |
| "grad_norm": 0.200826033949852, | |
| "learning_rate": 1.372233513672555e-05, | |
| "loss": 1.6068, | |
| "step": 5935 | |
| }, | |
| { | |
| "epoch": 0.9334852473185872, | |
| "grad_norm": 0.11707545816898346, | |
| "learning_rate": 1.3405024621024332e-05, | |
| "loss": 1.5703, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.9342710093112796, | |
| "grad_norm": 0.11969916522502899, | |
| "learning_rate": 1.3091375987903498e-05, | |
| "loss": 1.5001, | |
| "step": 5945 | |
| }, | |
| { | |
| "epoch": 0.935056771303972, | |
| "grad_norm": 0.13447532057762146, | |
| "learning_rate": 1.2781391597733837e-05, | |
| "loss": 1.5555, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.9358425332966644, | |
| "grad_norm": 0.13763001561164856, | |
| "learning_rate": 1.247507378331042e-05, | |
| "loss": 1.5385, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 0.9366282952893569, | |
| "grad_norm": 0.13508008420467377, | |
| "learning_rate": 1.2172424849835662e-05, | |
| "loss": 1.555, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.9374140572820493, | |
| "grad_norm": 0.122609443962574, | |
| "learning_rate": 1.1873447074901512e-05, | |
| "loss": 1.5754, | |
| "step": 5965 | |
| }, | |
| { | |
| "epoch": 0.9381998192747417, | |
| "grad_norm": 0.12040087580680847, | |
| "learning_rate": 1.1578142708472794e-05, | |
| "loss": 1.4984, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.9389855812674341, | |
| "grad_norm": 0.14471302926540375, | |
| "learning_rate": 1.1286513972869783e-05, | |
| "loss": 1.5293, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 0.9397713432601265, | |
| "grad_norm": 0.12743833661079407, | |
| "learning_rate": 1.0998563062751821e-05, | |
| "loss": 1.553, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.9405571052528189, | |
| "grad_norm": 0.12718817591667175, | |
| "learning_rate": 1.0714292145100557e-05, | |
| "loss": 1.5326, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 0.9413428672455113, | |
| "grad_norm": 0.1280830055475235, | |
| "learning_rate": 1.0433703359203906e-05, | |
| "loss": 1.5474, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.9421286292382037, | |
| "grad_norm": 0.13597886264324188, | |
| "learning_rate": 1.0156798816639723e-05, | |
| "loss": 1.564, | |
| "step": 5995 | |
| }, | |
| { | |
| "epoch": 0.9429143912308962, | |
| "grad_norm": 0.14008958637714386, | |
| "learning_rate": 9.883580601260044e-06, | |
| "loss": 1.6469, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9437001532235886, | |
| "grad_norm": 0.12624593079090118, | |
| "learning_rate": 9.61405076917532e-06, | |
| "loss": 1.5435, | |
| "step": 6005 | |
| }, | |
| { | |
| "epoch": 0.944485915216281, | |
| "grad_norm": 0.1182253360748291, | |
| "learning_rate": 9.348211348739033e-06, | |
| "loss": 1.6148, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.9452716772089734, | |
| "grad_norm": 0.12373318523168564, | |
| "learning_rate": 9.086064340532274e-06, | |
| "loss": 1.6464, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 0.9460574392016659, | |
| "grad_norm": 0.1271478533744812, | |
| "learning_rate": 8.827611717349027e-06, | |
| "loss": 1.5245, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.9468432011943583, | |
| "grad_norm": 0.12978233397006989, | |
| "learning_rate": 8.572855424180736e-06, | |
| "loss": 1.5904, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 0.9476289631870506, | |
| "grad_norm": 0.14304792881011963, | |
| "learning_rate": 8.321797378202378e-06, | |
| "loss": 1.5159, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.948414725179743, | |
| "grad_norm": 0.1286846399307251, | |
| "learning_rate": 8.074439468757412e-06, | |
| "loss": 1.5156, | |
| "step": 6035 | |
| }, | |
| { | |
| "epoch": 0.9492004871724354, | |
| "grad_norm": 0.12985752522945404, | |
| "learning_rate": 7.830783557343901e-06, | |
| "loss": 1.5533, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.9499862491651279, | |
| "grad_norm": 0.11546095460653305, | |
| "learning_rate": 7.590831477600646e-06, | |
| "loss": 1.4813, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 0.9507720111578203, | |
| "grad_norm": 0.13258661329746246, | |
| "learning_rate": 7.354585035292794e-06, | |
| "loss": 1.6366, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.9515577731505127, | |
| "grad_norm": 0.1464798003435135, | |
| "learning_rate": 7.122046008298966e-06, | |
| "loss": 1.5343, | |
| "step": 6055 | |
| }, | |
| { | |
| "epoch": 0.9523435351432051, | |
| "grad_norm": 0.11942831426858902, | |
| "learning_rate": 6.8932161465972694e-06, | |
| "loss": 1.548, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.9531292971358976, | |
| "grad_norm": 0.11840633302927017, | |
| "learning_rate": 6.668097172252529e-06, | |
| "loss": 1.63, | |
| "step": 6065 | |
| }, | |
| { | |
| "epoch": 0.95391505912859, | |
| "grad_norm": 0.12097301334142685, | |
| "learning_rate": 6.446690779403241e-06, | |
| "loss": 1.5491, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.9547008211212824, | |
| "grad_norm": 0.12309488654136658, | |
| "learning_rate": 6.228998634248695e-06, | |
| "loss": 1.598, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 0.9554865831139748, | |
| "grad_norm": 0.14806729555130005, | |
| "learning_rate": 6.015022375036539e-06, | |
| "loss": 1.5416, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.9562723451066671, | |
| "grad_norm": 0.12924478948116302, | |
| "learning_rate": 5.8047636120504015e-06, | |
| "loss": 1.5189, | |
| "step": 6085 | |
| }, | |
| { | |
| "epoch": 0.9570581070993596, | |
| "grad_norm": 0.13043110072612762, | |
| "learning_rate": 5.598223927597901e-06, | |
| "loss": 1.67, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.957843869092052, | |
| "grad_norm": 0.1203450858592987, | |
| "learning_rate": 5.395404875998488e-06, | |
| "loss": 1.4862, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 0.9586296310847444, | |
| "grad_norm": 0.1190691590309143, | |
| "learning_rate": 5.196307983572069e-06, | |
| "loss": 1.557, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.9594153930774368, | |
| "grad_norm": 0.1312546730041504, | |
| "learning_rate": 5.000934748627117e-06, | |
| "loss": 1.5992, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 0.9602011550701293, | |
| "grad_norm": 0.12697143852710724, | |
| "learning_rate": 4.809286641449862e-06, | |
| "loss": 1.5766, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.9609869170628217, | |
| "grad_norm": 0.11688420176506042, | |
| "learning_rate": 4.621365104292896e-06, | |
| "loss": 1.5383, | |
| "step": 6115 | |
| }, | |
| { | |
| "epoch": 0.9617726790555141, | |
| "grad_norm": 0.15452660620212555, | |
| "learning_rate": 4.437171551364416e-06, | |
| "loss": 1.6561, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.9625584410482065, | |
| "grad_norm": 0.14491859078407288, | |
| "learning_rate": 4.256707368817503e-06, | |
| "loss": 1.5707, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.963344203040899, | |
| "grad_norm": 0.14828036725521088, | |
| "learning_rate": 4.079973914739965e-06, | |
| "loss": 1.5733, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.9641299650335913, | |
| "grad_norm": 0.11809247732162476, | |
| "learning_rate": 3.906972519143736e-06, | |
| "loss": 1.5501, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 0.9649157270262837, | |
| "grad_norm": 0.11582066118717194, | |
| "learning_rate": 3.737704483955107e-06, | |
| "loss": 1.5558, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.9657014890189761, | |
| "grad_norm": 0.1349029392004013, | |
| "learning_rate": 3.5721710830049493e-06, | |
| "loss": 1.5224, | |
| "step": 6145 | |
| }, | |
| { | |
| "epoch": 0.9664872510116685, | |
| "grad_norm": 0.14910638332366943, | |
| "learning_rate": 3.4103735620189536e-06, | |
| "loss": 1.5653, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.967273013004361, | |
| "grad_norm": 0.11077561974525452, | |
| "learning_rate": 3.2523131386083537e-06, | |
| "loss": 1.5118, | |
| "step": 6155 | |
| }, | |
| { | |
| "epoch": 0.9680587749970534, | |
| "grad_norm": 0.12212218344211578, | |
| "learning_rate": 3.0979910022607693e-06, | |
| "loss": 1.5892, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.9688445369897458, | |
| "grad_norm": 0.1238313764333725, | |
| "learning_rate": 2.947408314331379e-06, | |
| "loss": 1.5932, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 0.9696302989824382, | |
| "grad_norm": 0.128588005900383, | |
| "learning_rate": 2.8005662080339276e-06, | |
| "loss": 1.5289, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.9704160609751307, | |
| "grad_norm": 0.12846864759922028, | |
| "learning_rate": 2.657465788432234e-06, | |
| "loss": 1.5869, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 0.9712018229678231, | |
| "grad_norm": 0.13134929537773132, | |
| "learning_rate": 2.518108132432084e-06, | |
| "loss": 1.5328, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.9719875849605154, | |
| "grad_norm": 0.13864241540431976, | |
| "learning_rate": 2.382494288773074e-06, | |
| "loss": 1.6102, | |
| "step": 6185 | |
| }, | |
| { | |
| "epoch": 0.9727733469532078, | |
| "grad_norm": 0.12534551322460175, | |
| "learning_rate": 2.2506252780205017e-06, | |
| "loss": 1.5703, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.9735591089459003, | |
| "grad_norm": 0.11990083009004593, | |
| "learning_rate": 2.122502092557876e-06, | |
| "loss": 1.6265, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 0.9743448709385927, | |
| "grad_norm": 0.11616454273462296, | |
| "learning_rate": 1.9981256965794226e-06, | |
| "loss": 1.5141, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.9751306329312851, | |
| "grad_norm": 0.12967850267887115, | |
| "learning_rate": 1.8774970260827528e-06, | |
| "loss": 1.5729, | |
| "step": 6205 | |
| }, | |
| { | |
| "epoch": 0.9759163949239775, | |
| "grad_norm": 0.13299420475959778, | |
| "learning_rate": 1.760616988861985e-06, | |
| "loss": 1.5698, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.97670215691667, | |
| "grad_norm": 0.12787404656410217, | |
| "learning_rate": 1.6474864645008024e-06, | |
| "loss": 1.5025, | |
| "step": 6215 | |
| }, | |
| { | |
| "epoch": 0.9774879189093624, | |
| "grad_norm": 0.11461758613586426, | |
| "learning_rate": 1.5381063043656274e-06, | |
| "loss": 1.5551, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.9782736809020548, | |
| "grad_norm": 0.11087030172348022, | |
| "learning_rate": 1.4324773315996797e-06, | |
| "loss": 1.5443, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 0.9790594428947472, | |
| "grad_norm": 0.13169339299201965, | |
| "learning_rate": 1.3306003411163725e-06, | |
| "loss": 1.5355, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.9798452048874396, | |
| "grad_norm": 0.14098110795021057, | |
| "learning_rate": 1.232476099593538e-06, | |
| "loss": 1.5563, | |
| "step": 6235 | |
| }, | |
| { | |
| "epoch": 0.980630966880132, | |
| "grad_norm": 0.1428086906671524, | |
| "learning_rate": 1.1381053454675994e-06, | |
| "loss": 1.5886, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.9814167288728244, | |
| "grad_norm": 0.11641329526901245, | |
| "learning_rate": 1.0474887889280749e-06, | |
| "loss": 1.5523, | |
| "step": 6245 | |
| }, | |
| { | |
| "epoch": 0.9822024908655168, | |
| "grad_norm": 0.11133892089128494, | |
| "learning_rate": 9.606271119119713e-07, | |
| "loss": 1.5447, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.9829882528582092, | |
| "grad_norm": 0.11940602213144302, | |
| "learning_rate": 8.775209680991214e-07, | |
| "loss": 1.5727, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 0.9837740148509017, | |
| "grad_norm": 0.12904313206672668, | |
| "learning_rate": 7.981709829068539e-07, | |
| "loss": 1.5067, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.9845597768435941, | |
| "grad_norm": 0.15031477808952332, | |
| "learning_rate": 7.225777534854428e-07, | |
| "loss": 1.549, | |
| "step": 6265 | |
| }, | |
| { | |
| "epoch": 0.9853455388362865, | |
| "grad_norm": 0.12052863091230392, | |
| "learning_rate": 6.507418487135542e-07, | |
| "loss": 1.5189, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.9861313008289789, | |
| "grad_norm": 0.1250990927219391, | |
| "learning_rate": 5.826638091941394e-07, | |
| "loss": 1.585, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 0.9869170628216714, | |
| "grad_norm": 0.14981551468372345, | |
| "learning_rate": 5.18344147250216e-07, | |
| "loss": 1.5447, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.9877028248143638, | |
| "grad_norm": 0.12979185581207275, | |
| "learning_rate": 4.577833469208703e-07, | |
| "loss": 1.6224, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 0.9884885868070561, | |
| "grad_norm": 0.1247519999742508, | |
| "learning_rate": 4.009818639580387e-07, | |
| "loss": 1.5269, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.9892743487997485, | |
| "grad_norm": 0.11512382328510284, | |
| "learning_rate": 3.4794012582262115e-07, | |
| "loss": 1.5008, | |
| "step": 6295 | |
| }, | |
| { | |
| "epoch": 0.9900601107924409, | |
| "grad_norm": 0.13370949029922485, | |
| "learning_rate": 2.9865853168159483e-07, | |
| "loss": 1.5376, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.9908458727851334, | |
| "grad_norm": 0.14563091099262238, | |
| "learning_rate": 2.5313745240473915e-07, | |
| "loss": 1.5473, | |
| "step": 6305 | |
| }, | |
| { | |
| "epoch": 0.9916316347778258, | |
| "grad_norm": 0.12514719367027283, | |
| "learning_rate": 2.1137723056213753e-07, | |
| "loss": 1.5579, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.9924173967705182, | |
| "grad_norm": 0.14008943736553192, | |
| "learning_rate": 1.7337818042134635e-07, | |
| "loss": 1.5233, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 0.9932031587632106, | |
| "grad_norm": 0.11884216964244843, | |
| "learning_rate": 1.3914058794511907e-07, | |
| "loss": 1.5463, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.9939889207559031, | |
| "grad_norm": 0.13800643384456635, | |
| "learning_rate": 1.0866471078940787e-07, | |
| "loss": 1.5987, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 0.9947746827485955, | |
| "grad_norm": 0.11561693996191025, | |
| "learning_rate": 8.195077830114306e-08, | |
| "loss": 1.539, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.9955604447412879, | |
| "grad_norm": 0.1500178575515747, | |
| "learning_rate": 5.8998991516678866e-08, | |
| "loss": 1.5477, | |
| "step": 6335 | |
| }, | |
| { | |
| "epoch": 0.9963462067339802, | |
| "grad_norm": 0.1255454123020172, | |
| "learning_rate": 3.9809523160183606e-08, | |
| "loss": 1.5781, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.9971319687266726, | |
| "grad_norm": 0.18067841231822968, | |
| "learning_rate": 2.4382517642640435e-08, | |
| "loss": 1.4978, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 0.9979177307193651, | |
| "grad_norm": 0.15813903510570526, | |
| "learning_rate": 1.2718091060182069e-08, | |
| "loss": 1.5221, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.9987034927120575, | |
| "grad_norm": 0.1291486918926239, | |
| "learning_rate": 4.816331193924217e-09, | |
| "loss": 1.6245, | |
| "step": 6355 | |
| }, | |
| { | |
| "epoch": 0.9994892547047499, | |
| "grad_norm": 0.11886509507894516, | |
| "learning_rate": 6.772975085778121e-10, | |
| "loss": 1.6374, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.9999607119003654, | |
| "eval_loss": 1.557084083557129, | |
| "eval_runtime": 120.519, | |
| "eval_samples_per_second": 88.376, | |
| "eval_steps_per_second": 11.052, | |
| "step": 6363 | |
| }, | |
| { | |
| "epoch": 0.9999607119003654, | |
| "step": 6363, | |
| "total_flos": 2.657246056189788e+17, | |
| "train_loss": 1.732499259158573, | |
| "train_runtime": 9418.2953, | |
| "train_samples_per_second": 21.62, | |
| "train_steps_per_second": 0.676 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 6363, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.657246056189788e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |