| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.9850107066381155, | |
| "eval_steps": 500, | |
| "global_step": 932, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004282655246252677, | |
| "grad_norm": 5.760822510102549, | |
| "learning_rate": 6.25e-07, | |
| "loss": 1.9283, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008565310492505354, | |
| "grad_norm": 9.541601401962827, | |
| "learning_rate": 1.25e-06, | |
| "loss": 1.9234, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01284796573875803, | |
| "grad_norm": 6.538784535229611, | |
| "learning_rate": 1.875e-06, | |
| "loss": 1.8408, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.017130620985010708, | |
| "grad_norm": 7.213295700964454, | |
| "learning_rate": 2.5e-06, | |
| "loss": 1.9104, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.021413276231263382, | |
| "grad_norm": 6.314491554982707, | |
| "learning_rate": 3.125e-06, | |
| "loss": 1.9888, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02569593147751606, | |
| "grad_norm": 2.828604192300556, | |
| "learning_rate": 3.75e-06, | |
| "loss": 1.8774, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.029978586723768737, | |
| "grad_norm": 1.9892932329122277, | |
| "learning_rate": 4.375e-06, | |
| "loss": 1.8232, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.034261241970021415, | |
| "grad_norm": 2.301797599943145, | |
| "learning_rate": 5e-06, | |
| "loss": 1.7929, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03854389721627409, | |
| "grad_norm": 2.642325145637469, | |
| "learning_rate": 5.625e-06, | |
| "loss": 1.6999, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.042826552462526764, | |
| "grad_norm": 1.54457145688109, | |
| "learning_rate": 6.25e-06, | |
| "loss": 1.797, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.047109207708779445, | |
| "grad_norm": 0.6231435902899881, | |
| "learning_rate": 6.875000000000001e-06, | |
| "loss": 1.8327, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.05139186295503212, | |
| "grad_norm": 1.9278562906780023, | |
| "learning_rate": 7.5e-06, | |
| "loss": 1.8551, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.055674518201284794, | |
| "grad_norm": 2.1201436104190123, | |
| "learning_rate": 8.125000000000001e-06, | |
| "loss": 1.713, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.059957173447537475, | |
| "grad_norm": 1.6833290019534897, | |
| "learning_rate": 8.75e-06, | |
| "loss": 1.7723, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06423982869379015, | |
| "grad_norm": 1.7592896430688332, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 1.7786, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.06852248394004283, | |
| "grad_norm": 1.3527651484156915, | |
| "learning_rate": 1e-05, | |
| "loss": 1.8679, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0728051391862955, | |
| "grad_norm": 0.487619361955017, | |
| "learning_rate": 1.0625e-05, | |
| "loss": 1.7012, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.07708779443254818, | |
| "grad_norm": 1.429523434604011, | |
| "learning_rate": 1.125e-05, | |
| "loss": 1.6743, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.08137044967880086, | |
| "grad_norm": 1.1313491131725162, | |
| "learning_rate": 1.1875e-05, | |
| "loss": 1.7325, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.08565310492505353, | |
| "grad_norm": 0.7296310910231132, | |
| "learning_rate": 1.25e-05, | |
| "loss": 1.6433, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08993576017130621, | |
| "grad_norm": 1.0873378233145565, | |
| "learning_rate": 1.3125e-05, | |
| "loss": 1.8281, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.09421841541755889, | |
| "grad_norm": 0.4193072086415473, | |
| "learning_rate": 1.3750000000000002e-05, | |
| "loss": 1.7653, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09850107066381156, | |
| "grad_norm": 0.6318751609453201, | |
| "learning_rate": 1.4374999999999999e-05, | |
| "loss": 1.806, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.10278372591006424, | |
| "grad_norm": 0.7998125137748582, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.7682, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10706638115631692, | |
| "grad_norm": 0.4607962108022345, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 1.7188, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.11134903640256959, | |
| "grad_norm": 0.6691122325494083, | |
| "learning_rate": 1.6250000000000002e-05, | |
| "loss": 1.7694, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11563169164882227, | |
| "grad_norm": 0.5966739618315919, | |
| "learning_rate": 1.6875000000000004e-05, | |
| "loss": 1.709, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.11991434689507495, | |
| "grad_norm": 0.5572079216591783, | |
| "learning_rate": 1.75e-05, | |
| "loss": 1.7757, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.12419700214132762, | |
| "grad_norm": 0.5765542135816161, | |
| "learning_rate": 1.8125e-05, | |
| "loss": 1.6293, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.1284796573875803, | |
| "grad_norm": 0.43151210853354827, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 1.7835, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13276231263383298, | |
| "grad_norm": 0.6485552906377018, | |
| "learning_rate": 1.9375e-05, | |
| "loss": 1.5803, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.13704496788008566, | |
| "grad_norm": 0.4482132894148142, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6477, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.14132762312633834, | |
| "grad_norm": 0.611457842177946, | |
| "learning_rate": 2.0625e-05, | |
| "loss": 1.6426, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.145610278372591, | |
| "grad_norm": 0.38871825671554094, | |
| "learning_rate": 2.125e-05, | |
| "loss": 1.5813, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.14989293361884368, | |
| "grad_norm": 0.5250386322112104, | |
| "learning_rate": 2.1875e-05, | |
| "loss": 1.6655, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.15417558886509636, | |
| "grad_norm": 0.41266647411410223, | |
| "learning_rate": 2.25e-05, | |
| "loss": 1.7505, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15845824411134904, | |
| "grad_norm": 0.4161967413290072, | |
| "learning_rate": 2.3125000000000003e-05, | |
| "loss": 1.6562, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.16274089935760172, | |
| "grad_norm": 0.4023472892837264, | |
| "learning_rate": 2.375e-05, | |
| "loss": 1.5572, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1670235546038544, | |
| "grad_norm": 0.4716743391777267, | |
| "learning_rate": 2.4375e-05, | |
| "loss": 1.654, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.17130620985010706, | |
| "grad_norm": 0.41899791933237945, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.6262, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.17558886509635974, | |
| "grad_norm": 0.4691927568541724, | |
| "learning_rate": 2.4999922473602244e-05, | |
| "loss": 1.7226, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.17987152034261242, | |
| "grad_norm": 0.3788848931279516, | |
| "learning_rate": 2.499968989537063e-05, | |
| "loss": 1.5837, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1841541755888651, | |
| "grad_norm": 0.4140681800260881, | |
| "learning_rate": 2.4999302268190118e-05, | |
| "loss": 1.6646, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.18843683083511778, | |
| "grad_norm": 0.44590521442058745, | |
| "learning_rate": 2.4998759596868908e-05, | |
| "loss": 1.6526, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.19271948608137046, | |
| "grad_norm": 0.42731019400707126, | |
| "learning_rate": 2.499806188813843e-05, | |
| "loss": 1.6234, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.19700214132762311, | |
| "grad_norm": 0.41939827714864014, | |
| "learning_rate": 2.4997209150653212e-05, | |
| "loss": 1.6093, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.2012847965738758, | |
| "grad_norm": 0.4139349892587555, | |
| "learning_rate": 2.4996201394990805e-05, | |
| "loss": 1.6361, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.20556745182012848, | |
| "grad_norm": 0.4278900632702424, | |
| "learning_rate": 2.4995038633651627e-05, | |
| "loss": 1.6978, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.20985010706638116, | |
| "grad_norm": 0.3616303314921443, | |
| "learning_rate": 2.499372088105884e-05, | |
| "loss": 1.4552, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.21413276231263384, | |
| "grad_norm": 0.42854114017469835, | |
| "learning_rate": 2.4992248153558134e-05, | |
| "loss": 1.7517, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.21841541755888652, | |
| "grad_norm": 0.526378364056868, | |
| "learning_rate": 2.4990620469417554e-05, | |
| "loss": 1.6058, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.22269807280513917, | |
| "grad_norm": 0.4059991411502784, | |
| "learning_rate": 2.498883784882726e-05, | |
| "loss": 1.5755, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.22698072805139186, | |
| "grad_norm": 0.4066265032195638, | |
| "learning_rate": 2.4986900313899273e-05, | |
| "loss": 1.6502, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.23126338329764454, | |
| "grad_norm": 0.4679382599100356, | |
| "learning_rate": 2.498480788866721e-05, | |
| "loss": 1.5904, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.23554603854389722, | |
| "grad_norm": 0.4146983639334321, | |
| "learning_rate": 2.4982560599085984e-05, | |
| "loss": 1.7578, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.2398286937901499, | |
| "grad_norm": 0.5011668519133488, | |
| "learning_rate": 2.4980158473031472e-05, | |
| "loss": 1.6348, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.24411134903640258, | |
| "grad_norm": 0.39760241005080976, | |
| "learning_rate": 2.4977601540300188e-05, | |
| "loss": 1.6521, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.24839400428265523, | |
| "grad_norm": 0.3842980574877057, | |
| "learning_rate": 2.49748898326089e-05, | |
| "loss": 1.5468, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.25267665952890794, | |
| "grad_norm": 0.5027132906331951, | |
| "learning_rate": 2.497202338359423e-05, | |
| "loss": 1.6786, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.2569593147751606, | |
| "grad_norm": 0.42843610006674887, | |
| "learning_rate": 2.4969002228812256e-05, | |
| "loss": 1.5481, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26124197002141325, | |
| "grad_norm": 0.418048089271474, | |
| "learning_rate": 2.4965826405738054e-05, | |
| "loss": 1.5, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.26552462526766596, | |
| "grad_norm": 0.38187537005853855, | |
| "learning_rate": 2.4962495953765248e-05, | |
| "loss": 1.5241, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2698072805139186, | |
| "grad_norm": 0.41994694502120095, | |
| "learning_rate": 2.495901091420551e-05, | |
| "loss": 1.5668, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.2740899357601713, | |
| "grad_norm": 0.41737827691699964, | |
| "learning_rate": 2.4955371330288045e-05, | |
| "loss": 1.6447, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.278372591006424, | |
| "grad_norm": 0.42240231204308587, | |
| "learning_rate": 2.4951577247159068e-05, | |
| "loss": 1.5265, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.2826552462526767, | |
| "grad_norm": 0.4053695799459516, | |
| "learning_rate": 2.494762871188124e-05, | |
| "loss": 1.6668, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.28693790149892934, | |
| "grad_norm": 0.3755341743631125, | |
| "learning_rate": 2.4943525773433063e-05, | |
| "loss": 1.4097, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.291220556745182, | |
| "grad_norm": 0.3698075541817392, | |
| "learning_rate": 2.4939268482708318e-05, | |
| "loss": 1.7374, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2955032119914347, | |
| "grad_norm": 0.35883020884289013, | |
| "learning_rate": 2.4934856892515378e-05, | |
| "loss": 1.7297, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.29978586723768735, | |
| "grad_norm": 0.42882530161941707, | |
| "learning_rate": 2.4930291057576603e-05, | |
| "loss": 1.6139, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.30406852248394006, | |
| "grad_norm": 0.43658211064964164, | |
| "learning_rate": 2.4925571034527633e-05, | |
| "loss": 1.6844, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3083511777301927, | |
| "grad_norm": 0.375017122269398, | |
| "learning_rate": 2.492069688191668e-05, | |
| "loss": 1.5154, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.31263383297644537, | |
| "grad_norm": 0.4040881085038259, | |
| "learning_rate": 2.4915668660203827e-05, | |
| "loss": 1.6869, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3169164882226981, | |
| "grad_norm": 0.5315682285098243, | |
| "learning_rate": 2.4910486431760266e-05, | |
| "loss": 1.6036, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.32119914346895073, | |
| "grad_norm": 0.6819504727092934, | |
| "learning_rate": 2.490515026086751e-05, | |
| "loss": 1.7321, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.32548179871520344, | |
| "grad_norm": 0.4459093235436832, | |
| "learning_rate": 2.489966021371662e-05, | |
| "loss": 1.6316, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3297644539614561, | |
| "grad_norm": 0.41265976791945247, | |
| "learning_rate": 2.4894016358407368e-05, | |
| "loss": 1.6822, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.3340471092077088, | |
| "grad_norm": 0.40455952502188075, | |
| "learning_rate": 2.4888218764947397e-05, | |
| "loss": 1.6279, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.33832976445396146, | |
| "grad_norm": 0.39048708108607677, | |
| "learning_rate": 2.488226750525135e-05, | |
| "loss": 1.67, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.3426124197002141, | |
| "grad_norm": 0.37437295904257595, | |
| "learning_rate": 2.487616265313999e-05, | |
| "loss": 1.6237, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3468950749464668, | |
| "grad_norm": 0.5090276930456816, | |
| "learning_rate": 2.486990428433926e-05, | |
| "loss": 1.6003, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3511777301927195, | |
| "grad_norm": 0.46215544091371435, | |
| "learning_rate": 2.486349247647938e-05, | |
| "loss": 1.6227, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3554603854389722, | |
| "grad_norm": 0.41822651733354704, | |
| "learning_rate": 2.485692730909383e-05, | |
| "loss": 1.669, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.35974304068522484, | |
| "grad_norm": 0.441655220825228, | |
| "learning_rate": 2.4850208863618425e-05, | |
| "loss": 1.4542, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3640256959314775, | |
| "grad_norm": 0.3786999347152407, | |
| "learning_rate": 2.4843337223390267e-05, | |
| "loss": 1.4966, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.3683083511777302, | |
| "grad_norm": 0.363991509035686, | |
| "learning_rate": 2.483631247364671e-05, | |
| "loss": 1.4573, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.37259100642398285, | |
| "grad_norm": 0.36392542721746446, | |
| "learning_rate": 2.482913470152433e-05, | |
| "loss": 1.5823, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.37687366167023556, | |
| "grad_norm": 0.3644244797395943, | |
| "learning_rate": 2.482180399605781e-05, | |
| "loss": 1.5918, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3811563169164882, | |
| "grad_norm": 0.3517233506762531, | |
| "learning_rate": 2.481432044817887e-05, | |
| "loss": 1.6118, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.3854389721627409, | |
| "grad_norm": 0.44516577454752954, | |
| "learning_rate": 2.4806684150715097e-05, | |
| "loss": 1.5337, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3897216274089936, | |
| "grad_norm": 0.4170570804452654, | |
| "learning_rate": 2.4798895198388845e-05, | |
| "loss": 1.6465, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.39400428265524623, | |
| "grad_norm": 0.36685661291454347, | |
| "learning_rate": 2.4790953687816017e-05, | |
| "loss": 1.6072, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.39828693790149894, | |
| "grad_norm": 0.4465652273066297, | |
| "learning_rate": 2.4782859717504883e-05, | |
| "loss": 1.648, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.4025695931477516, | |
| "grad_norm": 0.551115294286871, | |
| "learning_rate": 2.4774613387854866e-05, | |
| "loss": 1.6789, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.4068522483940043, | |
| "grad_norm": 0.4583820418024637, | |
| "learning_rate": 2.4766214801155276e-05, | |
| "loss": 1.5697, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.41113490364025695, | |
| "grad_norm": 0.41125366081563586, | |
| "learning_rate": 2.475766406158407e-05, | |
| "loss": 1.5489, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.41541755888650966, | |
| "grad_norm": 0.5258069733050229, | |
| "learning_rate": 2.4748961275206527e-05, | |
| "loss": 1.5782, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.4197002141327623, | |
| "grad_norm": 0.5370435285656707, | |
| "learning_rate": 2.4740106549973953e-05, | |
| "loss": 1.4463, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.42398286937901497, | |
| "grad_norm": 0.3656167120256986, | |
| "learning_rate": 2.4731099995722353e-05, | |
| "loss": 1.503, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.4282655246252677, | |
| "grad_norm": 0.37413674489512233, | |
| "learning_rate": 2.4721941724171025e-05, | |
| "loss": 1.5299, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.43254817987152033, | |
| "grad_norm": 0.5442125290315152, | |
| "learning_rate": 2.4712631848921224e-05, | |
| "loss": 1.47, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.43683083511777304, | |
| "grad_norm": 0.41142510883515865, | |
| "learning_rate": 2.470317048545473e-05, | |
| "loss": 1.6346, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4411134903640257, | |
| "grad_norm": 0.36843680594934913, | |
| "learning_rate": 2.4693557751132405e-05, | |
| "loss": 1.5707, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.44539614561027835, | |
| "grad_norm": 0.40580526926230925, | |
| "learning_rate": 2.4683793765192753e-05, | |
| "loss": 1.611, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.44967880085653106, | |
| "grad_norm": 0.43640536110068956, | |
| "learning_rate": 2.4673878648750446e-05, | |
| "loss": 1.6646, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.4539614561027837, | |
| "grad_norm": 0.36401706952184854, | |
| "learning_rate": 2.4663812524794803e-05, | |
| "loss": 1.501, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4582441113490364, | |
| "grad_norm": 0.4597685050788604, | |
| "learning_rate": 2.4653595518188276e-05, | |
| "loss": 1.4702, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.4625267665952891, | |
| "grad_norm": 0.4198721125351821, | |
| "learning_rate": 2.4643227755664898e-05, | |
| "loss": 1.5426, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.4668094218415418, | |
| "grad_norm": 0.4415892969218905, | |
| "learning_rate": 2.463270936582872e-05, | |
| "loss": 1.5348, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.47109207708779444, | |
| "grad_norm": 0.468616016936323, | |
| "learning_rate": 2.4622040479152195e-05, | |
| "loss": 1.5948, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4753747323340471, | |
| "grad_norm": 0.7486771610195644, | |
| "learning_rate": 2.4611221227974584e-05, | |
| "loss": 1.683, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.4796573875802998, | |
| "grad_norm": 0.779148114510847, | |
| "learning_rate": 2.4600251746500296e-05, | |
| "loss": 1.4869, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.48394004282655245, | |
| "grad_norm": 0.4689233006931303, | |
| "learning_rate": 2.4589132170797234e-05, | |
| "loss": 1.561, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.48822269807280516, | |
| "grad_norm": 0.7040850097227628, | |
| "learning_rate": 2.4577862638795098e-05, | |
| "loss": 1.6254, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4925053533190578, | |
| "grad_norm": 0.4769716615847163, | |
| "learning_rate": 2.456644329028369e-05, | |
| "loss": 1.5774, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.49678800856531047, | |
| "grad_norm": 0.6632040162872483, | |
| "learning_rate": 2.4554874266911157e-05, | |
| "loss": 1.5463, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5010706638115632, | |
| "grad_norm": 1.1457031665772415, | |
| "learning_rate": 2.4543155712182252e-05, | |
| "loss": 1.642, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5053533190578159, | |
| "grad_norm": 0.5148470344333809, | |
| "learning_rate": 2.4531287771456556e-05, | |
| "loss": 1.5455, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5096359743040685, | |
| "grad_norm": 0.9100598002476826, | |
| "learning_rate": 2.4519270591946653e-05, | |
| "loss": 1.555, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.5139186295503212, | |
| "grad_norm": 1.3104358729746841, | |
| "learning_rate": 2.4507104322716326e-05, | |
| "loss": 1.5604, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5182012847965739, | |
| "grad_norm": 0.5181336432498789, | |
| "learning_rate": 2.44947891146787e-05, | |
| "loss": 1.5029, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.5224839400428265, | |
| "grad_norm": 1.0934800951662504, | |
| "learning_rate": 2.4482325120594374e-05, | |
| "loss": 1.5449, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5267665952890792, | |
| "grad_norm": 0.6352277468903285, | |
| "learning_rate": 2.4469712495069507e-05, | |
| "loss": 1.588, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.5310492505353319, | |
| "grad_norm": 1.4867821380058142, | |
| "learning_rate": 2.445695139455394e-05, | |
| "loss": 1.6408, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5353319057815846, | |
| "grad_norm": 0.6556668793792217, | |
| "learning_rate": 2.444404197733921e-05, | |
| "loss": 1.5059, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5396145610278372, | |
| "grad_norm": 1.3366811637363765, | |
| "learning_rate": 2.4430984403556613e-05, | |
| "loss": 1.6334, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5438972162740899, | |
| "grad_norm": 0.8391779284464247, | |
| "learning_rate": 2.441777883517522e-05, | |
| "loss": 1.5342, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.5481798715203426, | |
| "grad_norm": 1.5151547233227163, | |
| "learning_rate": 2.4404425435999857e-05, | |
| "loss": 1.4767, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5524625267665952, | |
| "grad_norm": 0.8456634115358744, | |
| "learning_rate": 2.4390924371669065e-05, | |
| "loss": 1.3985, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.556745182012848, | |
| "grad_norm": 0.9899617700169978, | |
| "learning_rate": 2.437727580965307e-05, | |
| "loss": 1.547, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5610278372591007, | |
| "grad_norm": 0.8748009025292892, | |
| "learning_rate": 2.436347991925169e-05, | |
| "loss": 1.5895, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.5653104925053534, | |
| "grad_norm": 0.8284458411110256, | |
| "learning_rate": 2.4349536871592227e-05, | |
| "loss": 1.5536, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.569593147751606, | |
| "grad_norm": 0.918716629707354, | |
| "learning_rate": 2.4335446839627375e-05, | |
| "loss": 1.6851, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5738758029978587, | |
| "grad_norm": 1.0628279716423659, | |
| "learning_rate": 2.4321209998133025e-05, | |
| "loss": 1.6705, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5781584582441114, | |
| "grad_norm": 0.73934998026875, | |
| "learning_rate": 2.430682652370616e-05, | |
| "loss": 1.6545, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.582441113490364, | |
| "grad_norm": 0.8203168050853737, | |
| "learning_rate": 2.4292296594762602e-05, | |
| "loss": 1.6182, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5867237687366167, | |
| "grad_norm": 0.6116686513293031, | |
| "learning_rate": 2.4277620391534845e-05, | |
| "loss": 1.6446, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.5910064239828694, | |
| "grad_norm": 0.9614170914314591, | |
| "learning_rate": 2.4262798096069788e-05, | |
| "loss": 1.494, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5952890792291221, | |
| "grad_norm": 0.3676545315742134, | |
| "learning_rate": 2.424782989222651e-05, | |
| "loss": 1.595, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.5995717344753747, | |
| "grad_norm": 0.7408509458451011, | |
| "learning_rate": 2.4232715965673952e-05, | |
| "loss": 1.6386, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6038543897216274, | |
| "grad_norm": 0.40233518217652775, | |
| "learning_rate": 2.421745650388864e-05, | |
| "loss": 1.5558, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.6081370449678801, | |
| "grad_norm": 0.5148389113634867, | |
| "learning_rate": 2.4202051696152353e-05, | |
| "loss": 1.5015, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6124197002141327, | |
| "grad_norm": 0.3807057141875052, | |
| "learning_rate": 2.418650173354977e-05, | |
| "loss": 1.6467, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.6167023554603854, | |
| "grad_norm": 0.3896991503471914, | |
| "learning_rate": 2.41708068089661e-05, | |
| "loss": 1.6053, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6209850107066381, | |
| "grad_norm": 0.35553342191514337, | |
| "learning_rate": 2.4154967117084705e-05, | |
| "loss": 1.5364, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6252676659528907, | |
| "grad_norm": 0.36567417738111496, | |
| "learning_rate": 2.4138982854384663e-05, | |
| "loss": 1.6348, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6295503211991434, | |
| "grad_norm": 0.38816867602696453, | |
| "learning_rate": 2.412285421913834e-05, | |
| "loss": 1.4694, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.6338329764453962, | |
| "grad_norm": 0.35173888322190433, | |
| "learning_rate": 2.410658141140894e-05, | |
| "loss": 1.646, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6381156316916489, | |
| "grad_norm": 0.36815567692224666, | |
| "learning_rate": 2.4090164633048e-05, | |
| "loss": 1.6168, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.6423982869379015, | |
| "grad_norm": 0.3789787801030716, | |
| "learning_rate": 2.4073604087692925e-05, | |
| "loss": 1.5451, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6466809421841542, | |
| "grad_norm": 0.4139676112725167, | |
| "learning_rate": 2.4056899980764407e-05, | |
| "loss": 1.5772, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.6509635974304069, | |
| "grad_norm": 0.4317710716550067, | |
| "learning_rate": 2.404005251946394e-05, | |
| "loss": 1.5901, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6552462526766595, | |
| "grad_norm": 0.3793107950355877, | |
| "learning_rate": 2.4023061912771188e-05, | |
| "loss": 1.4831, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.6595289079229122, | |
| "grad_norm": 0.36255246115756395, | |
| "learning_rate": 2.4005928371441444e-05, | |
| "loss": 1.5417, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6638115631691649, | |
| "grad_norm": 0.35515016194574406, | |
| "learning_rate": 2.3988652108002984e-05, | |
| "loss": 1.4822, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6680942184154176, | |
| "grad_norm": 0.3462285743933349, | |
| "learning_rate": 2.3971233336754444e-05, | |
| "loss": 1.5157, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6723768736616702, | |
| "grad_norm": 0.3669326112622935, | |
| "learning_rate": 2.395367227376216e-05, | |
| "loss": 1.5652, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.6766595289079229, | |
| "grad_norm": 0.3704783452888347, | |
| "learning_rate": 2.393596913685748e-05, | |
| "loss": 1.5836, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6809421841541756, | |
| "grad_norm": 0.3829979392497551, | |
| "learning_rate": 2.391812414563408e-05, | |
| "loss": 1.5023, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.6852248394004282, | |
| "grad_norm": 0.3630273112296912, | |
| "learning_rate": 2.390013752144521e-05, | |
| "loss": 1.6907, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6895074946466809, | |
| "grad_norm": 0.3351207679536815, | |
| "learning_rate": 2.3882009487400993e-05, | |
| "loss": 1.4393, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.6937901498929336, | |
| "grad_norm": 0.3497511991840534, | |
| "learning_rate": 2.386374026836561e-05, | |
| "loss": 1.598, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6980728051391863, | |
| "grad_norm": 0.3337980565250301, | |
| "learning_rate": 2.3845330090954542e-05, | |
| "loss": 1.4704, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.702355460385439, | |
| "grad_norm": 0.36707456896757124, | |
| "learning_rate": 2.3826779183531744e-05, | |
| "loss": 1.5851, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.7066381156316917, | |
| "grad_norm": 0.37164461991634257, | |
| "learning_rate": 2.380808777620682e-05, | |
| "loss": 1.531, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.7109207708779444, | |
| "grad_norm": 0.3505467917592193, | |
| "learning_rate": 2.3789256100832173e-05, | |
| "loss": 1.4713, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.715203426124197, | |
| "grad_norm": 0.47758470060633207, | |
| "learning_rate": 2.3770284391000113e-05, | |
| "loss": 1.5102, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.7194860813704497, | |
| "grad_norm": 0.33053663778093284, | |
| "learning_rate": 2.375117288203997e-05, | |
| "loss": 1.4791, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.7237687366167024, | |
| "grad_norm": 0.3393970208208402, | |
| "learning_rate": 2.3731921811015175e-05, | |
| "loss": 1.6291, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.728051391862955, | |
| "grad_norm": 0.35855145219326184, | |
| "learning_rate": 2.3712531416720317e-05, | |
| "loss": 1.5539, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7323340471092077, | |
| "grad_norm": 0.369911611756327, | |
| "learning_rate": 2.3693001939678183e-05, | |
| "loss": 1.4999, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.7366167023554604, | |
| "grad_norm": 0.5505558187826747, | |
| "learning_rate": 2.367333362213678e-05, | |
| "loss": 1.5852, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.7408993576017131, | |
| "grad_norm": 0.3528456056150531, | |
| "learning_rate": 2.3653526708066314e-05, | |
| "loss": 1.5358, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.7451820128479657, | |
| "grad_norm": 0.3968210406914177, | |
| "learning_rate": 2.3633581443156178e-05, | |
| "loss": 1.5028, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7494646680942184, | |
| "grad_norm": 0.47087903951900106, | |
| "learning_rate": 2.361349807481189e-05, | |
| "loss": 1.6258, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7537473233404711, | |
| "grad_norm": 0.380904082979793, | |
| "learning_rate": 2.3593276852152056e-05, | |
| "loss": 1.5982, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7580299785867237, | |
| "grad_norm": 0.40302033351805244, | |
| "learning_rate": 2.3572918026005235e-05, | |
| "loss": 1.6539, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.7623126338329764, | |
| "grad_norm": 0.41272981679464077, | |
| "learning_rate": 2.355242184890686e-05, | |
| "loss": 1.4144, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7665952890792291, | |
| "grad_norm": 0.3606085218359927, | |
| "learning_rate": 2.35317885750961e-05, | |
| "loss": 1.5244, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.7708779443254818, | |
| "grad_norm": 0.34295993724517143, | |
| "learning_rate": 2.3511018460512696e-05, | |
| "loss": 1.4102, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7751605995717344, | |
| "grad_norm": 0.4192738060845751, | |
| "learning_rate": 2.349011176279379e-05, | |
| "loss": 1.5336, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.7794432548179872, | |
| "grad_norm": 0.3651804066614457, | |
| "learning_rate": 2.3469068741270744e-05, | |
| "loss": 1.5337, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7837259100642399, | |
| "grad_norm": 0.3621028477405051, | |
| "learning_rate": 2.3447889656965896e-05, | |
| "loss": 1.6515, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.7880085653104925, | |
| "grad_norm": 0.4192540795103203, | |
| "learning_rate": 2.342657477258935e-05, | |
| "loss": 1.6674, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7922912205567452, | |
| "grad_norm": 0.40348346440086696, | |
| "learning_rate": 2.340512435253569e-05, | |
| "loss": 1.5162, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.7965738758029979, | |
| "grad_norm": 0.39794985457766996, | |
| "learning_rate": 2.3383538662880732e-05, | |
| "loss": 1.4518, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8008565310492506, | |
| "grad_norm": 0.40289663289027905, | |
| "learning_rate": 2.3361817971378197e-05, | |
| "loss": 1.6306, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.8051391862955032, | |
| "grad_norm": 0.35469529427153196, | |
| "learning_rate": 2.3339962547456397e-05, | |
| "loss": 1.3989, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.8094218415417559, | |
| "grad_norm": 0.3727961066406737, | |
| "learning_rate": 2.3317972662214898e-05, | |
| "loss": 1.5999, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.8137044967880086, | |
| "grad_norm": 0.4333692956220233, | |
| "learning_rate": 2.329584858842116e-05, | |
| "loss": 1.5081, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8179871520342612, | |
| "grad_norm": 0.3789155310493327, | |
| "learning_rate": 2.3273590600507135e-05, | |
| "loss": 1.4586, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.8222698072805139, | |
| "grad_norm": 0.3768886247305229, | |
| "learning_rate": 2.3251198974565887e-05, | |
| "loss": 1.5521, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.8265524625267666, | |
| "grad_norm": 0.45287917232766545, | |
| "learning_rate": 2.322867398834815e-05, | |
| "loss": 1.6411, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.8308351177730193, | |
| "grad_norm": 0.36693401699800615, | |
| "learning_rate": 2.320601592125889e-05, | |
| "loss": 1.5276, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8351177730192719, | |
| "grad_norm": 0.44127614034536217, | |
| "learning_rate": 2.318322505435384e-05, | |
| "loss": 1.4782, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8394004282655246, | |
| "grad_norm": 0.3608904748036842, | |
| "learning_rate": 2.316030167033601e-05, | |
| "loss": 1.5273, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8436830835117773, | |
| "grad_norm": 0.38846305560083205, | |
| "learning_rate": 2.313724605355218e-05, | |
| "loss": 1.4738, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8479657387580299, | |
| "grad_norm": 0.3509606665662544, | |
| "learning_rate": 2.3114058489989378e-05, | |
| "loss": 1.4431, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8522483940042827, | |
| "grad_norm": 0.3858549090015476, | |
| "learning_rate": 2.3090739267271332e-05, | |
| "loss": 1.515, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.8565310492505354, | |
| "grad_norm": 0.4113088516859706, | |
| "learning_rate": 2.306728867465489e-05, | |
| "loss": 1.5244, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.860813704496788, | |
| "grad_norm": 0.44586677646136047, | |
| "learning_rate": 2.3043707003026452e-05, | |
| "loss": 1.4043, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8650963597430407, | |
| "grad_norm": 0.43850720329826914, | |
| "learning_rate": 2.3019994544898345e-05, | |
| "loss": 1.5149, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8693790149892934, | |
| "grad_norm": 0.4409370304445262, | |
| "learning_rate": 2.2996151594405196e-05, | |
| "loss": 1.5645, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.8736616702355461, | |
| "grad_norm": 0.5520670678955565, | |
| "learning_rate": 2.2972178447300305e-05, | |
| "loss": 1.5525, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8779443254817987, | |
| "grad_norm": 0.44992372464956326, | |
| "learning_rate": 2.2948075400951946e-05, | |
| "loss": 1.5927, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.8822269807280514, | |
| "grad_norm": 0.5250810847046828, | |
| "learning_rate": 2.2923842754339696e-05, | |
| "loss": 1.5617, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8865096359743041, | |
| "grad_norm": 0.43126340615021524, | |
| "learning_rate": 2.2899480808050724e-05, | |
| "loss": 1.6348, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.8907922912205567, | |
| "grad_norm": 0.5913654606733179, | |
| "learning_rate": 2.2874989864276058e-05, | |
| "loss": 1.5646, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8950749464668094, | |
| "grad_norm": 0.5253786434201022, | |
| "learning_rate": 2.2850370226806846e-05, | |
| "loss": 1.5984, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.8993576017130621, | |
| "grad_norm": 0.4009456934819743, | |
| "learning_rate": 2.2825622201030572e-05, | |
| "loss": 1.5283, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9036402569593148, | |
| "grad_norm": 0.5333990945105044, | |
| "learning_rate": 2.280074609392729e-05, | |
| "loss": 1.5867, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.9079229122055674, | |
| "grad_norm": 0.3887789131541451, | |
| "learning_rate": 2.2775742214065786e-05, | |
| "loss": 1.3414, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.9122055674518201, | |
| "grad_norm": 0.5198803692192113, | |
| "learning_rate": 2.2750610871599782e-05, | |
| "loss": 1.5405, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.9164882226980728, | |
| "grad_norm": 0.3926454337534817, | |
| "learning_rate": 2.2725352378264074e-05, | |
| "loss": 1.509, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9207708779443254, | |
| "grad_norm": 0.42675935243666635, | |
| "learning_rate": 2.2699967047370656e-05, | |
| "loss": 1.5438, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.9250535331905781, | |
| "grad_norm": 0.3709378032432874, | |
| "learning_rate": 2.2674455193804857e-05, | |
| "loss": 1.6725, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9293361884368309, | |
| "grad_norm": 0.40669761633617474, | |
| "learning_rate": 2.26488171340214e-05, | |
| "loss": 1.485, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.9336188436830836, | |
| "grad_norm": 0.41102950360303664, | |
| "learning_rate": 2.2623053186040533e-05, | |
| "loss": 1.6809, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9379014989293362, | |
| "grad_norm": 0.40461859144094875, | |
| "learning_rate": 2.259716366944401e-05, | |
| "loss": 1.4951, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.9421841541755889, | |
| "grad_norm": 0.3897126856825778, | |
| "learning_rate": 2.25711489053712e-05, | |
| "loss": 1.5844, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9464668094218416, | |
| "grad_norm": 0.42222904373725634, | |
| "learning_rate": 2.2545009216515038e-05, | |
| "loss": 1.4944, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9507494646680942, | |
| "grad_norm": 0.40547118703731166, | |
| "learning_rate": 2.2518744927118085e-05, | |
| "loss": 1.5574, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9550321199143469, | |
| "grad_norm": 0.3513543405028927, | |
| "learning_rate": 2.2492356362968452e-05, | |
| "loss": 1.4118, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.9593147751605996, | |
| "grad_norm": 0.48633500004889796, | |
| "learning_rate": 2.2465843851395796e-05, | |
| "loss": 1.5477, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9635974304068522, | |
| "grad_norm": 0.3590985254593397, | |
| "learning_rate": 2.2439207721267236e-05, | |
| "loss": 1.4816, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9678800856531049, | |
| "grad_norm": 0.3702932493860504, | |
| "learning_rate": 2.2412448302983286e-05, | |
| "loss": 1.5548, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9721627408993576, | |
| "grad_norm": 0.40425531625329014, | |
| "learning_rate": 2.2385565928473758e-05, | |
| "loss": 1.6429, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.9764453961456103, | |
| "grad_norm": 0.4058276769467583, | |
| "learning_rate": 2.2358560931193636e-05, | |
| "loss": 1.4335, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9807280513918629, | |
| "grad_norm": 0.3312315245440172, | |
| "learning_rate": 2.2331433646118946e-05, | |
| "loss": 1.3716, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.9850107066381156, | |
| "grad_norm": 0.45936537843711933, | |
| "learning_rate": 2.2304184409742602e-05, | |
| "loss": 1.6051, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9892933618843683, | |
| "grad_norm": 0.41972232909317975, | |
| "learning_rate": 2.227681356007022e-05, | |
| "loss": 1.5685, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.9935760171306209, | |
| "grad_norm": 0.3634109524654273, | |
| "learning_rate": 2.224932143661594e-05, | |
| "loss": 1.5598, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9978586723768736, | |
| "grad_norm": 0.45907719960230176, | |
| "learning_rate": 2.222170838039822e-05, | |
| "loss": 1.5116, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.45907719960230176, | |
| "learning_rate": 2.2193974733935573e-05, | |
| "loss": 1.6087, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.0042826552462527, | |
| "grad_norm": 0.5861411036899304, | |
| "learning_rate": 2.216612084124236e-05, | |
| "loss": 1.3689, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.0085653104925054, | |
| "grad_norm": 0.4289041901369656, | |
| "learning_rate": 2.213814704782449e-05, | |
| "loss": 1.5579, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.0128479657387581, | |
| "grad_norm": 0.6259476055605661, | |
| "learning_rate": 2.2110053700675153e-05, | |
| "loss": 1.4052, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.0171306209850106, | |
| "grad_norm": 0.38820446634590455, | |
| "learning_rate": 2.2081841148270517e-05, | |
| "loss": 1.4333, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.0214132762312633, | |
| "grad_norm": 0.5061006213518089, | |
| "learning_rate": 2.205350974056538e-05, | |
| "loss": 1.356, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.025695931477516, | |
| "grad_norm": 0.3610425739202918, | |
| "learning_rate": 2.2025059828988873e-05, | |
| "loss": 1.3948, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0299785867237687, | |
| "grad_norm": 0.423679381495652, | |
| "learning_rate": 2.1996491766440047e-05, | |
| "loss": 1.3546, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.0342612419700214, | |
| "grad_norm": 0.34897540177436914, | |
| "learning_rate": 2.196780590728355e-05, | |
| "loss": 1.4721, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0385438972162742, | |
| "grad_norm": 0.425385319438199, | |
| "learning_rate": 2.193900260734519e-05, | |
| "loss": 1.4658, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.0428265524625269, | |
| "grad_norm": 0.3792487113919495, | |
| "learning_rate": 2.191008222390754e-05, | |
| "loss": 1.4699, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.0471092077087794, | |
| "grad_norm": 0.40281504819932906, | |
| "learning_rate": 2.188104511570551e-05, | |
| "loss": 1.3331, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.051391862955032, | |
| "grad_norm": 0.395699301044668, | |
| "learning_rate": 2.1851891642921875e-05, | |
| "loss": 1.4023, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0556745182012848, | |
| "grad_norm": 0.37492910340499946, | |
| "learning_rate": 2.1822622167182837e-05, | |
| "loss": 1.4737, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.0599571734475375, | |
| "grad_norm": 0.3952955885524941, | |
| "learning_rate": 2.1793237051553516e-05, | |
| "loss": 1.3771, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0642398286937902, | |
| "grad_norm": 0.3870229140110392, | |
| "learning_rate": 2.176373666053346e-05, | |
| "loss": 1.4438, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.068522483940043, | |
| "grad_norm": 0.40050568793681735, | |
| "learning_rate": 2.1734121360052117e-05, | |
| "loss": 1.3037, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0728051391862956, | |
| "grad_norm": 0.36180001178651866, | |
| "learning_rate": 2.1704391517464297e-05, | |
| "loss": 1.4278, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.077087794432548, | |
| "grad_norm": 0.4411737907590586, | |
| "learning_rate": 2.1674547501545615e-05, | |
| "loss": 1.3945, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0813704496788008, | |
| "grad_norm": 0.49343649178046994, | |
| "learning_rate": 2.164458968248792e-05, | |
| "loss": 1.3915, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.0856531049250535, | |
| "grad_norm": 0.3296867039273728, | |
| "learning_rate": 2.16145184318947e-05, | |
| "loss": 1.3265, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0899357601713062, | |
| "grad_norm": 0.39840035584346023, | |
| "learning_rate": 2.158433412277647e-05, | |
| "loss": 1.3751, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.094218415417559, | |
| "grad_norm": 0.3633584286546075, | |
| "learning_rate": 2.1554037129546153e-05, | |
| "loss": 1.354, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0985010706638116, | |
| "grad_norm": 0.4160505299653988, | |
| "learning_rate": 2.152362782801443e-05, | |
| "loss": 1.4007, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.1027837259100641, | |
| "grad_norm": 0.41007015982955497, | |
| "learning_rate": 2.1493106595385075e-05, | |
| "loss": 1.5213, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.1070663811563168, | |
| "grad_norm": 0.4650280917344183, | |
| "learning_rate": 2.1462473810250283e-05, | |
| "loss": 1.3312, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.1113490364025695, | |
| "grad_norm": 0.4266636624788006, | |
| "learning_rate": 2.1431729852585973e-05, | |
| "loss": 1.4889, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1156316916488223, | |
| "grad_norm": 0.3484736446907606, | |
| "learning_rate": 2.140087510374707e-05, | |
| "loss": 1.3312, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.119914346895075, | |
| "grad_norm": 0.41911843923802033, | |
| "learning_rate": 2.1369909946462785e-05, | |
| "loss": 1.3692, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.1241970021413277, | |
| "grad_norm": 0.3732407300025524, | |
| "learning_rate": 2.1338834764831845e-05, | |
| "loss": 1.3838, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.1284796573875804, | |
| "grad_norm": 0.38178586641917484, | |
| "learning_rate": 2.1307649944317757e-05, | |
| "loss": 1.2793, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.132762312633833, | |
| "grad_norm": 0.3673713909731938, | |
| "learning_rate": 2.1276355871744014e-05, | |
| "loss": 1.4399, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.1370449678800856, | |
| "grad_norm": 0.3901268012108484, | |
| "learning_rate": 2.124495293528928e-05, | |
| "loss": 1.4587, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.1413276231263383, | |
| "grad_norm": 0.3360533239959902, | |
| "learning_rate": 2.121344152448261e-05, | |
| "loss": 1.243, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.145610278372591, | |
| "grad_norm": 0.3771399946534415, | |
| "learning_rate": 2.118182203019859e-05, | |
| "loss": 1.3957, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.1498929336188437, | |
| "grad_norm": 0.4880244995913143, | |
| "learning_rate": 2.1150094844652493e-05, | |
| "loss": 1.3888, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.1541755888650964, | |
| "grad_norm": 0.3578978890422881, | |
| "learning_rate": 2.1118260361395428e-05, | |
| "loss": 1.4619, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1584582441113491, | |
| "grad_norm": 0.4201377835773034, | |
| "learning_rate": 2.108631897530945e-05, | |
| "loss": 1.4785, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.1627408993576016, | |
| "grad_norm": 0.4499980376910688, | |
| "learning_rate": 2.1054271082602646e-05, | |
| "loss": 1.4159, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.1670235546038543, | |
| "grad_norm": 0.3320870014261129, | |
| "learning_rate": 2.102211708080425e-05, | |
| "loss": 1.3894, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.171306209850107, | |
| "grad_norm": 0.42013650446350975, | |
| "learning_rate": 2.0989857368759686e-05, | |
| "loss": 1.3316, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.1755888650963597, | |
| "grad_norm": 0.35386203059819066, | |
| "learning_rate": 2.0957492346625647e-05, | |
| "loss": 1.4005, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.1798715203426124, | |
| "grad_norm": 0.3484835954332615, | |
| "learning_rate": 2.0925022415865093e-05, | |
| "loss": 1.275, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1841541755888652, | |
| "grad_norm": 0.4266307426695914, | |
| "learning_rate": 2.0892447979242314e-05, | |
| "loss": 1.3413, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.1884368308351179, | |
| "grad_norm": 0.4145417718791916, | |
| "learning_rate": 2.085976944081791e-05, | |
| "loss": 1.4286, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1927194860813706, | |
| "grad_norm": 0.4464633405061637, | |
| "learning_rate": 2.0826987205943772e-05, | |
| "loss": 1.4146, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.197002141327623, | |
| "grad_norm": 0.3813440974126778, | |
| "learning_rate": 2.0794101681258077e-05, | |
| "loss": 1.4651, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2012847965738758, | |
| "grad_norm": 0.37367647405069787, | |
| "learning_rate": 2.0761113274680227e-05, | |
| "loss": 1.3905, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.2055674518201285, | |
| "grad_norm": 0.4209973043589035, | |
| "learning_rate": 2.0728022395405794e-05, | |
| "loss": 1.3164, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.2098501070663812, | |
| "grad_norm": 0.35285764889842397, | |
| "learning_rate": 2.069482945390145e-05, | |
| "loss": 1.3184, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.214132762312634, | |
| "grad_norm": 0.6553038505857459, | |
| "learning_rate": 2.0661534861899858e-05, | |
| "loss": 1.2821, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.2184154175588866, | |
| "grad_norm": 0.4444549917679711, | |
| "learning_rate": 2.0628139032394582e-05, | |
| "loss": 1.3502, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.222698072805139, | |
| "grad_norm": 0.3352896065598441, | |
| "learning_rate": 2.0594642379634972e-05, | |
| "loss": 1.4577, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.2269807280513918, | |
| "grad_norm": 0.47069617049270435, | |
| "learning_rate": 2.0561045319120986e-05, | |
| "loss": 1.4025, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.2312633832976445, | |
| "grad_norm": 0.3991774380744109, | |
| "learning_rate": 2.0527348267598085e-05, | |
| "loss": 1.3674, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.2355460385438972, | |
| "grad_norm": 0.45298444147723504, | |
| "learning_rate": 2.049355164305203e-05, | |
| "loss": 1.2552, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.23982869379015, | |
| "grad_norm": 0.33638821026760457, | |
| "learning_rate": 2.0459655864703708e-05, | |
| "loss": 1.2414, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2441113490364026, | |
| "grad_norm": 0.4270670356767359, | |
| "learning_rate": 2.0425661353003932e-05, | |
| "loss": 1.261, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.2483940042826553, | |
| "grad_norm": 0.40636537980947196, | |
| "learning_rate": 2.0391568529628237e-05, | |
| "loss": 1.3725, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.252676659528908, | |
| "grad_norm": 0.36195547030323016, | |
| "learning_rate": 2.035737781747162e-05, | |
| "loss": 1.3342, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.2569593147751605, | |
| "grad_norm": 0.3539734470288324, | |
| "learning_rate": 2.0323089640643326e-05, | |
| "loss": 1.2697, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.2612419700214133, | |
| "grad_norm": 0.3540155063008326, | |
| "learning_rate": 2.0288704424461565e-05, | |
| "loss": 1.3329, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.265524625267666, | |
| "grad_norm": 0.4090169739563911, | |
| "learning_rate": 2.0254222595448248e-05, | |
| "loss": 1.4402, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2698072805139187, | |
| "grad_norm": 0.4193574818141074, | |
| "learning_rate": 2.0219644581323698e-05, | |
| "loss": 1.3086, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.2740899357601714, | |
| "grad_norm": 0.38365729947629434, | |
| "learning_rate": 2.0184970811001337e-05, | |
| "loss": 1.4018, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.2783725910064239, | |
| "grad_norm": 0.4219737883083424, | |
| "learning_rate": 2.0150201714582356e-05, | |
| "loss": 1.3844, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.2826552462526766, | |
| "grad_norm": 0.43507834104776355, | |
| "learning_rate": 2.011533772335041e-05, | |
| "loss": 1.3706, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2869379014989293, | |
| "grad_norm": 0.4133280809903553, | |
| "learning_rate": 2.008037926976625e-05, | |
| "loss": 1.376, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.291220556745182, | |
| "grad_norm": 0.36852825890998525, | |
| "learning_rate": 2.0045326787462333e-05, | |
| "loss": 1.328, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2955032119914347, | |
| "grad_norm": 0.4205230066377953, | |
| "learning_rate": 2.001018071123751e-05, | |
| "loss": 1.2974, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.2997858672376874, | |
| "grad_norm": 0.4329679857419846, | |
| "learning_rate": 1.9974941477051558e-05, | |
| "loss": 1.3526, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.3040685224839401, | |
| "grad_norm": 0.3705004730863205, | |
| "learning_rate": 1.9939609522019818e-05, | |
| "loss": 1.2298, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.3083511777301928, | |
| "grad_norm": 0.39436925521218896, | |
| "learning_rate": 1.9904185284407772e-05, | |
| "loss": 1.3945, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.3126338329764453, | |
| "grad_norm": 0.35298924796738734, | |
| "learning_rate": 1.986866920362558e-05, | |
| "loss": 1.3016, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.316916488222698, | |
| "grad_norm": 0.3894071215590034, | |
| "learning_rate": 1.9833061720222647e-05, | |
| "loss": 1.2325, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.3211991434689507, | |
| "grad_norm": 0.3213378234068627, | |
| "learning_rate": 1.9797363275882165e-05, | |
| "loss": 1.2817, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.3254817987152034, | |
| "grad_norm": 0.4084287292776311, | |
| "learning_rate": 1.9761574313415617e-05, | |
| "loss": 1.4881, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3297644539614561, | |
| "grad_norm": 0.40532300063738275, | |
| "learning_rate": 1.9725695276757302e-05, | |
| "loss": 1.4029, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.3340471092077089, | |
| "grad_norm": 0.3507190637097869, | |
| "learning_rate": 1.9689726610958814e-05, | |
| "loss": 1.4194, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.3383297644539613, | |
| "grad_norm": 0.3805072033067047, | |
| "learning_rate": 1.9653668762183526e-05, | |
| "loss": 1.3264, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.342612419700214, | |
| "grad_norm": 0.3367128120964735, | |
| "learning_rate": 1.9617522177701058e-05, | |
| "loss": 1.3298, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.3468950749464668, | |
| "grad_norm": 0.3977736636900147, | |
| "learning_rate": 1.9581287305881733e-05, | |
| "loss": 1.3487, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.3511777301927195, | |
| "grad_norm": 0.3236399137428874, | |
| "learning_rate": 1.9544964596190996e-05, | |
| "loss": 1.2795, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.3554603854389722, | |
| "grad_norm": 0.4410261852426088, | |
| "learning_rate": 1.9508554499183867e-05, | |
| "loss": 1.2954, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.359743040685225, | |
| "grad_norm": 0.33824185574060495, | |
| "learning_rate": 1.9472057466499332e-05, | |
| "loss": 1.2966, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.3640256959314776, | |
| "grad_norm": 0.5560403035800862, | |
| "learning_rate": 1.9435473950854745e-05, | |
| "loss": 1.4434, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.3683083511777303, | |
| "grad_norm": 0.36625625108883125, | |
| "learning_rate": 1.939880440604021e-05, | |
| "loss": 1.2226, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3725910064239828, | |
| "grad_norm": 0.35699181136533303, | |
| "learning_rate": 1.9362049286912976e-05, | |
| "loss": 1.2464, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.3768736616702355, | |
| "grad_norm": 0.3813490989402076, | |
| "learning_rate": 1.9325209049391745e-05, | |
| "loss": 1.3279, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.3811563169164882, | |
| "grad_norm": 0.37459529309165335, | |
| "learning_rate": 1.9288284150451075e-05, | |
| "loss": 1.4422, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.385438972162741, | |
| "grad_norm": 0.39667372726355776, | |
| "learning_rate": 1.9251275048115664e-05, | |
| "loss": 1.5061, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.3897216274089936, | |
| "grad_norm": 0.34082355171490486, | |
| "learning_rate": 1.9214182201454695e-05, | |
| "loss": 1.3049, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.3940042826552461, | |
| "grad_norm": 0.4260735758035037, | |
| "learning_rate": 1.917700607057613e-05, | |
| "loss": 1.3912, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3982869379014988, | |
| "grad_norm": 0.4021033157629882, | |
| "learning_rate": 1.9139747116621015e-05, | |
| "loss": 1.4421, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.4025695931477515, | |
| "grad_norm": 0.4034799522400383, | |
| "learning_rate": 1.910240580175775e-05, | |
| "loss": 1.3598, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.4068522483940042, | |
| "grad_norm": 0.44358114185104625, | |
| "learning_rate": 1.906498258917635e-05, | |
| "loss": 1.4136, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.411134903640257, | |
| "grad_norm": 0.3945332504871927, | |
| "learning_rate": 1.9027477943082713e-05, | |
| "loss": 1.2517, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4154175588865097, | |
| "grad_norm": 0.3778742839914516, | |
| "learning_rate": 1.8989892328692864e-05, | |
| "loss": 1.333, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.4197002141327624, | |
| "grad_norm": 0.3796237837136356, | |
| "learning_rate": 1.895222621222716e-05, | |
| "loss": 1.3931, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.423982869379015, | |
| "grad_norm": 0.38301575785071823, | |
| "learning_rate": 1.8914480060904537e-05, | |
| "loss": 1.424, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.4282655246252678, | |
| "grad_norm": 0.421930928101693, | |
| "learning_rate": 1.88766543429367e-05, | |
| "loss": 1.402, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.4325481798715203, | |
| "grad_norm": 0.3699757863435036, | |
| "learning_rate": 1.8838749527522315e-05, | |
| "loss": 1.4079, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.436830835117773, | |
| "grad_norm": 0.42666319657235885, | |
| "learning_rate": 1.8800766084841183e-05, | |
| "loss": 1.3614, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.4411134903640257, | |
| "grad_norm": 0.35291694731273704, | |
| "learning_rate": 1.8762704486048427e-05, | |
| "loss": 1.3407, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.4453961456102784, | |
| "grad_norm": 0.37044240049931565, | |
| "learning_rate": 1.872456520326863e-05, | |
| "loss": 1.3531, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.4496788008565311, | |
| "grad_norm": 0.374037870809853, | |
| "learning_rate": 1.8686348709589982e-05, | |
| "loss": 1.4962, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.4539614561027836, | |
| "grad_norm": 0.39143283644429916, | |
| "learning_rate": 1.8648055479058422e-05, | |
| "loss": 1.3451, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4582441113490363, | |
| "grad_norm": 0.3862274046133055, | |
| "learning_rate": 1.8609685986671744e-05, | |
| "loss": 1.4157, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.462526766595289, | |
| "grad_norm": 0.32589359289541453, | |
| "learning_rate": 1.8571240708373707e-05, | |
| "loss": 1.3611, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.4668094218415417, | |
| "grad_norm": 0.38467743700470014, | |
| "learning_rate": 1.853272012104815e-05, | |
| "loss": 1.4441, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.4710920770877944, | |
| "grad_norm": 0.3740956575298423, | |
| "learning_rate": 1.849412470251305e-05, | |
| "loss": 1.4004, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.4753747323340471, | |
| "grad_norm": 0.30848423646912154, | |
| "learning_rate": 1.8455454931514605e-05, | |
| "loss": 1.262, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.4796573875802999, | |
| "grad_norm": 0.3740097120746422, | |
| "learning_rate": 1.8416711287721303e-05, | |
| "loss": 1.2179, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.4839400428265526, | |
| "grad_norm": 0.4082863839360843, | |
| "learning_rate": 1.8377894251717974e-05, | |
| "loss": 1.4259, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.4882226980728053, | |
| "grad_norm": 0.3948652596870541, | |
| "learning_rate": 1.8339004304999806e-05, | |
| "loss": 1.3442, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.4925053533190578, | |
| "grad_norm": 0.4678512487151559, | |
| "learning_rate": 1.8300041929966404e-05, | |
| "loss": 1.4306, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.4967880085653105, | |
| "grad_norm": 0.45548221851750526, | |
| "learning_rate": 1.8261007609915773e-05, | |
| "loss": 1.3257, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5010706638115632, | |
| "grad_norm": 0.3961504677246392, | |
| "learning_rate": 1.8221901829038347e-05, | |
| "loss": 1.4226, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.5053533190578159, | |
| "grad_norm": 0.48575304661026586, | |
| "learning_rate": 1.818272507241099e-05, | |
| "loss": 1.3101, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5096359743040684, | |
| "grad_norm": 0.4223474689775986, | |
| "learning_rate": 1.8143477825990938e-05, | |
| "loss": 1.3738, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.513918629550321, | |
| "grad_norm": 0.4328835573924883, | |
| "learning_rate": 1.8104160576609828e-05, | |
| "loss": 1.4613, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5182012847965738, | |
| "grad_norm": 0.36894215625076815, | |
| "learning_rate": 1.80647738119676e-05, | |
| "loss": 1.4421, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.5224839400428265, | |
| "grad_norm": 0.42960329602264624, | |
| "learning_rate": 1.8025318020626497e-05, | |
| "loss": 1.4449, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.5267665952890792, | |
| "grad_norm": 0.4381808830561339, | |
| "learning_rate": 1.7985793692004983e-05, | |
| "loss": 1.3895, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.531049250535332, | |
| "grad_norm": 0.511639740310659, | |
| "learning_rate": 1.7946201316371665e-05, | |
| "loss": 1.5033, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5353319057815846, | |
| "grad_norm": 0.30935207991898406, | |
| "learning_rate": 1.7906541384839226e-05, | |
| "loss": 1.2179, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.5396145610278373, | |
| "grad_norm": 0.5149363491855712, | |
| "learning_rate": 1.7866814389358323e-05, | |
| "loss": 1.3692, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.54389721627409, | |
| "grad_norm": 0.3768568355085642, | |
| "learning_rate": 1.7827020822711493e-05, | |
| "loss": 1.4404, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.5481798715203428, | |
| "grad_norm": 0.5075668454602467, | |
| "learning_rate": 1.7787161178507045e-05, | |
| "loss": 1.4351, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.5524625267665952, | |
| "grad_norm": 0.429005671047687, | |
| "learning_rate": 1.7747235951172908e-05, | |
| "loss": 1.2954, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.556745182012848, | |
| "grad_norm": 0.4773307561454311, | |
| "learning_rate": 1.7707245635950536e-05, | |
| "loss": 1.3229, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.5610278372591007, | |
| "grad_norm": 0.46224461269568345, | |
| "learning_rate": 1.7667190728888743e-05, | |
| "loss": 1.4701, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.5653104925053534, | |
| "grad_norm": 0.4398714446841838, | |
| "learning_rate": 1.7627071726837556e-05, | |
| "loss": 1.3617, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.5695931477516059, | |
| "grad_norm": 0.3774107684610511, | |
| "learning_rate": 1.7586889127442045e-05, | |
| "loss": 1.3137, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.5738758029978586, | |
| "grad_norm": 0.4646696934362882, | |
| "learning_rate": 1.754664342913616e-05, | |
| "loss": 1.3487, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.5781584582441113, | |
| "grad_norm": 0.3570064846109861, | |
| "learning_rate": 1.7506335131136548e-05, | |
| "loss": 1.3087, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.582441113490364, | |
| "grad_norm": 0.4493705452348863, | |
| "learning_rate": 1.7465964733436342e-05, | |
| "loss": 1.5064, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.5867237687366167, | |
| "grad_norm": 0.35347935083263654, | |
| "learning_rate": 1.7425532736798994e-05, | |
| "loss": 1.354, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.5910064239828694, | |
| "grad_norm": 0.38802945271200445, | |
| "learning_rate": 1.7385039642752026e-05, | |
| "loss": 1.3905, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.595289079229122, | |
| "grad_norm": 0.3971847941983123, | |
| "learning_rate": 1.7344485953580834e-05, | |
| "loss": 1.3172, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.5995717344753748, | |
| "grad_norm": 0.4063900151850949, | |
| "learning_rate": 1.730387217232245e-05, | |
| "loss": 1.3902, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6038543897216275, | |
| "grad_norm": 0.3482101582890047, | |
| "learning_rate": 1.72631988027593e-05, | |
| "loss": 1.4267, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.6081370449678802, | |
| "grad_norm": 0.3907023409634497, | |
| "learning_rate": 1.7222466349412953e-05, | |
| "loss": 1.3657, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6124197002141327, | |
| "grad_norm": 0.39648365466974855, | |
| "learning_rate": 1.718167531753787e-05, | |
| "loss": 1.3757, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.6167023554603854, | |
| "grad_norm": 0.3482003705389042, | |
| "learning_rate": 1.7140826213115134e-05, | |
| "loss": 1.3889, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.6209850107066381, | |
| "grad_norm": 0.43357670792552266, | |
| "learning_rate": 1.7099919542846174e-05, | |
| "loss": 1.3975, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.6252676659528906, | |
| "grad_norm": 0.344012746609685, | |
| "learning_rate": 1.705895581414647e-05, | |
| "loss": 1.3761, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6295503211991433, | |
| "grad_norm": 0.3912736883863624, | |
| "learning_rate": 1.7017935535139286e-05, | |
| "loss": 1.2256, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.633832976445396, | |
| "grad_norm": 0.32389309159432333, | |
| "learning_rate": 1.697685921464932e-05, | |
| "loss": 1.2611, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6381156316916488, | |
| "grad_norm": 0.3808112089261434, | |
| "learning_rate": 1.6935727362196453e-05, | |
| "loss": 1.3773, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.6423982869379015, | |
| "grad_norm": 0.3815707909378436, | |
| "learning_rate": 1.6894540487989374e-05, | |
| "loss": 1.4341, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.6466809421841542, | |
| "grad_norm": 0.3707311578105496, | |
| "learning_rate": 1.6853299102919278e-05, | |
| "loss": 1.3912, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.6509635974304069, | |
| "grad_norm": 0.3477881955581895, | |
| "learning_rate": 1.681200371855354e-05, | |
| "loss": 1.4454, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.6552462526766596, | |
| "grad_norm": 0.3749155440303463, | |
| "learning_rate": 1.6770654847129336e-05, | |
| "loss": 1.3565, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.6595289079229123, | |
| "grad_norm": 0.37356126951976065, | |
| "learning_rate": 1.6729253001547313e-05, | |
| "loss": 1.2841, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.663811563169165, | |
| "grad_norm": 0.3479511050011833, | |
| "learning_rate": 1.6687798695365224e-05, | |
| "loss": 1.3371, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.6680942184154177, | |
| "grad_norm": 0.3581912213414331, | |
| "learning_rate": 1.6646292442791557e-05, | |
| "loss": 1.232, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6723768736616702, | |
| "grad_norm": 0.3215446113048358, | |
| "learning_rate": 1.6604734758679147e-05, | |
| "loss": 1.3963, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.676659528907923, | |
| "grad_norm": 0.4376359515021747, | |
| "learning_rate": 1.6563126158518806e-05, | |
| "loss": 1.3747, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.6809421841541756, | |
| "grad_norm": 0.3060677115981459, | |
| "learning_rate": 1.6521467158432916e-05, | |
| "loss": 1.3455, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.685224839400428, | |
| "grad_norm": 0.39842372210368826, | |
| "learning_rate": 1.647975827516902e-05, | |
| "loss": 1.3162, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.6895074946466808, | |
| "grad_norm": 0.32860459996161495, | |
| "learning_rate": 1.6438000026093447e-05, | |
| "loss": 1.4114, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.6937901498929335, | |
| "grad_norm": 0.42177195772773357, | |
| "learning_rate": 1.6396192929184852e-05, | |
| "loss": 1.3835, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.6980728051391862, | |
| "grad_norm": 0.37483985613490883, | |
| "learning_rate": 1.6354337503027817e-05, | |
| "loss": 1.4495, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.702355460385439, | |
| "grad_norm": 0.3287442844969753, | |
| "learning_rate": 1.6312434266806406e-05, | |
| "loss": 1.3417, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7066381156316917, | |
| "grad_norm": 0.3409487933679222, | |
| "learning_rate": 1.627048374029773e-05, | |
| "loss": 1.3727, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.7109207708779444, | |
| "grad_norm": 0.364966633180017, | |
| "learning_rate": 1.622848644386551e-05, | |
| "loss": 1.3445, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.715203426124197, | |
| "grad_norm": 0.40782880089567125, | |
| "learning_rate": 1.6186442898453593e-05, | |
| "loss": 1.4314, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.7194860813704498, | |
| "grad_norm": 0.35338981155106325, | |
| "learning_rate": 1.614435362557953e-05, | |
| "loss": 1.2992, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7237687366167025, | |
| "grad_norm": 0.3458710703190408, | |
| "learning_rate": 1.6102219147328064e-05, | |
| "loss": 1.2444, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.728051391862955, | |
| "grad_norm": 0.34047208337511875, | |
| "learning_rate": 1.6060039986344692e-05, | |
| "loss": 1.3841, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.7323340471092077, | |
| "grad_norm": 0.34973667960604016, | |
| "learning_rate": 1.601781666582916e-05, | |
| "loss": 1.3197, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.7366167023554604, | |
| "grad_norm": 0.3619484642212399, | |
| "learning_rate": 1.5975549709528977e-05, | |
| "loss": 1.3597, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.740899357601713, | |
| "grad_norm": 0.3485323431598921, | |
| "learning_rate": 1.593323964173292e-05, | |
| "loss": 1.3541, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.7451820128479656, | |
| "grad_norm": 0.3722079995799495, | |
| "learning_rate": 1.5890886987264536e-05, | |
| "loss": 1.3639, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.7494646680942183, | |
| "grad_norm": 0.32734387518519825, | |
| "learning_rate": 1.5848492271475622e-05, | |
| "loss": 1.4136, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.753747323340471, | |
| "grad_norm": 0.3864261811647076, | |
| "learning_rate": 1.5806056020239714e-05, | |
| "loss": 1.4231, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7580299785867237, | |
| "grad_norm": 0.341163146089911, | |
| "learning_rate": 1.576357875994556e-05, | |
| "loss": 1.3912, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.7623126338329764, | |
| "grad_norm": 0.4322424139588224, | |
| "learning_rate": 1.5721061017490594e-05, | |
| "loss": 1.3543, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.7665952890792291, | |
| "grad_norm": 0.3430090140811513, | |
| "learning_rate": 1.5678503320274407e-05, | |
| "loss": 1.4195, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.7708779443254818, | |
| "grad_norm": 0.39442054888019096, | |
| "learning_rate": 1.5635906196192194e-05, | |
| "loss": 1.3609, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.7751605995717346, | |
| "grad_norm": 0.39246818337147305, | |
| "learning_rate": 1.5593270173628208e-05, | |
| "loss": 1.3496, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.7794432548179873, | |
| "grad_norm": 0.3896357465642991, | |
| "learning_rate": 1.5550595781449205e-05, | |
| "loss": 1.2962, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.78372591006424, | |
| "grad_norm": 0.40875227853762397, | |
| "learning_rate": 1.550788354899789e-05, | |
| "loss": 1.2827, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.7880085653104925, | |
| "grad_norm": 0.32384312840403434, | |
| "learning_rate": 1.5465134006086347e-05, | |
| "loss": 1.4018, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.7922912205567452, | |
| "grad_norm": 0.4319845932792659, | |
| "learning_rate": 1.5422347682989467e-05, | |
| "loss": 1.2837, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.7965738758029979, | |
| "grad_norm": 0.4015204521770257, | |
| "learning_rate": 1.5379525110438374e-05, | |
| "loss": 1.445, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8008565310492506, | |
| "grad_norm": 0.3636542581207264, | |
| "learning_rate": 1.5336666819613832e-05, | |
| "loss": 1.3278, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.805139186295503, | |
| "grad_norm": 0.42635584079656125, | |
| "learning_rate": 1.5293773342139662e-05, | |
| "loss": 1.3899, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8094218415417558, | |
| "grad_norm": 0.3796172113574308, | |
| "learning_rate": 1.5250845210076151e-05, | |
| "loss": 1.2944, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.8137044967880085, | |
| "grad_norm": 0.4222877528683101, | |
| "learning_rate": 1.5207882955913457e-05, | |
| "loss": 1.4121, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8179871520342612, | |
| "grad_norm": 0.6206094866942423, | |
| "learning_rate": 1.5164887112564985e-05, | |
| "loss": 1.3037, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.822269807280514, | |
| "grad_norm": 0.338186939979986, | |
| "learning_rate": 1.5121858213360793e-05, | |
| "loss": 1.4515, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8265524625267666, | |
| "grad_norm": 0.42085883637300137, | |
| "learning_rate": 1.507879679204096e-05, | |
| "loss": 1.3801, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.8308351177730193, | |
| "grad_norm": 0.33029638552346774, | |
| "learning_rate": 1.5035703382749e-05, | |
| "loss": 1.3197, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.835117773019272, | |
| "grad_norm": 0.3796212349112593, | |
| "learning_rate": 1.4992578520025194e-05, | |
| "loss": 1.3341, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.8394004282655247, | |
| "grad_norm": 0.3416764792743133, | |
| "learning_rate": 1.4949422738799982e-05, | |
| "loss": 1.2933, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.8436830835117775, | |
| "grad_norm": 0.37923918821239594, | |
| "learning_rate": 1.4906236574387326e-05, | |
| "loss": 1.3359, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.84796573875803, | |
| "grad_norm": 0.30907027792758374, | |
| "learning_rate": 1.4863020562478064e-05, | |
| "loss": 1.2737, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.8522483940042827, | |
| "grad_norm": 0.3903264898543205, | |
| "learning_rate": 1.4819775239133283e-05, | |
| "loss": 1.3131, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.8565310492505354, | |
| "grad_norm": 0.3841336756186868, | |
| "learning_rate": 1.4776501140777637e-05, | |
| "loss": 1.3649, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.8608137044967878, | |
| "grad_norm": 0.4074493999576374, | |
| "learning_rate": 1.4733198804192724e-05, | |
| "loss": 1.2991, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.8650963597430406, | |
| "grad_norm": 0.3855125688098399, | |
| "learning_rate": 1.4689868766510406e-05, | |
| "loss": 1.3823, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.8693790149892933, | |
| "grad_norm": 0.37126874922918807, | |
| "learning_rate": 1.4646511565206164e-05, | |
| "loss": 1.3426, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.873661670235546, | |
| "grad_norm": 0.3714258164077467, | |
| "learning_rate": 1.4603127738092423e-05, | |
| "loss": 1.2718, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.8779443254817987, | |
| "grad_norm": 0.3429261958678687, | |
| "learning_rate": 1.455971782331187e-05, | |
| "loss": 1.3858, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.8822269807280514, | |
| "grad_norm": 0.38495602247470384, | |
| "learning_rate": 1.4516282359330801e-05, | |
| "loss": 1.2777, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.886509635974304, | |
| "grad_norm": 0.3699329784967151, | |
| "learning_rate": 1.4472821884932426e-05, | |
| "loss": 1.3578, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.8907922912205568, | |
| "grad_norm": 0.3599785136664482, | |
| "learning_rate": 1.442933693921018e-05, | |
| "loss": 1.416, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.8950749464668095, | |
| "grad_norm": 0.33538664994930595, | |
| "learning_rate": 1.4385828061561066e-05, | |
| "loss": 1.3407, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.8993576017130622, | |
| "grad_norm": 0.36336031298257154, | |
| "learning_rate": 1.434229579167893e-05, | |
| "loss": 1.2169, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.903640256959315, | |
| "grad_norm": 0.31518334287029476, | |
| "learning_rate": 1.429874066954778e-05, | |
| "loss": 1.3974, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.9079229122055674, | |
| "grad_norm": 0.380470589989531, | |
| "learning_rate": 1.425516323543509e-05, | |
| "loss": 1.3915, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9122055674518201, | |
| "grad_norm": 0.3510136894640434, | |
| "learning_rate": 1.4211564029885102e-05, | |
| "loss": 1.3113, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.9164882226980728, | |
| "grad_norm": 0.34050831451001196, | |
| "learning_rate": 1.4167943593712113e-05, | |
| "loss": 1.3751, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9207708779443253, | |
| "grad_norm": 0.3583661125603097, | |
| "learning_rate": 1.4124302467993769e-05, | |
| "loss": 1.3255, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.925053533190578, | |
| "grad_norm": 0.3389101579476846, | |
| "learning_rate": 1.4080641194064348e-05, | |
| "loss": 1.4168, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9293361884368307, | |
| "grad_norm": 0.3834913291170707, | |
| "learning_rate": 1.403696031350806e-05, | |
| "loss": 1.3644, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.9336188436830835, | |
| "grad_norm": 0.4308322141053784, | |
| "learning_rate": 1.3993260368152317e-05, | |
| "loss": 1.4786, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.9379014989293362, | |
| "grad_norm": 0.3537841876121041, | |
| "learning_rate": 1.3949541900061014e-05, | |
| "loss": 1.2849, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.9421841541755889, | |
| "grad_norm": 0.3739024334028022, | |
| "learning_rate": 1.3905805451527806e-05, | |
| "loss": 1.2974, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.9464668094218416, | |
| "grad_norm": 0.3756096151923131, | |
| "learning_rate": 1.386205156506938e-05, | |
| "loss": 1.2532, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.9507494646680943, | |
| "grad_norm": 0.3642163049913141, | |
| "learning_rate": 1.381828078341873e-05, | |
| "loss": 1.3066, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.955032119914347, | |
| "grad_norm": 0.4016856878315503, | |
| "learning_rate": 1.3774493649518424e-05, | |
| "loss": 1.3514, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.9593147751605997, | |
| "grad_norm": 0.3570908964430489, | |
| "learning_rate": 1.373069070651386e-05, | |
| "loss": 1.3798, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.9635974304068522, | |
| "grad_norm": 0.4546768723455663, | |
| "learning_rate": 1.3686872497746539e-05, | |
| "loss": 1.2297, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.967880085653105, | |
| "grad_norm": 0.39770363928777963, | |
| "learning_rate": 1.364303956674732e-05, | |
| "loss": 1.3251, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9721627408993576, | |
| "grad_norm": 0.4625841972208585, | |
| "learning_rate": 1.359919245722969e-05, | |
| "loss": 1.4199, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.9764453961456103, | |
| "grad_norm": 0.4133274366928544, | |
| "learning_rate": 1.3555331713082991e-05, | |
| "loss": 1.3047, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.9807280513918628, | |
| "grad_norm": 0.4108939632332837, | |
| "learning_rate": 1.351145787836571e-05, | |
| "loss": 1.3929, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.9850107066381155, | |
| "grad_norm": 0.37835291483581496, | |
| "learning_rate": 1.3467571497298703e-05, | |
| "loss": 1.1941, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.9892933618843682, | |
| "grad_norm": 0.37813972695047565, | |
| "learning_rate": 1.342367311425845e-05, | |
| "loss": 1.4973, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.993576017130621, | |
| "grad_norm": 1.6403016895398341, | |
| "learning_rate": 1.3379763273770324e-05, | |
| "loss": 1.3624, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.9978586723768736, | |
| "grad_norm": 0.4830892612436795, | |
| "learning_rate": 1.3335842520501795e-05, | |
| "loss": 1.302, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.6829899377473765, | |
| "learning_rate": 1.3291911399255713e-05, | |
| "loss": 1.2285, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.0042826552462527, | |
| "grad_norm": 0.8136378650415125, | |
| "learning_rate": 1.3247970454963531e-05, | |
| "loss": 1.1863, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.0085653104925054, | |
| "grad_norm": 0.6124913953543332, | |
| "learning_rate": 1.3204020232678549e-05, | |
| "loss": 1.1323, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.012847965738758, | |
| "grad_norm": 0.9415264304617837, | |
| "learning_rate": 1.3160061277569156e-05, | |
| "loss": 1.1341, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.017130620985011, | |
| "grad_norm": 0.5598470498427739, | |
| "learning_rate": 1.3116094134912055e-05, | |
| "loss": 1.0978, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.0214132762312635, | |
| "grad_norm": 0.5199782381878686, | |
| "learning_rate": 1.3072119350085524e-05, | |
| "loss": 1.15, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.0256959314775163, | |
| "grad_norm": 0.4796395014344232, | |
| "learning_rate": 1.3028137468562624e-05, | |
| "loss": 1.2802, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.0299785867237685, | |
| "grad_norm": 0.4542325665519593, | |
| "learning_rate": 1.2984149035904447e-05, | |
| "loss": 1.0659, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.0342612419700212, | |
| "grad_norm": 0.4431903012032383, | |
| "learning_rate": 1.2940154597753356e-05, | |
| "loss": 1.0986, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.038543897216274, | |
| "grad_norm": 0.46952279850037054, | |
| "learning_rate": 1.2896154699826201e-05, | |
| "loss": 1.1216, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.0428265524625266, | |
| "grad_norm": 0.45033430393074514, | |
| "learning_rate": 1.2852149887907553e-05, | |
| "loss": 1.1881, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.0471092077087794, | |
| "grad_norm": 0.4606628838219141, | |
| "learning_rate": 1.2808140707842936e-05, | |
| "loss": 1.0762, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.051391862955032, | |
| "grad_norm": 0.4522706754261223, | |
| "learning_rate": 1.276412770553207e-05, | |
| "loss": 1.1182, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.0556745182012848, | |
| "grad_norm": 0.4275410449005914, | |
| "learning_rate": 1.2720111426922072e-05, | |
| "loss": 1.1262, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.0599571734475375, | |
| "grad_norm": 0.4117922922818347, | |
| "learning_rate": 1.2676092418000709e-05, | |
| "loss": 1.0937, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.06423982869379, | |
| "grad_norm": 0.4076420511090681, | |
| "learning_rate": 1.2632071224789613e-05, | |
| "loss": 1.1588, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.068522483940043, | |
| "grad_norm": 0.39985814020478855, | |
| "learning_rate": 1.2588048393337503e-05, | |
| "loss": 1.2315, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.0728051391862956, | |
| "grad_norm": 0.41357202132909343, | |
| "learning_rate": 1.2544024469713437e-05, | |
| "loss": 1.1924, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.0770877944325483, | |
| "grad_norm": 0.39780940223532485, | |
| "learning_rate": 1.25e-05, | |
| "loss": 1.1816, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.081370449678801, | |
| "grad_norm": 0.42899527932620385, | |
| "learning_rate": 1.245597553028657e-05, | |
| "loss": 1.1841, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.0856531049250537, | |
| "grad_norm": 0.39083398721432966, | |
| "learning_rate": 1.2411951606662498e-05, | |
| "loss": 1.1098, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.089935760171306, | |
| "grad_norm": 0.43420822774302814, | |
| "learning_rate": 1.2367928775210393e-05, | |
| "loss": 1.1627, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.0942184154175587, | |
| "grad_norm": 0.3732705280561028, | |
| "learning_rate": 1.2323907581999292e-05, | |
| "loss": 1.129, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.0985010706638114, | |
| "grad_norm": 0.41632399144455645, | |
| "learning_rate": 1.2279888573077935e-05, | |
| "loss": 0.9738, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.102783725910064, | |
| "grad_norm": 0.38659287989811325, | |
| "learning_rate": 1.2235872294467934e-05, | |
| "loss": 1.1593, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.107066381156317, | |
| "grad_norm": 0.3920026187084851, | |
| "learning_rate": 1.2191859292157066e-05, | |
| "loss": 1.0827, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.1113490364025695, | |
| "grad_norm": 0.3994514767198869, | |
| "learning_rate": 1.2147850112092448e-05, | |
| "loss": 1.1405, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.1156316916488223, | |
| "grad_norm": 0.43445357298460374, | |
| "learning_rate": 1.2103845300173801e-05, | |
| "loss": 1.0986, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.119914346895075, | |
| "grad_norm": 0.4042400771293127, | |
| "learning_rate": 1.2059845402246642e-05, | |
| "loss": 1.1418, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.1241970021413277, | |
| "grad_norm": 0.3788718739976897, | |
| "learning_rate": 1.2015850964095555e-05, | |
| "loss": 1.1349, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.1284796573875804, | |
| "grad_norm": 0.3821076969792679, | |
| "learning_rate": 1.197186253143738e-05, | |
| "loss": 1.2081, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.132762312633833, | |
| "grad_norm": 0.4411851187923958, | |
| "learning_rate": 1.192788064991448e-05, | |
| "loss": 1.1522, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.137044967880086, | |
| "grad_norm": 0.404962832392533, | |
| "learning_rate": 1.1883905865087944e-05, | |
| "loss": 1.1383, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.1413276231263385, | |
| "grad_norm": 0.39962573083698255, | |
| "learning_rate": 1.1839938722430849e-05, | |
| "loss": 1.0717, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.145610278372591, | |
| "grad_norm": 0.4004973819254198, | |
| "learning_rate": 1.1795979767321451e-05, | |
| "loss": 1.2155, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.1498929336188435, | |
| "grad_norm": 0.42839296529898985, | |
| "learning_rate": 1.175202954503647e-05, | |
| "loss": 1.1801, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.154175588865096, | |
| "grad_norm": 0.39581686357900003, | |
| "learning_rate": 1.1708088600744292e-05, | |
| "loss": 1.1871, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.158458244111349, | |
| "grad_norm": 0.3515337940814968, | |
| "learning_rate": 1.166415747949821e-05, | |
| "loss": 1.0689, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.1627408993576016, | |
| "grad_norm": 0.38280355472311695, | |
| "learning_rate": 1.1620236726229684e-05, | |
| "loss": 1.1653, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.1670235546038543, | |
| "grad_norm": 0.3601455061997376, | |
| "learning_rate": 1.157632688574155e-05, | |
| "loss": 1.1316, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.171306209850107, | |
| "grad_norm": 0.4036025468502878, | |
| "learning_rate": 1.1532428502701303e-05, | |
| "loss": 1.1332, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.1755888650963597, | |
| "grad_norm": 0.3689501638767867, | |
| "learning_rate": 1.1488542121634292e-05, | |
| "loss": 1.1398, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.1798715203426124, | |
| "grad_norm": 0.44516877676862204, | |
| "learning_rate": 1.1444668286917013e-05, | |
| "loss": 1.1009, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.184154175588865, | |
| "grad_norm": 0.35171086043635746, | |
| "learning_rate": 1.1400807542770314e-05, | |
| "loss": 1.1452, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.188436830835118, | |
| "grad_norm": 0.37133980314166626, | |
| "learning_rate": 1.135696043325268e-05, | |
| "loss": 1.1579, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.1927194860813706, | |
| "grad_norm": 0.34968878321273367, | |
| "learning_rate": 1.1313127502253462e-05, | |
| "loss": 1.1296, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.1970021413276233, | |
| "grad_norm": 0.35409451711365186, | |
| "learning_rate": 1.1269309293486144e-05, | |
| "loss": 1.149, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.201284796573876, | |
| "grad_norm": 0.39987353315213703, | |
| "learning_rate": 1.1225506350481577e-05, | |
| "loss": 1.0483, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.2055674518201283, | |
| "grad_norm": 0.37950153309424184, | |
| "learning_rate": 1.1181719216581272e-05, | |
| "loss": 1.123, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.209850107066381, | |
| "grad_norm": 0.3738479054688087, | |
| "learning_rate": 1.1137948434930622e-05, | |
| "loss": 1.1478, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.2141327623126337, | |
| "grad_norm": 0.37447253121660345, | |
| "learning_rate": 1.1094194548472197e-05, | |
| "loss": 1.1929, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.2184154175588864, | |
| "grad_norm": 0.36554010421344446, | |
| "learning_rate": 1.1050458099938985e-05, | |
| "loss": 1.1651, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.222698072805139, | |
| "grad_norm": 0.35742517390118567, | |
| "learning_rate": 1.1006739631847684e-05, | |
| "loss": 1.0415, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.226980728051392, | |
| "grad_norm": 0.3678474681557672, | |
| "learning_rate": 1.0963039686491942e-05, | |
| "loss": 1.0773, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.2312633832976445, | |
| "grad_norm": 0.35021617103631075, | |
| "learning_rate": 1.0919358805935653e-05, | |
| "loss": 1.0147, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.235546038543897, | |
| "grad_norm": 0.3725259580183268, | |
| "learning_rate": 1.0875697532006237e-05, | |
| "loss": 1.1326, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.23982869379015, | |
| "grad_norm": 0.36036157437462213, | |
| "learning_rate": 1.0832056406287888e-05, | |
| "loss": 1.1178, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.2441113490364026, | |
| "grad_norm": 0.38080054734059177, | |
| "learning_rate": 1.0788435970114902e-05, | |
| "loss": 1.2065, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.2483940042826553, | |
| "grad_norm": 0.3744350777602071, | |
| "learning_rate": 1.0744836764564914e-05, | |
| "loss": 1.1504, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.252676659528908, | |
| "grad_norm": 0.37119670203538174, | |
| "learning_rate": 1.0701259330452227e-05, | |
| "loss": 1.1754, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.2569593147751608, | |
| "grad_norm": 0.3450626261101503, | |
| "learning_rate": 1.0657704208321073e-05, | |
| "loss": 1.1758, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.2612419700214135, | |
| "grad_norm": 0.3761085257204848, | |
| "learning_rate": 1.0614171938438937e-05, | |
| "loss": 1.1058, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.265524625267666, | |
| "grad_norm": 0.3534345956983803, | |
| "learning_rate": 1.0570663060789819e-05, | |
| "loss": 1.0396, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.2698072805139184, | |
| "grad_norm": 0.3339089724596173, | |
| "learning_rate": 1.0527178115067577e-05, | |
| "loss": 1.0607, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.274089935760171, | |
| "grad_norm": 0.36758786848355013, | |
| "learning_rate": 1.0483717640669198e-05, | |
| "loss": 1.096, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.278372591006424, | |
| "grad_norm": 0.37103014849499344, | |
| "learning_rate": 1.0440282176688132e-05, | |
| "loss": 1.2022, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.2826552462526766, | |
| "grad_norm": 0.3933653572064292, | |
| "learning_rate": 1.0396872261907578e-05, | |
| "loss": 1.1886, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.2869379014989293, | |
| "grad_norm": 0.3478722741253696, | |
| "learning_rate": 1.0353488434793839e-05, | |
| "loss": 1.1061, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.291220556745182, | |
| "grad_norm": 0.38454344787523614, | |
| "learning_rate": 1.0310131233489595e-05, | |
| "loss": 1.1058, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.2955032119914347, | |
| "grad_norm": 0.3964599267526657, | |
| "learning_rate": 1.0266801195807279e-05, | |
| "loss": 1.1536, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.2997858672376874, | |
| "grad_norm": 0.3505311887204956, | |
| "learning_rate": 1.0223498859222367e-05, | |
| "loss": 1.005, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.30406852248394, | |
| "grad_norm": 0.42646591198465056, | |
| "learning_rate": 1.018022476086672e-05, | |
| "loss": 1.1385, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.308351177730193, | |
| "grad_norm": 0.3516417735648486, | |
| "learning_rate": 1.0136979437521937e-05, | |
| "loss": 1.1299, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.3126338329764455, | |
| "grad_norm": 0.37292041166385276, | |
| "learning_rate": 1.0093763425612677e-05, | |
| "loss": 1.1697, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.3169164882226982, | |
| "grad_norm": 0.37139285774167097, | |
| "learning_rate": 1.0050577261200025e-05, | |
| "loss": 1.0958, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.3211991434689505, | |
| "grad_norm": 0.36732514272211636, | |
| "learning_rate": 1.000742147997481e-05, | |
| "loss": 1.0663, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.325481798715203, | |
| "grad_norm": 0.425696024428236, | |
| "learning_rate": 9.964296617251004e-06, | |
| "loss": 1.0172, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.329764453961456, | |
| "grad_norm": 0.45633961518765603, | |
| "learning_rate": 9.92120320795904e-06, | |
| "loss": 1.2115, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.3340471092077086, | |
| "grad_norm": 0.42776392011984465, | |
| "learning_rate": 9.878141786639212e-06, | |
| "loss": 1.1263, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.3383297644539613, | |
| "grad_norm": 0.4063925688250011, | |
| "learning_rate": 9.835112887435014e-06, | |
| "loss": 1.1167, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.342612419700214, | |
| "grad_norm": 0.347005382841865, | |
| "learning_rate": 9.792117044086544e-06, | |
| "loss": 1.0471, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.3468950749464668, | |
| "grad_norm": 0.41426650830417605, | |
| "learning_rate": 9.749154789923847e-06, | |
| "loss": 1.2857, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.3511777301927195, | |
| "grad_norm": 0.3732639695626659, | |
| "learning_rate": 9.70622665786034e-06, | |
| "loss": 1.133, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.355460385438972, | |
| "grad_norm": 0.3953893693115576, | |
| "learning_rate": 9.663333180386169e-06, | |
| "loss": 1.1723, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.359743040685225, | |
| "grad_norm": 0.3945096837746996, | |
| "learning_rate": 9.620474889561629e-06, | |
| "loss": 1.1853, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.3640256959314776, | |
| "grad_norm": 0.353229521713685, | |
| "learning_rate": 9.57765231701053e-06, | |
| "loss": 1.224, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.3683083511777303, | |
| "grad_norm": 0.38038911754225274, | |
| "learning_rate": 9.534865993913656e-06, | |
| "loss": 1.0707, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.372591006423983, | |
| "grad_norm": 0.40137304773118665, | |
| "learning_rate": 9.492116451002114e-06, | |
| "loss": 1.0614, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.3768736616702357, | |
| "grad_norm": 0.3799373348779043, | |
| "learning_rate": 9.4494042185508e-06, | |
| "loss": 1.0317, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.3811563169164884, | |
| "grad_norm": 0.35846465331360783, | |
| "learning_rate": 9.4067298263718e-06, | |
| "loss": 1.0816, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.385438972162741, | |
| "grad_norm": 0.3892380274193281, | |
| "learning_rate": 9.364093803807807e-06, | |
| "loss": 1.0922, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.3897216274089934, | |
| "grad_norm": 0.40336093781540333, | |
| "learning_rate": 9.321496679725596e-06, | |
| "loss": 1.0938, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.394004282655246, | |
| "grad_norm": 0.3817697005333532, | |
| "learning_rate": 9.278938982509409e-06, | |
| "loss": 1.0803, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.398286937901499, | |
| "grad_norm": 0.3881301113148313, | |
| "learning_rate": 9.236421240054449e-06, | |
| "loss": 1.1377, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.4025695931477515, | |
| "grad_norm": 0.445891116690163, | |
| "learning_rate": 9.193943979760292e-06, | |
| "loss": 1.0991, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.4068522483940042, | |
| "grad_norm": 0.4010581039655185, | |
| "learning_rate": 9.151507728524382e-06, | |
| "loss": 1.041, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.411134903640257, | |
| "grad_norm": 0.3694140168350837, | |
| "learning_rate": 9.109113012735467e-06, | |
| "loss": 0.9861, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.4154175588865097, | |
| "grad_norm": 0.38742555130206846, | |
| "learning_rate": 9.066760358267081e-06, | |
| "loss": 1.0938, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.4197002141327624, | |
| "grad_norm": 0.3559783185134848, | |
| "learning_rate": 9.024450290471026e-06, | |
| "loss": 1.0395, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.423982869379015, | |
| "grad_norm": 0.3636369864702618, | |
| "learning_rate": 8.982183334170844e-06, | |
| "loss": 1.0933, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.428265524625268, | |
| "grad_norm": 0.35525649048200675, | |
| "learning_rate": 8.939960013655311e-06, | |
| "loss": 1.0766, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.4325481798715205, | |
| "grad_norm": 0.3775765508703813, | |
| "learning_rate": 8.897780852671939e-06, | |
| "loss": 1.0256, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.436830835117773, | |
| "grad_norm": 0.42139839896816106, | |
| "learning_rate": 8.855646374420472e-06, | |
| "loss": 1.1425, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.4411134903640255, | |
| "grad_norm": 0.3511194625690293, | |
| "learning_rate": 8.813557101546408e-06, | |
| "loss": 0.9875, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.445396145610278, | |
| "grad_norm": 0.35870293115859425, | |
| "learning_rate": 8.771513556134497e-06, | |
| "loss": 1.1143, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.449678800856531, | |
| "grad_norm": 0.3511476581215571, | |
| "learning_rate": 8.729516259702272e-06, | |
| "loss": 1.1216, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.4539614561027836, | |
| "grad_norm": 0.3896756471995198, | |
| "learning_rate": 8.6875657331936e-06, | |
| "loss": 1.2131, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.4582441113490363, | |
| "grad_norm": 0.346301000515738, | |
| "learning_rate": 8.645662496972186e-06, | |
| "loss": 1.1267, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.462526766595289, | |
| "grad_norm": 0.3279075184069246, | |
| "learning_rate": 8.603807070815152e-06, | |
| "loss": 1.0078, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.4668094218415417, | |
| "grad_norm": 0.3524877782412061, | |
| "learning_rate": 8.561999973906554e-06, | |
| "loss": 1.1589, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.4710920770877944, | |
| "grad_norm": 0.3744186526110544, | |
| "learning_rate": 8.520241724830983e-06, | |
| "loss": 1.1987, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.475374732334047, | |
| "grad_norm": 0.37193508975714884, | |
| "learning_rate": 8.478532841567089e-06, | |
| "loss": 1.143, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.4796573875803, | |
| "grad_norm": 0.3563664250992986, | |
| "learning_rate": 8.436873841481197e-06, | |
| "loss": 1.1024, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.4839400428265526, | |
| "grad_norm": 0.3621802163845544, | |
| "learning_rate": 8.395265241320852e-06, | |
| "loss": 1.1237, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.4882226980728053, | |
| "grad_norm": 0.3534462614928483, | |
| "learning_rate": 8.353707557208448e-06, | |
| "loss": 0.9731, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.492505353319058, | |
| "grad_norm": 0.3756351095987366, | |
| "learning_rate": 8.312201304634775e-06, | |
| "loss": 1.0517, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.4967880085653107, | |
| "grad_norm": 0.3810521940082933, | |
| "learning_rate": 8.270746998452688e-06, | |
| "loss": 1.0853, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.5010706638115634, | |
| "grad_norm": 0.39222567553145227, | |
| "learning_rate": 8.229345152870666e-06, | |
| "loss": 1.1764, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.505353319057816, | |
| "grad_norm": 0.3739366136336243, | |
| "learning_rate": 8.18799628144646e-06, | |
| "loss": 1.1238, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.5096359743040684, | |
| "grad_norm": 0.38711368859863554, | |
| "learning_rate": 8.14670089708072e-06, | |
| "loss": 1.1465, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.513918629550321, | |
| "grad_norm": 0.41098527253509576, | |
| "learning_rate": 8.105459512010629e-06, | |
| "loss": 1.041, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.518201284796574, | |
| "grad_norm": 0.406134178093035, | |
| "learning_rate": 8.064272637803553e-06, | |
| "loss": 1.1861, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.5224839400428265, | |
| "grad_norm": 0.3736862306104564, | |
| "learning_rate": 8.02314078535068e-06, | |
| "loss": 1.0904, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.526766595289079, | |
| "grad_norm": 0.3781276058410365, | |
| "learning_rate": 7.982064464860722e-06, | |
| "loss": 1.1083, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.531049250535332, | |
| "grad_norm": 0.40011911371910797, | |
| "learning_rate": 7.94104418585353e-06, | |
| "loss": 1.0687, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.5353319057815846, | |
| "grad_norm": 0.3683735339293543, | |
| "learning_rate": 7.90008045715383e-06, | |
| "loss": 1.1211, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.5396145610278373, | |
| "grad_norm": 0.3878127219742661, | |
| "learning_rate": 7.859173786884867e-06, | |
| "loss": 1.086, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.54389721627409, | |
| "grad_norm": 0.37501963993427256, | |
| "learning_rate": 7.818324682462135e-06, | |
| "loss": 1.0673, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.5481798715203428, | |
| "grad_norm": 0.37276593704270844, | |
| "learning_rate": 7.77753365058705e-06, | |
| "loss": 1.1055, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.552462526766595, | |
| "grad_norm": 0.38843603696651813, | |
| "learning_rate": 7.736801197240703e-06, | |
| "loss": 1.0339, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.5567451820128477, | |
| "grad_norm": 0.4110286435387141, | |
| "learning_rate": 7.696127827677551e-06, | |
| "loss": 1.0975, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.5610278372591004, | |
| "grad_norm": 0.3610377475070173, | |
| "learning_rate": 7.655514046419169e-06, | |
| "loss": 1.0753, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.565310492505353, | |
| "grad_norm": 0.46624031730321613, | |
| "learning_rate": 7.614960357247974e-06, | |
| "loss": 1.0819, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.569593147751606, | |
| "grad_norm": 0.35714403479890183, | |
| "learning_rate": 7.57446726320101e-06, | |
| "loss": 1.0661, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.5738758029978586, | |
| "grad_norm": 0.3537005412507155, | |
| "learning_rate": 7.534035266563657e-06, | |
| "loss": 1.0783, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.5781584582441113, | |
| "grad_norm": 0.3609965104402262, | |
| "learning_rate": 7.493664868863456e-06, | |
| "loss": 1.1183, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.582441113490364, | |
| "grad_norm": 0.3414893487662722, | |
| "learning_rate": 7.453356570863838e-06, | |
| "loss": 1.1513, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.5867237687366167, | |
| "grad_norm": 0.34768494822065116, | |
| "learning_rate": 7.413110872557957e-06, | |
| "loss": 1.075, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.5910064239828694, | |
| "grad_norm": 0.35110711512371934, | |
| "learning_rate": 7.372928273162444e-06, | |
| "loss": 1.0302, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.595289079229122, | |
| "grad_norm": 0.37389978926958345, | |
| "learning_rate": 7.332809271111258e-06, | |
| "loss": 1.127, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.599571734475375, | |
| "grad_norm": 0.36202234697320473, | |
| "learning_rate": 7.2927543640494675e-06, | |
| "loss": 1.0841, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.6038543897216275, | |
| "grad_norm": 0.3692912620672064, | |
| "learning_rate": 7.252764048827096e-06, | |
| "loss": 1.0937, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.6081370449678802, | |
| "grad_norm": 0.371407363782464, | |
| "learning_rate": 7.212838821492962e-06, | |
| "loss": 1.1222, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.612419700214133, | |
| "grad_norm": 0.34843882518833746, | |
| "learning_rate": 7.172979177288505e-06, | |
| "loss": 0.945, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.6167023554603857, | |
| "grad_norm": 0.3677558592711015, | |
| "learning_rate": 7.133185610641683e-06, | |
| "loss": 1.1127, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.6209850107066384, | |
| "grad_norm": 0.36958952805111067, | |
| "learning_rate": 7.0934586151607764e-06, | |
| "loss": 1.1137, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.6252676659528906, | |
| "grad_norm": 0.3474020257100841, | |
| "learning_rate": 7.053798683628335e-06, | |
| "loss": 0.9744, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.6295503211991433, | |
| "grad_norm": 0.3558866341734782, | |
| "learning_rate": 7.014206307995016e-06, | |
| "loss": 1.1125, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.633832976445396, | |
| "grad_norm": 0.3614597470882593, | |
| "learning_rate": 6.974681979373501e-06, | |
| "loss": 1.1009, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.6381156316916488, | |
| "grad_norm": 0.3714477690148325, | |
| "learning_rate": 6.935226188032401e-06, | |
| "loss": 0.9984, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.6423982869379015, | |
| "grad_norm": 0.3317262663806771, | |
| "learning_rate": 6.895839423390175e-06, | |
| "loss": 1.0966, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.646680942184154, | |
| "grad_norm": 0.36917263116104493, | |
| "learning_rate": 6.856522174009061e-06, | |
| "loss": 1.0764, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.650963597430407, | |
| "grad_norm": 0.3777881832761566, | |
| "learning_rate": 6.817274927589014e-06, | |
| "loss": 1.0345, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.6552462526766596, | |
| "grad_norm": 0.35567953357582066, | |
| "learning_rate": 6.7780981709616495e-06, | |
| "loss": 1.1184, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.6595289079229123, | |
| "grad_norm": 0.3719255516818532, | |
| "learning_rate": 6.738992390084232e-06, | |
| "loss": 1.1226, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.663811563169165, | |
| "grad_norm": 0.3829939577200986, | |
| "learning_rate": 6.699958070033596e-06, | |
| "loss": 1.0708, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.6680942184154177, | |
| "grad_norm": 0.36003883214692967, | |
| "learning_rate": 6.660995695000191e-06, | |
| "loss": 1.1787, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.67237687366167, | |
| "grad_norm": 0.3688924024392204, | |
| "learning_rate": 6.622105748282031e-06, | |
| "loss": 1.0507, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.6766595289079227, | |
| "grad_norm": 0.37105335768283265, | |
| "learning_rate": 6.583288712278697e-06, | |
| "loss": 1.0864, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.6809421841541754, | |
| "grad_norm": 0.3676936052384596, | |
| "learning_rate": 6.544545068485404e-06, | |
| "loss": 1.1649, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.685224839400428, | |
| "grad_norm": 0.35833428730388167, | |
| "learning_rate": 6.5058752974869545e-06, | |
| "loss": 1.0467, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.689507494646681, | |
| "grad_norm": 0.3560192973325353, | |
| "learning_rate": 6.4672798789518515e-06, | |
| "loss": 1.0385, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.6937901498929335, | |
| "grad_norm": 0.3422819495514087, | |
| "learning_rate": 6.428759291626294e-06, | |
| "loss": 1.0643, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.6980728051391862, | |
| "grad_norm": 0.3596524934289582, | |
| "learning_rate": 6.39031401332826e-06, | |
| "loss": 1.0874, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.702355460385439, | |
| "grad_norm": 0.3581329395952061, | |
| "learning_rate": 6.35194452094158e-06, | |
| "loss": 1.029, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.7066381156316917, | |
| "grad_norm": 0.3646878019734804, | |
| "learning_rate": 6.313651290410021e-06, | |
| "loss": 1.1463, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.7109207708779444, | |
| "grad_norm": 0.46965105187278144, | |
| "learning_rate": 6.2754347967313694e-06, | |
| "loss": 1.1599, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.715203426124197, | |
| "grad_norm": 0.35199634686850134, | |
| "learning_rate": 6.237295513951577e-06, | |
| "loss": 1.0447, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.71948608137045, | |
| "grad_norm": 0.3552040815294978, | |
| "learning_rate": 6.199233915158817e-06, | |
| "loss": 1.0355, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.7237687366167025, | |
| "grad_norm": 0.3701464344073716, | |
| "learning_rate": 6.161250472477692e-06, | |
| "loss": 1.1069, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.728051391862955, | |
| "grad_norm": 0.3481745786199797, | |
| "learning_rate": 6.123345657063299e-06, | |
| "loss": 1.0379, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.732334047109208, | |
| "grad_norm": 0.34908887773290137, | |
| "learning_rate": 6.085519939095463e-06, | |
| "loss": 1.0759, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.7366167023554606, | |
| "grad_norm": 0.406969071848584, | |
| "learning_rate": 6.047773787772843e-06, | |
| "loss": 1.1397, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.7408993576017133, | |
| "grad_norm": 0.369214552502764, | |
| "learning_rate": 6.01010767130714e-06, | |
| "loss": 1.1652, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.7451820128479656, | |
| "grad_norm": 0.35958281005557274, | |
| "learning_rate": 5.972522056917287e-06, | |
| "loss": 1.0651, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.7494646680942183, | |
| "grad_norm": 0.34773227498527454, | |
| "learning_rate": 5.9350174108236525e-06, | |
| "loss": 1.2105, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.753747323340471, | |
| "grad_norm": 0.3785529745910018, | |
| "learning_rate": 5.897594198242253e-06, | |
| "loss": 1.1186, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.7580299785867237, | |
| "grad_norm": 0.3476745823127357, | |
| "learning_rate": 5.860252883378986e-06, | |
| "loss": 1.1053, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.7623126338329764, | |
| "grad_norm": 0.35740833434939384, | |
| "learning_rate": 5.822993929423872e-06, | |
| "loss": 1.156, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.766595289079229, | |
| "grad_norm": 0.3461287440443304, | |
| "learning_rate": 5.78581779854531e-06, | |
| "loss": 1.034, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.770877944325482, | |
| "grad_norm": 0.3484778190549007, | |
| "learning_rate": 5.748724951884339e-06, | |
| "loss": 1.147, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.7751605995717346, | |
| "grad_norm": 0.3463824371518374, | |
| "learning_rate": 5.711715849548924e-06, | |
| "loss": 1.2487, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.7794432548179873, | |
| "grad_norm": 0.3609765242563188, | |
| "learning_rate": 5.674790950608257e-06, | |
| "loss": 1.0038, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.78372591006424, | |
| "grad_norm": 0.3678624338311653, | |
| "learning_rate": 5.6379507130870245e-06, | |
| "loss": 1.1145, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.7880085653104922, | |
| "grad_norm": 0.35376315009965914, | |
| "learning_rate": 5.601195593959788e-06, | |
| "loss": 1.0577, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.792291220556745, | |
| "grad_norm": 0.3363214828483723, | |
| "learning_rate": 5.5645260491452575e-06, | |
| "loss": 1.0486, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.7965738758029977, | |
| "grad_norm": 0.3622636185655521, | |
| "learning_rate": 5.52794253350067e-06, | |
| "loss": 1.0547, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.8008565310492504, | |
| "grad_norm": 0.5067875911549902, | |
| "learning_rate": 5.491445500816134e-06, | |
| "loss": 1.1395, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.805139186295503, | |
| "grad_norm": 0.34289895282316957, | |
| "learning_rate": 5.4550354038090055e-06, | |
| "loss": 1.1781, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.809421841541756, | |
| "grad_norm": 0.35445697790502123, | |
| "learning_rate": 5.41871269411827e-06, | |
| "loss": 1.1037, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.8137044967880085, | |
| "grad_norm": 0.360842710721591, | |
| "learning_rate": 5.3824778222989424e-06, | |
| "loss": 1.1276, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.817987152034261, | |
| "grad_norm": 0.3432929406538927, | |
| "learning_rate": 5.346331237816477e-06, | |
| "loss": 1.0847, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.822269807280514, | |
| "grad_norm": 0.34235194233646365, | |
| "learning_rate": 5.31027338904119e-06, | |
| "loss": 1.099, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.8265524625267666, | |
| "grad_norm": 0.3494573350685968, | |
| "learning_rate": 5.274304723242701e-06, | |
| "loss": 1.0714, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.8308351177730193, | |
| "grad_norm": 0.36423601172734904, | |
| "learning_rate": 5.238425686584383e-06, | |
| "loss": 1.0917, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.835117773019272, | |
| "grad_norm": 0.3390326644331241, | |
| "learning_rate": 5.2026367241178415e-06, | |
| "loss": 1.0927, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.8394004282655247, | |
| "grad_norm": 0.3389574380550951, | |
| "learning_rate": 5.166938279777356e-06, | |
| "loss": 1.0654, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.8436830835117775, | |
| "grad_norm": 0.3558059969945493, | |
| "learning_rate": 5.131330796374428e-06, | |
| "loss": 1.2394, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.84796573875803, | |
| "grad_norm": 0.3449281004788474, | |
| "learning_rate": 5.095814715592229e-06, | |
| "loss": 1.104, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.852248394004283, | |
| "grad_norm": 0.5741950084872994, | |
| "learning_rate": 5.060390477980181e-06, | |
| "loss": 1.1246, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.8565310492505356, | |
| "grad_norm": 0.3518602777082471, | |
| "learning_rate": 5.0250585229484445e-06, | |
| "loss": 1.0384, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.860813704496788, | |
| "grad_norm": 0.33201611617766386, | |
| "learning_rate": 4.9898192887624946e-06, | |
| "loss": 0.99, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.8650963597430406, | |
| "grad_norm": 0.33654063236244514, | |
| "learning_rate": 4.954673212537668e-06, | |
| "loss": 1.0835, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.8693790149892933, | |
| "grad_norm": 0.35749153943774153, | |
| "learning_rate": 4.9196207302337564e-06, | |
| "loss": 1.238, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.873661670235546, | |
| "grad_norm": 0.3963712296443138, | |
| "learning_rate": 4.884662276649588e-06, | |
| "loss": 1.0847, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.8779443254817987, | |
| "grad_norm": 0.33900776494342877, | |
| "learning_rate": 4.8497982854176475e-06, | |
| "loss": 0.9872, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.8822269807280514, | |
| "grad_norm": 0.3390240674831931, | |
| "learning_rate": 4.8150291889986655e-06, | |
| "loss": 1.1353, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.886509635974304, | |
| "grad_norm": 0.3789710837716194, | |
| "learning_rate": 4.780355418676305e-06, | |
| "loss": 1.1636, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.890792291220557, | |
| "grad_norm": 0.3773675590887804, | |
| "learning_rate": 4.745777404551755e-06, | |
| "loss": 1.1598, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.8950749464668095, | |
| "grad_norm": 0.350034350612991, | |
| "learning_rate": 4.711295575538437e-06, | |
| "loss": 0.9807, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.8993576017130622, | |
| "grad_norm": 0.35389009806788396, | |
| "learning_rate": 4.6769103593566805e-06, | |
| "loss": 1.1225, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.903640256959315, | |
| "grad_norm": 0.3480099705955127, | |
| "learning_rate": 4.6426221825283804e-06, | |
| "loss": 1.0797, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.907922912205567, | |
| "grad_norm": 0.4017077706255267, | |
| "learning_rate": 4.608431470371764e-06, | |
| "loss": 1.0613, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.91220556745182, | |
| "grad_norm": 0.3918078161458431, | |
| "learning_rate": 4.574338646996068e-06, | |
| "loss": 1.1085, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.9164882226980726, | |
| "grad_norm": 0.32920278218913035, | |
| "learning_rate": 4.540344135296296e-06, | |
| "loss": 0.9627, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.9207708779443253, | |
| "grad_norm": 0.3684497632182809, | |
| "learning_rate": 4.506448356947973e-06, | |
| "loss": 1.1601, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.925053533190578, | |
| "grad_norm": 0.3433737649981929, | |
| "learning_rate": 4.4726517324019165e-06, | |
| "loss": 1.0455, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.9293361884368307, | |
| "grad_norm": 0.35325748706550913, | |
| "learning_rate": 4.438954680879015e-06, | |
| "loss": 1.0403, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.9336188436830835, | |
| "grad_norm": 0.34196653123502885, | |
| "learning_rate": 4.405357620365032e-06, | |
| "loss": 1.2242, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.937901498929336, | |
| "grad_norm": 0.3473358887939904, | |
| "learning_rate": 4.371860967605413e-06, | |
| "loss": 0.9848, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.942184154175589, | |
| "grad_norm": 0.3408666843863744, | |
| "learning_rate": 4.338465138100147e-06, | |
| "loss": 1.0415, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.9464668094218416, | |
| "grad_norm": 0.3480886088157686, | |
| "learning_rate": 4.305170546098551e-06, | |
| "loss": 1.0479, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.9507494646680943, | |
| "grad_norm": 0.35083424116981776, | |
| "learning_rate": 4.271977604594206e-06, | |
| "loss": 1.1681, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.955032119914347, | |
| "grad_norm": 0.35317744200985374, | |
| "learning_rate": 4.238886725319774e-06, | |
| "loss": 1.1004, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.9593147751605997, | |
| "grad_norm": 0.36992718168834315, | |
| "learning_rate": 4.205898318741925e-06, | |
| "loss": 1.1501, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.9635974304068524, | |
| "grad_norm": 0.368258055811205, | |
| "learning_rate": 4.173012794056235e-06, | |
| "loss": 1.0589, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.967880085653105, | |
| "grad_norm": 0.3542218292326262, | |
| "learning_rate": 4.1402305591820945e-06, | |
| "loss": 1.1059, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.972162740899358, | |
| "grad_norm": 0.34221816300659097, | |
| "learning_rate": 4.107552020757688e-06, | |
| "loss": 0.9976, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.9764453961456105, | |
| "grad_norm": 0.3798509842359927, | |
| "learning_rate": 4.07497758413491e-06, | |
| "loss": 1.0692, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.980728051391863, | |
| "grad_norm": 0.3371568887516198, | |
| "learning_rate": 4.0425076533743585e-06, | |
| "loss": 1.1132, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.9850107066381155, | |
| "grad_norm": 0.34200886091760746, | |
| "learning_rate": 4.010142631240317e-06, | |
| "loss": 1.1367, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.9892933618843682, | |
| "grad_norm": 0.3874331285336969, | |
| "learning_rate": 3.977882919195755e-06, | |
| "loss": 1.1251, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.993576017130621, | |
| "grad_norm": 0.6572496407131426, | |
| "learning_rate": 3.945728917397355e-06, | |
| "loss": 1.1292, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.9978586723768736, | |
| "grad_norm": 0.8967911622926727, | |
| "learning_rate": 3.913681024690556e-06, | |
| "loss": 1.2485, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.8967911622926727, | |
| "learning_rate": 3.88173963860457e-06, | |
| "loss": 1.1349, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.0042826552462527, | |
| "grad_norm": 0.7045871892163175, | |
| "learning_rate": 3.849905155347512e-06, | |
| "loss": 0.919, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.0085653104925054, | |
| "grad_norm": 0.8731451662221503, | |
| "learning_rate": 3.818177969801412e-06, | |
| "loss": 0.9352, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.012847965738758, | |
| "grad_norm": 0.5862193210847736, | |
| "learning_rate": 3.7865584755173907e-06, | |
| "loss": 0.8273, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.017130620985011, | |
| "grad_norm": 0.4530975739265527, | |
| "learning_rate": 3.7550470647107205e-06, | |
| "loss": 0.8568, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.0214132762312635, | |
| "grad_norm": 0.775182178811676, | |
| "learning_rate": 3.723644128255989e-06, | |
| "loss": 0.8563, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.0256959314775163, | |
| "grad_norm": 0.8036787462194873, | |
| "learning_rate": 3.6923500556822433e-06, | |
| "loss": 0.9373, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.0299785867237685, | |
| "grad_norm": 0.938179132189991, | |
| "learning_rate": 3.6611652351681568e-06, | |
| "loss": 0.9144, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.0342612419700212, | |
| "grad_norm": 0.533475946230046, | |
| "learning_rate": 3.630090053537219e-06, | |
| "loss": 0.9413, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.038543897216274, | |
| "grad_norm": 0.4769499859035611, | |
| "learning_rate": 3.5991248962529313e-06, | |
| "loss": 0.8983, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.0428265524625266, | |
| "grad_norm": 0.7175275279939133, | |
| "learning_rate": 3.568270147414031e-06, | |
| "loss": 1.0184, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.0471092077087794, | |
| "grad_norm": 0.6710751659916476, | |
| "learning_rate": 3.5375261897497208e-06, | |
| "loss": 0.8867, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.051391862955032, | |
| "grad_norm": 0.5533721206962046, | |
| "learning_rate": 3.5068934046149303e-06, | |
| "loss": 0.9861, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.0556745182012848, | |
| "grad_norm": 0.5096487279270119, | |
| "learning_rate": 3.47637217198557e-06, | |
| "loss": 0.9957, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.0599571734475375, | |
| "grad_norm": 0.392777064751308, | |
| "learning_rate": 3.4459628704538503e-06, | |
| "loss": 0.8717, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.06423982869379, | |
| "grad_norm": 0.5848251912632335, | |
| "learning_rate": 3.41566587722353e-06, | |
| "loss": 0.9097, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.068522483940043, | |
| "grad_norm": 0.6598671290081435, | |
| "learning_rate": 3.3854815681053045e-06, | |
| "loss": 0.8214, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.0728051391862956, | |
| "grad_norm": 0.5792866171130799, | |
| "learning_rate": 3.355410317512081e-06, | |
| "loss": 0.939, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.0770877944325483, | |
| "grad_norm": 0.5597042015871566, | |
| "learning_rate": 3.3254524984543858e-06, | |
| "loss": 0.973, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.081370449678801, | |
| "grad_norm": 0.43120275321986723, | |
| "learning_rate": 3.2956084825357046e-06, | |
| "loss": 0.9494, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.0856531049250537, | |
| "grad_norm": 0.43798987398686245, | |
| "learning_rate": 3.265878639947885e-06, | |
| "loss": 0.9386, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.089935760171306, | |
| "grad_norm": 0.5043861622984578, | |
| "learning_rate": 3.2362633394665414e-06, | |
| "loss": 0.8571, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.0942184154175587, | |
| "grad_norm": 0.47877006992255494, | |
| "learning_rate": 3.206762948446486e-06, | |
| "loss": 0.8921, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.0985010706638114, | |
| "grad_norm": 0.48382021072189335, | |
| "learning_rate": 3.177377832817163e-06, | |
| "loss": 0.9232, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.102783725910064, | |
| "grad_norm": 0.4428791922415224, | |
| "learning_rate": 3.148108357078128e-06, | |
| "loss": 0.8745, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.107066381156317, | |
| "grad_norm": 0.3690822664254283, | |
| "learning_rate": 3.118954884294495e-06, | |
| "loss": 0.9788, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.1113490364025695, | |
| "grad_norm": 0.43897184340546713, | |
| "learning_rate": 3.0899177760924616e-06, | |
| "loss": 0.9244, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.1156316916488223, | |
| "grad_norm": 0.4770826738507552, | |
| "learning_rate": 3.060997392654813e-06, | |
| "loss": 0.8922, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.119914346895075, | |
| "grad_norm": 0.4254105042307734, | |
| "learning_rate": 3.032194092716449e-06, | |
| "loss": 0.8362, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.1241970021413277, | |
| "grad_norm": 0.4468366976539863, | |
| "learning_rate": 3.0035082335599555e-06, | |
| "loss": 0.87, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.1284796573875804, | |
| "grad_norm": 0.4429010036845597, | |
| "learning_rate": 2.9749401710111286e-06, | |
| "loss": 0.9305, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.132762312633833, | |
| "grad_norm": 0.4127010809706913, | |
| "learning_rate": 2.9464902594346185e-06, | |
| "loss": 0.9775, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.137044967880086, | |
| "grad_norm": 0.4086014968435575, | |
| "learning_rate": 2.9181588517294857e-06, | |
| "loss": 0.999, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.1413276231263385, | |
| "grad_norm": 0.3990791790573375, | |
| "learning_rate": 2.8899462993248473e-06, | |
| "loss": 0.9982, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.145610278372591, | |
| "grad_norm": 0.39305406800729714, | |
| "learning_rate": 2.861852952175513e-06, | |
| "loss": 0.8755, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.1498929336188435, | |
| "grad_norm": 0.42386938503526844, | |
| "learning_rate": 2.8338791587576435e-06, | |
| "loss": 0.9166, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.154175588865096, | |
| "grad_norm": 0.39610798719172463, | |
| "learning_rate": 2.80602526606443e-06, | |
| "loss": 0.8548, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.158458244111349, | |
| "grad_norm": 0.39866226920058223, | |
| "learning_rate": 2.7782916196017846e-06, | |
| "loss": 0.9252, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.1627408993576016, | |
| "grad_norm": 0.37822843502350373, | |
| "learning_rate": 2.7506785633840583e-06, | |
| "loss": 0.9459, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.1670235546038543, | |
| "grad_norm": 0.3821806357973024, | |
| "learning_rate": 2.7231864399297856e-06, | |
| "loss": 0.8745, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.171306209850107, | |
| "grad_norm": 0.42244290458780526, | |
| "learning_rate": 2.6958155902574e-06, | |
| "loss": 0.8758, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.1755888650963597, | |
| "grad_norm": 0.3891254431155144, | |
| "learning_rate": 2.6685663538810536e-06, | |
| "loss": 0.8505, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.1798715203426124, | |
| "grad_norm": 0.40848076108585224, | |
| "learning_rate": 2.6414390688063687e-06, | |
| "loss": 0.9505, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.184154175588865, | |
| "grad_norm": 0.3911863355408845, | |
| "learning_rate": 2.6144340715262437e-06, | |
| "loss": 0.9777, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.188436830835118, | |
| "grad_norm": 0.3568604123347815, | |
| "learning_rate": 2.58755169701672e-06, | |
| "loss": 0.9195, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.1927194860813706, | |
| "grad_norm": 0.4017015638792494, | |
| "learning_rate": 2.560792278732768e-06, | |
| "loss": 0.9821, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.1970021413276233, | |
| "grad_norm": 0.4407901593054486, | |
| "learning_rate": 2.534156148604207e-06, | |
| "loss": 0.8664, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.201284796573876, | |
| "grad_norm": 0.3486898375672858, | |
| "learning_rate": 2.5076436370315496e-06, | |
| "loss": 0.9108, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.2055674518201283, | |
| "grad_norm": 0.38504490186433393, | |
| "learning_rate": 2.4812550728819188e-06, | |
| "loss": 0.9088, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.209850107066381, | |
| "grad_norm": 0.4564848674737477, | |
| "learning_rate": 2.4549907834849644e-06, | |
| "loss": 0.9815, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.2141327623126337, | |
| "grad_norm": 0.3627458052575124, | |
| "learning_rate": 2.4288510946288063e-06, | |
| "loss": 0.9947, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.2184154175588864, | |
| "grad_norm": 0.39127133347387394, | |
| "learning_rate": 2.4028363305559894e-06, | |
| "loss": 0.855, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.222698072805139, | |
| "grad_norm": 1.0193828924775918, | |
| "learning_rate": 2.3769468139594727e-06, | |
| "loss": 0.9804, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.226980728051392, | |
| "grad_norm": 0.37456178415299207, | |
| "learning_rate": 2.3511828659785975e-06, | |
| "loss": 0.9075, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.2312633832976445, | |
| "grad_norm": 0.39856388723773317, | |
| "learning_rate": 2.3255448061951514e-06, | |
| "loss": 0.8887, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.235546038543897, | |
| "grad_norm": 0.38837064304140856, | |
| "learning_rate": 2.3000329526293456e-06, | |
| "loss": 0.9574, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.23982869379015, | |
| "grad_norm": 0.40519546139819784, | |
| "learning_rate": 2.2746476217359285e-06, | |
| "loss": 0.9492, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.2441113490364026, | |
| "grad_norm": 0.37621301359779613, | |
| "learning_rate": 2.249389128400219e-06, | |
| "loss": 0.9414, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.2483940042826553, | |
| "grad_norm": 0.40315000345725827, | |
| "learning_rate": 2.224257785934217e-06, | |
| "loss": 0.8958, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.252676659528908, | |
| "grad_norm": 0.3767855628173954, | |
| "learning_rate": 2.1992539060727137e-06, | |
| "loss": 0.8632, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.2569593147751608, | |
| "grad_norm": 0.38794589527885637, | |
| "learning_rate": 2.1743777989694292e-06, | |
| "loss": 0.8607, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.2612419700214135, | |
| "grad_norm": 0.365416432156038, | |
| "learning_rate": 2.1496297731931557e-06, | |
| "loss": 0.9429, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.265524625267666, | |
| "grad_norm": 0.38839779583938294, | |
| "learning_rate": 2.1250101357239426e-06, | |
| "loss": 0.8837, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.2698072805139184, | |
| "grad_norm": 0.3983702485606278, | |
| "learning_rate": 2.1005191919492795e-06, | |
| "loss": 0.9003, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.274089935760171, | |
| "grad_norm": 0.36784959637004716, | |
| "learning_rate": 2.0761572456603066e-06, | |
| "loss": 0.9904, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.278372591006424, | |
| "grad_norm": 0.4086191337846277, | |
| "learning_rate": 2.051924599048058e-06, | |
| "loss": 0.9865, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.2826552462526766, | |
| "grad_norm": 0.385807346281981, | |
| "learning_rate": 2.027821552699695e-06, | |
| "loss": 0.8834, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.2869379014989293, | |
| "grad_norm": 0.38623842578363365, | |
| "learning_rate": 2.0038484055948076e-06, | |
| "loss": 0.8881, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.291220556745182, | |
| "grad_norm": 0.43545389555296216, | |
| "learning_rate": 1.9800054551016593e-06, | |
| "loss": 0.9753, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.2955032119914347, | |
| "grad_norm": 0.41514320812303884, | |
| "learning_rate": 1.9562929969735494e-06, | |
| "loss": 0.9497, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.2997858672376874, | |
| "grad_norm": 0.4038608215680401, | |
| "learning_rate": 1.93271132534511e-06, | |
| "loss": 0.8644, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.30406852248394, | |
| "grad_norm": 0.3644719902383785, | |
| "learning_rate": 1.909260732728668e-06, | |
| "loss": 0.9556, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.308351177730193, | |
| "grad_norm": 0.42036574911137053, | |
| "learning_rate": 1.885941510010622e-06, | |
| "loss": 0.8886, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.3126338329764455, | |
| "grad_norm": 0.42796972706377573, | |
| "learning_rate": 1.8627539464478219e-06, | |
| "loss": 0.9207, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.3169164882226982, | |
| "grad_norm": 0.42284493016560876, | |
| "learning_rate": 1.8396983296639928e-06, | |
| "loss": 0.9094, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.3211991434689505, | |
| "grad_norm": 0.34934919011874943, | |
| "learning_rate": 1.816774945646163e-06, | |
| "loss": 0.8775, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.325481798715203, | |
| "grad_norm": 0.6600800009141096, | |
| "learning_rate": 1.7939840787411135e-06, | |
| "loss": 1.0994, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.329764453961456, | |
| "grad_norm": 0.3976354396493046, | |
| "learning_rate": 1.771326011651854e-06, | |
| "loss": 0.9024, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.3340471092077086, | |
| "grad_norm": 0.376362118495897, | |
| "learning_rate": 1.7488010254341172e-06, | |
| "loss": 0.8615, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.3383297644539613, | |
| "grad_norm": 0.40607166419814433, | |
| "learning_rate": 1.7264093994928648e-06, | |
| "loss": 0.912, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.342612419700214, | |
| "grad_norm": 0.4191724820681144, | |
| "learning_rate": 1.7041514115788428e-06, | |
| "loss": 0.8292, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.3468950749464668, | |
| "grad_norm": 0.3781354302914862, | |
| "learning_rate": 1.6820273377850997e-06, | |
| "loss": 0.8707, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.3511777301927195, | |
| "grad_norm": 0.42426853842502676, | |
| "learning_rate": 1.6600374525436057e-06, | |
| "loss": 0.7958, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.355460385438972, | |
| "grad_norm": 0.39253316989568815, | |
| "learning_rate": 1.6381820286218027e-06, | |
| "loss": 0.9362, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.359743040685225, | |
| "grad_norm": 0.42081804014283164, | |
| "learning_rate": 1.6164613371192668e-06, | |
| "loss": 0.8808, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.3640256959314776, | |
| "grad_norm": 0.3805908666364616, | |
| "learning_rate": 1.5948756474643098e-06, | |
| "loss": 0.9281, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.3683083511777303, | |
| "grad_norm": 0.3931155152751046, | |
| "learning_rate": 1.5734252274106549e-06, | |
| "loss": 0.8649, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.372591006423983, | |
| "grad_norm": 0.36893746954226686, | |
| "learning_rate": 1.5521103430341063e-06, | |
| "loss": 0.9245, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.3768736616702357, | |
| "grad_norm": 0.421055167309563, | |
| "learning_rate": 1.5309312587292595e-06, | |
| "loss": 0.9075, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.3811563169164884, | |
| "grad_norm": 0.39708496701725404, | |
| "learning_rate": 1.5098882372062084e-06, | |
| "loss": 0.9268, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.385438972162741, | |
| "grad_norm": 0.4147457741610103, | |
| "learning_rate": 1.488981539487308e-06, | |
| "loss": 0.9095, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.3897216274089934, | |
| "grad_norm": 0.3870528540533133, | |
| "learning_rate": 1.4682114249039007e-06, | |
| "loss": 0.9108, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.394004282655246, | |
| "grad_norm": 0.37912624135371875, | |
| "learning_rate": 1.447578151093143e-06, | |
| "loss": 0.8086, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.398286937901499, | |
| "grad_norm": 0.39893951982141634, | |
| "learning_rate": 1.427081973994769e-06, | |
| "loss": 0.8207, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.4025695931477515, | |
| "grad_norm": 0.41813759081817203, | |
| "learning_rate": 1.4067231478479465e-06, | |
| "loss": 0.8587, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.4068522483940042, | |
| "grad_norm": 0.37522463321771077, | |
| "learning_rate": 1.386501925188112e-06, | |
| "loss": 0.9387, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.411134903640257, | |
| "grad_norm": 0.40082201779472715, | |
| "learning_rate": 1.3664185568438252e-06, | |
| "loss": 0.8501, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.4154175588865097, | |
| "grad_norm": 0.4044778971930763, | |
| "learning_rate": 1.3464732919336877e-06, | |
| "loss": 0.9708, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.4197002141327624, | |
| "grad_norm": 0.3999055484285562, | |
| "learning_rate": 1.32666637786322e-06, | |
| "loss": 0.832, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.423982869379015, | |
| "grad_norm": 0.3940297074656928, | |
| "learning_rate": 1.3069980603218165e-06, | |
| "loss": 0.8606, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.428265524625268, | |
| "grad_norm": 0.4037209018320114, | |
| "learning_rate": 1.2874685832796856e-06, | |
| "loss": 0.9606, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.4325481798715205, | |
| "grad_norm": 0.36235619726375323, | |
| "learning_rate": 1.2680781889848296e-06, | |
| "loss": 0.8037, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.436830835117773, | |
| "grad_norm": 0.4134563817140967, | |
| "learning_rate": 1.248827117960033e-06, | |
| "loss": 0.9296, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.4411134903640255, | |
| "grad_norm": 0.37477385004204616, | |
| "learning_rate": 1.2297156089998887e-06, | |
| "loss": 0.8875, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.445396145610278, | |
| "grad_norm": 0.3598044961225808, | |
| "learning_rate": 1.2107438991678252e-06, | |
| "loss": 0.9181, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.449678800856531, | |
| "grad_norm": 0.4068544774348545, | |
| "learning_rate": 1.191912223793179e-06, | |
| "loss": 0.802, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.4539614561027836, | |
| "grad_norm": 0.39025679795801216, | |
| "learning_rate": 1.1732208164682567e-06, | |
| "loss": 0.9481, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.4582441113490363, | |
| "grad_norm": 0.40099768389636997, | |
| "learning_rate": 1.1546699090454596e-06, | |
| "loss": 0.8793, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 3.462526766595289, | |
| "grad_norm": 0.3527515368666591, | |
| "learning_rate": 1.1362597316343897e-06, | |
| "loss": 0.8926, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.4668094218415417, | |
| "grad_norm": 0.3960092351592858, | |
| "learning_rate": 1.117990512599007e-06, | |
| "loss": 0.8198, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 3.4710920770877944, | |
| "grad_norm": 0.37647074443425715, | |
| "learning_rate": 1.0998624785547916e-06, | |
| "loss": 0.8726, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.475374732334047, | |
| "grad_norm": 0.4260177464381465, | |
| "learning_rate": 1.081875854365924e-06, | |
| "loss": 0.8411, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 3.4796573875803, | |
| "grad_norm": 0.3678229667943419, | |
| "learning_rate": 1.0640308631425206e-06, | |
| "loss": 0.9303, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.4839400428265526, | |
| "grad_norm": 0.40562771211697285, | |
| "learning_rate": 1.0463277262378418e-06, | |
| "loss": 0.9258, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.4882226980728053, | |
| "grad_norm": 0.39758544559495274, | |
| "learning_rate": 1.0287666632455562e-06, | |
| "loss": 0.8981, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.492505353319058, | |
| "grad_norm": 0.4330255432907014, | |
| "learning_rate": 1.0113478919970166e-06, | |
| "loss": 0.877, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 3.4967880085653107, | |
| "grad_norm": 0.4091350493182955, | |
| "learning_rate": 9.940716285585572e-07, | |
| "loss": 0.8589, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.5010706638115634, | |
| "grad_norm": 0.3756040003940408, | |
| "learning_rate": 9.769380872288112e-07, | |
| "loss": 0.8303, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.505353319057816, | |
| "grad_norm": 0.3845542537371508, | |
| "learning_rate": 9.599474805360636e-07, | |
| "loss": 0.8673, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.5096359743040684, | |
| "grad_norm": 0.3621491496685947, | |
| "learning_rate": 9.431000192355904e-07, | |
| "loss": 0.8285, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 3.513918629550321, | |
| "grad_norm": 0.38581119937487457, | |
| "learning_rate": 9.263959123070792e-07, | |
| "loss": 0.9607, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.518201284796574, | |
| "grad_norm": 0.40699298803550954, | |
| "learning_rate": 9.098353669519985e-07, | |
| "loss": 0.9999, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 3.5224839400428265, | |
| "grad_norm": 0.36404111618752655, | |
| "learning_rate": 8.934185885910634e-07, | |
| "loss": 0.9621, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.526766595289079, | |
| "grad_norm": 0.4080837339902542, | |
| "learning_rate": 8.771457808616615e-07, | |
| "loss": 0.9385, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 3.531049250535332, | |
| "grad_norm": 0.37542101809408207, | |
| "learning_rate": 8.610171456153407e-07, | |
| "loss": 0.8838, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.5353319057815846, | |
| "grad_norm": 0.3622139219889446, | |
| "learning_rate": 8.450328829152962e-07, | |
| "loss": 0.9147, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 3.5396145610278373, | |
| "grad_norm": 0.41604941573448845, | |
| "learning_rate": 8.291931910339016e-07, | |
| "loss": 1.0337, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.54389721627409, | |
| "grad_norm": 0.3702662014383576, | |
| "learning_rate": 8.134982664502313e-07, | |
| "loss": 0.8722, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 3.5481798715203428, | |
| "grad_norm": 0.3968324847661136, | |
| "learning_rate": 7.979483038476496e-07, | |
| "loss": 0.8719, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.552462526766595, | |
| "grad_norm": 0.37196472198781777, | |
| "learning_rate": 7.825434961113612e-07, | |
| "loss": 0.9101, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 3.5567451820128477, | |
| "grad_norm": 0.404292826856257, | |
| "learning_rate": 7.672840343260503e-07, | |
| "loss": 0.883, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.5610278372591004, | |
| "grad_norm": 0.3986607359258053, | |
| "learning_rate": 7.521701077734921e-07, | |
| "loss": 0.914, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 3.565310492505353, | |
| "grad_norm": 0.37342839604299854, | |
| "learning_rate": 7.372019039302111e-07, | |
| "loss": 0.8733, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.569593147751606, | |
| "grad_norm": 0.3789431810782268, | |
| "learning_rate": 7.223796084651596e-07, | |
| "loss": 1.0656, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 3.5738758029978586, | |
| "grad_norm": 0.4143391476747435, | |
| "learning_rate": 7.077034052373991e-07, | |
| "loss": 0.9481, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.5781584582441113, | |
| "grad_norm": 0.3802282910841205, | |
| "learning_rate": 6.931734762938416e-07, | |
| "loss": 0.8704, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 3.582441113490364, | |
| "grad_norm": 0.4383295863697292, | |
| "learning_rate": 6.787900018669747e-07, | |
| "loss": 0.8664, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.5867237687366167, | |
| "grad_norm": 0.3620529674823113, | |
| "learning_rate": 6.645531603726287e-07, | |
| "loss": 0.8701, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 3.5910064239828694, | |
| "grad_norm": 0.4003391688371413, | |
| "learning_rate": 6.50463128407773e-07, | |
| "loss": 0.956, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.595289079229122, | |
| "grad_norm": 0.35710168185845254, | |
| "learning_rate": 6.365200807483138e-07, | |
| "loss": 0.9395, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 3.599571734475375, | |
| "grad_norm": 0.3888127985496108, | |
| "learning_rate": 6.227241903469322e-07, | |
| "loss": 0.868, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.6038543897216275, | |
| "grad_norm": 0.3788842530917126, | |
| "learning_rate": 6.090756283309379e-07, | |
| "loss": 0.9023, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 3.6081370449678802, | |
| "grad_norm": 0.4293764780811211, | |
| "learning_rate": 5.955745640001453e-07, | |
| "loss": 0.912, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.612419700214133, | |
| "grad_norm": 0.36701829937079145, | |
| "learning_rate": 5.822211648247797e-07, | |
| "loss": 0.9178, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 3.6167023554603857, | |
| "grad_norm": 0.420252154230346, | |
| "learning_rate": 5.690155964433868e-07, | |
| "loss": 0.9341, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.6209850107066384, | |
| "grad_norm": 0.4321448436806155, | |
| "learning_rate": 5.559580226607921e-07, | |
| "loss": 0.9177, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 3.6252676659528906, | |
| "grad_norm": 0.37257126041542216, | |
| "learning_rate": 5.430486054460629e-07, | |
| "loss": 0.9424, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.6295503211991433, | |
| "grad_norm": 0.3772731501472801, | |
| "learning_rate": 5.30287504930492e-07, | |
| "loss": 0.9146, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 3.633832976445396, | |
| "grad_norm": 0.3877711033336446, | |
| "learning_rate": 5.176748794056316e-07, | |
| "loss": 0.912, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.6381156316916488, | |
| "grad_norm": 0.3770006556479151, | |
| "learning_rate": 5.052108853213e-07, | |
| "loss": 1.0339, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 3.6423982869379015, | |
| "grad_norm": 0.40082811910610466, | |
| "learning_rate": 4.928956772836751e-07, | |
| "loss": 0.9, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.646680942184154, | |
| "grad_norm": 0.4080349447803649, | |
| "learning_rate": 4.807294080533486e-07, | |
| "loss": 0.9017, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 3.650963597430407, | |
| "grad_norm": 0.3750103705444987, | |
| "learning_rate": 4.687122285434456e-07, | |
| "loss": 0.9218, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.6552462526766596, | |
| "grad_norm": 0.4168122554116308, | |
| "learning_rate": 4.568442878177467e-07, | |
| "loss": 0.9165, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 3.6595289079229123, | |
| "grad_norm": 0.42052436299883195, | |
| "learning_rate": 4.451257330888442e-07, | |
| "loss": 1.0046, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.663811563169165, | |
| "grad_norm": 0.3775819321872966, | |
| "learning_rate": 4.33556709716311e-07, | |
| "loss": 0.8148, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 3.6680942184154177, | |
| "grad_norm": 0.40588411521050055, | |
| "learning_rate": 4.2213736120490373e-07, | |
| "loss": 0.9766, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.67237687366167, | |
| "grad_norm": 0.3879183257896917, | |
| "learning_rate": 4.1086782920276845e-07, | |
| "loss": 0.9038, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 3.6766595289079227, | |
| "grad_norm": 0.371088950938356, | |
| "learning_rate": 3.997482534997071e-07, | |
| "loss": 0.9691, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.6809421841541754, | |
| "grad_norm": 0.3974254305078794, | |
| "learning_rate": 3.8877877202541793e-07, | |
| "loss": 0.9505, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 3.685224839400428, | |
| "grad_norm": 0.38333801357842573, | |
| "learning_rate": 3.779595208478065e-07, | |
| "loss": 0.8308, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.689507494646681, | |
| "grad_norm": 0.37315579927328224, | |
| "learning_rate": 3.6729063417128285e-07, | |
| "loss": 0.8951, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 3.6937901498929335, | |
| "grad_norm": 0.41169860046752177, | |
| "learning_rate": 3.567722443351032e-07, | |
| "loss": 0.856, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.6980728051391862, | |
| "grad_norm": 0.3540168865001641, | |
| "learning_rate": 3.464044818117268e-07, | |
| "loss": 0.9567, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 3.702355460385439, | |
| "grad_norm": 0.41805384086496045, | |
| "learning_rate": 3.361874752051991e-07, | |
| "loss": 0.8485, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.7066381156316917, | |
| "grad_norm": 0.3932453571640372, | |
| "learning_rate": 3.2612135124955453e-07, | |
| "loss": 0.8981, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 3.7109207708779444, | |
| "grad_norm": 0.35556756655208993, | |
| "learning_rate": 3.1620623480724807e-07, | |
| "loss": 0.7991, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.715203426124197, | |
| "grad_norm": 0.38025591039841, | |
| "learning_rate": 3.064422488675986e-07, | |
| "loss": 0.921, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 3.71948608137045, | |
| "grad_norm": 0.39447979117902376, | |
| "learning_rate": 2.968295145452715e-07, | |
| "loss": 0.8516, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.7237687366167025, | |
| "grad_norm": 0.36729974047622016, | |
| "learning_rate": 2.8736815107877626e-07, | |
| "loss": 0.9292, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 3.728051391862955, | |
| "grad_norm": 0.3892287341045359, | |
| "learning_rate": 2.7805827582897683e-07, | |
| "loss": 0.8804, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.732334047109208, | |
| "grad_norm": 0.41914843746271097, | |
| "learning_rate": 2.6890000427765157e-07, | |
| "loss": 0.8756, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 3.7366167023554606, | |
| "grad_norm": 0.39627355945962395, | |
| "learning_rate": 2.598934500260455e-07, | |
| "loss": 0.9612, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.7408993576017133, | |
| "grad_norm": 0.40215083865929563, | |
| "learning_rate": 2.510387247934759e-07, | |
| "loss": 1.0171, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 3.7451820128479656, | |
| "grad_norm": 0.3908638307412036, | |
| "learning_rate": 2.4233593841593295e-07, | |
| "loss": 0.8599, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.7494646680942183, | |
| "grad_norm": 0.4326871280589204, | |
| "learning_rate": 2.3378519884472428e-07, | |
| "loss": 1.0263, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 3.753747323340471, | |
| "grad_norm": 0.38245250647594886, | |
| "learning_rate": 2.25386612145137e-07, | |
| "loss": 0.9593, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.7580299785867237, | |
| "grad_norm": 0.3778573404558164, | |
| "learning_rate": 2.1714028249511798e-07, | |
| "loss": 0.9466, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 3.7623126338329764, | |
| "grad_norm": 0.3700075006593136, | |
| "learning_rate": 2.0904631218398445e-07, | |
| "loss": 0.8128, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.766595289079229, | |
| "grad_norm": 0.3843775256635492, | |
| "learning_rate": 2.011048016111544e-07, | |
| "loss": 0.9134, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 3.770877944325482, | |
| "grad_norm": 0.385219325392379, | |
| "learning_rate": 1.9331584928490159e-07, | |
| "loss": 0.8527, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.7751605995717346, | |
| "grad_norm": 0.36661581147669026, | |
| "learning_rate": 1.8567955182113295e-07, | |
| "loss": 0.8592, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 3.7794432548179873, | |
| "grad_norm": 0.401361109957553, | |
| "learning_rate": 1.7819600394218956e-07, | |
| "loss": 0.9088, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.78372591006424, | |
| "grad_norm": 0.32988480791991265, | |
| "learning_rate": 1.7086529847566979e-07, | |
| "loss": 0.7957, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 3.7880085653104922, | |
| "grad_norm": 0.37989640262936986, | |
| "learning_rate": 1.6368752635328998e-07, | |
| "loss": 0.8675, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.792291220556745, | |
| "grad_norm": 0.3937658078234294, | |
| "learning_rate": 1.5666277660973533e-07, | |
| "loss": 0.8864, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 3.7965738758029977, | |
| "grad_norm": 0.3722219853982238, | |
| "learning_rate": 1.49791136381576e-07, | |
| "loss": 0.9096, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.8008565310492504, | |
| "grad_norm": 0.37559569493426515, | |
| "learning_rate": 1.430726909061722e-07, | |
| "loss": 0.8924, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 3.805139186295503, | |
| "grad_norm": 0.38719709372883876, | |
| "learning_rate": 1.3650752352062508e-07, | |
| "loss": 0.8479, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.809421841541756, | |
| "grad_norm": 0.3911144136584381, | |
| "learning_rate": 1.3009571566073853e-07, | |
| "loss": 0.9491, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 3.8137044967880085, | |
| "grad_norm": 0.37807417768830925, | |
| "learning_rate": 1.238373468600118e-07, | |
| "loss": 0.9301, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.817987152034261, | |
| "grad_norm": 0.37694080855509665, | |
| "learning_rate": 1.1773249474865133e-07, | |
| "loss": 0.8065, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 3.822269807280514, | |
| "grad_norm": 0.388921594089528, | |
| "learning_rate": 1.1178123505260623e-07, | |
| "loss": 0.9592, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.8265524625267666, | |
| "grad_norm": 0.4116324419131167, | |
| "learning_rate": 1.0598364159263436e-07, | |
| "loss": 0.8211, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 3.8308351177730193, | |
| "grad_norm": 0.36448244518924466, | |
| "learning_rate": 1.0033978628338214e-07, | |
| "loss": 0.8574, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.835117773019272, | |
| "grad_norm": 0.37097780876337194, | |
| "learning_rate": 9.484973913249096e-08, | |
| "loss": 0.9514, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 3.8394004282655247, | |
| "grad_norm": 0.36937494307460916, | |
| "learning_rate": 8.95135682397366e-08, | |
| "loss": 1.0152, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.8436830835117775, | |
| "grad_norm": 0.38701761947361546, | |
| "learning_rate": 8.433133979617313e-08, | |
| "loss": 0.944, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 3.84796573875803, | |
| "grad_norm": 0.4062184881145919, | |
| "learning_rate": 7.930311808332092e-08, | |
| "loss": 0.9758, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.852248394004283, | |
| "grad_norm": 0.37343762843807315, | |
| "learning_rate": 7.442896547237011e-08, | |
| "loss": 0.8735, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 3.8565310492505356, | |
| "grad_norm": 0.3671379727642055, | |
| "learning_rate": 6.970894242339516e-08, | |
| "loss": 0.8647, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.860813704496788, | |
| "grad_norm": 0.3958355267876771, | |
| "learning_rate": 6.514310748462205e-08, | |
| "loss": 0.9561, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 3.8650963597430406, | |
| "grad_norm": 0.382409326734392, | |
| "learning_rate": 6.073151729168585e-08, | |
| "loss": 0.8091, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.8693790149892933, | |
| "grad_norm": 0.4074968347015751, | |
| "learning_rate": 5.6474226566938236e-08, | |
| "loss": 0.9165, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 3.873661670235546, | |
| "grad_norm": 0.3600503231444295, | |
| "learning_rate": 5.2371288118764626e-08, | |
| "loss": 0.8608, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.8779443254817987, | |
| "grad_norm": 0.4385570932475021, | |
| "learning_rate": 4.8422752840933393e-08, | |
| "loss": 1.0001, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 3.8822269807280514, | |
| "grad_norm": 0.3526075337659528, | |
| "learning_rate": 4.462866971195745e-08, | |
| "loss": 0.8845, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.886509635974304, | |
| "grad_norm": 0.3912267742586606, | |
| "learning_rate": 4.098908579449334e-08, | |
| "loss": 0.9521, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 3.890792291220557, | |
| "grad_norm": 0.38329918065994895, | |
| "learning_rate": 3.750404623475284e-08, | |
| "loss": 0.9337, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.8950749464668095, | |
| "grad_norm": 0.3671570660694989, | |
| "learning_rate": 3.4173594261947826e-08, | |
| "loss": 0.8763, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 3.8993576017130622, | |
| "grad_norm": 0.36467443117322795, | |
| "learning_rate": 3.099777118774766e-08, | |
| "loss": 0.7929, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.903640256959315, | |
| "grad_norm": 0.3850495904484138, | |
| "learning_rate": 2.797661640577265e-08, | |
| "loss": 0.8685, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 3.907922912205567, | |
| "grad_norm": 0.3947552978578375, | |
| "learning_rate": 2.511016739110139e-08, | |
| "loss": 1.0001, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.91220556745182, | |
| "grad_norm": 0.35654913515444236, | |
| "learning_rate": 2.2398459699811415e-08, | |
| "loss": 0.8357, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 3.9164882226980726, | |
| "grad_norm": 0.3755511134463352, | |
| "learning_rate": 1.9841526968528145e-08, | |
| "loss": 0.8337, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.9207708779443253, | |
| "grad_norm": 0.4178807150767805, | |
| "learning_rate": 1.74394009140183e-08, | |
| "loss": 1.0103, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 3.925053533190578, | |
| "grad_norm": 0.36343375952599793, | |
| "learning_rate": 1.5192111332791582e-08, | |
| "loss": 1.0066, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.9293361884368307, | |
| "grad_norm": 0.42967159160836504, | |
| "learning_rate": 1.3099686100728758e-08, | |
| "loss": 0.8981, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 3.9336188436830835, | |
| "grad_norm": 0.37863498483420355, | |
| "learning_rate": 1.1162151172741664e-08, | |
| "loss": 0.9011, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.937901498929336, | |
| "grad_norm": 0.36158296723772976, | |
| "learning_rate": 9.379530582445672e-09, | |
| "loss": 0.9935, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 3.942184154175589, | |
| "grad_norm": 0.3992757868964545, | |
| "learning_rate": 7.751846441866883e-09, | |
| "loss": 0.9523, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.9464668094218416, | |
| "grad_norm": 0.4093754834796768, | |
| "learning_rate": 6.279118941163176e-09, | |
| "loss": 0.9193, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 3.9507494646680943, | |
| "grad_norm": 0.3812402104730706, | |
| "learning_rate": 4.961366348374408e-09, | |
| "loss": 0.8255, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.955032119914347, | |
| "grad_norm": 0.3932024496097198, | |
| "learning_rate": 3.798605009198986e-09, | |
| "loss": 0.8468, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 3.9593147751605997, | |
| "grad_norm": 0.36137752700716075, | |
| "learning_rate": 2.790849346788471e-09, | |
| "loss": 0.8799, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.9635974304068524, | |
| "grad_norm": 0.39672575824023565, | |
| "learning_rate": 1.9381118615699467e-09, | |
| "loss": 0.9367, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 3.967880085653105, | |
| "grad_norm": 0.4049246679049995, | |
| "learning_rate": 1.240403131090584e-09, | |
| "loss": 0.9305, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.972162740899358, | |
| "grad_norm": 0.36851044379383624, | |
| "learning_rate": 6.977318098844165e-10, | |
| "loss": 0.8928, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 3.9764453961456105, | |
| "grad_norm": 0.3887303558382742, | |
| "learning_rate": 3.1010462936825745e-10, | |
| "loss": 0.8732, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.980728051391863, | |
| "grad_norm": 0.38791626187967704, | |
| "learning_rate": 7.752639775565618e-11, | |
| "loss": 0.9141, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 3.9850107066381155, | |
| "grad_norm": 0.3676480337759505, | |
| "learning_rate": 0.0, | |
| "loss": 0.9296, | |
| "step": 932 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 932, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 117, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.2867186494210048e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |