| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 2500, |
| "global_step": 5540, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009025270758122744, |
| "grad_norm": 9.466440213077052, |
| "learning_rate": 1.9985559566787006e-05, |
| "loss": 2.4118, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.018050541516245487, |
| "grad_norm": 5.092031681557354, |
| "learning_rate": 1.996750902527076e-05, |
| "loss": 0.8115, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02707581227436823, |
| "grad_norm": 3.581292467477243, |
| "learning_rate": 1.9949458483754514e-05, |
| "loss": 0.7566, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.036101083032490974, |
| "grad_norm": 3.0664426091496577, |
| "learning_rate": 1.9931407942238267e-05, |
| "loss": 0.7238, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04512635379061372, |
| "grad_norm": 2.953202771996899, |
| "learning_rate": 1.9913357400722025e-05, |
| "loss": 0.7088, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05415162454873646, |
| "grad_norm": 3.2107443547649233, |
| "learning_rate": 1.989530685920578e-05, |
| "loss": 0.6899, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0631768953068592, |
| "grad_norm": 2.606024610125901, |
| "learning_rate": 1.9877256317689532e-05, |
| "loss": 0.6928, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07220216606498195, |
| "grad_norm": 3.2049516084300245, |
| "learning_rate": 1.9859205776173286e-05, |
| "loss": 0.69, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0812274368231047, |
| "grad_norm": 2.761288783636034, |
| "learning_rate": 1.984115523465704e-05, |
| "loss": 0.6867, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09025270758122744, |
| "grad_norm": 2.996941813764781, |
| "learning_rate": 1.9823104693140797e-05, |
| "loss": 0.6675, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09927797833935018, |
| "grad_norm": 2.7492945960977617, |
| "learning_rate": 1.980505415162455e-05, |
| "loss": 0.6852, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10830324909747292, |
| "grad_norm": 3.1658110710465124, |
| "learning_rate": 1.9787003610108305e-05, |
| "loss": 0.6447, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11732851985559567, |
| "grad_norm": 2.5627271539534777, |
| "learning_rate": 1.976895306859206e-05, |
| "loss": 0.6562, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1263537906137184, |
| "grad_norm": 2.752341860058262, |
| "learning_rate": 1.9750902527075816e-05, |
| "loss": 0.6771, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13537906137184116, |
| "grad_norm": 3.047967974028345, |
| "learning_rate": 1.973285198555957e-05, |
| "loss": 0.6656, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1444043321299639, |
| "grad_norm": 2.9706224809053916, |
| "learning_rate": 1.9714801444043323e-05, |
| "loss": 0.6635, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.15342960288808663, |
| "grad_norm": 2.4472232682052253, |
| "learning_rate": 1.9696750902527077e-05, |
| "loss": 0.6474, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1624548736462094, |
| "grad_norm": 2.701915790395711, |
| "learning_rate": 1.967870036101083e-05, |
| "loss": 0.6641, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.17148014440433212, |
| "grad_norm": 2.4548463645426946, |
| "learning_rate": 1.9660649819494585e-05, |
| "loss": 0.6509, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.18050541516245489, |
| "grad_norm": 12.210856636716178, |
| "learning_rate": 1.964259927797834e-05, |
| "loss": 0.6646, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18953068592057762, |
| "grad_norm": 2.603197676839273, |
| "learning_rate": 1.9624548736462096e-05, |
| "loss": 0.6975, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.19855595667870035, |
| "grad_norm": 2.414541801668063, |
| "learning_rate": 1.960649819494585e-05, |
| "loss": 0.6412, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2075812274368231, |
| "grad_norm": 2.5004150715560787, |
| "learning_rate": 1.9588447653429607e-05, |
| "loss": 0.6477, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.21660649819494585, |
| "grad_norm": 2.65590491408136, |
| "learning_rate": 1.957039711191336e-05, |
| "loss": 0.6328, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.22563176895306858, |
| "grad_norm": 2.8094738107146107, |
| "learning_rate": 1.9552346570397115e-05, |
| "loss": 0.643, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.23465703971119134, |
| "grad_norm": 2.4628672221036787, |
| "learning_rate": 1.953429602888087e-05, |
| "loss": 0.6295, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24368231046931407, |
| "grad_norm": 2.6028074637701315, |
| "learning_rate": 1.9516245487364622e-05, |
| "loss": 0.6231, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2527075812274368, |
| "grad_norm": 2.6627654496834112, |
| "learning_rate": 1.9498194945848376e-05, |
| "loss": 0.6164, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.26173285198555957, |
| "grad_norm": 2.428314569670895, |
| "learning_rate": 1.948014440433213e-05, |
| "loss": 0.6137, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.27075812274368233, |
| "grad_norm": 2.145860067780341, |
| "learning_rate": 1.9462093862815884e-05, |
| "loss": 0.6162, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.27978339350180503, |
| "grad_norm": 2.6330932206114865, |
| "learning_rate": 1.944404332129964e-05, |
| "loss": 0.6152, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2888086642599278, |
| "grad_norm": 1.9933015700312968, |
| "learning_rate": 1.9425992779783395e-05, |
| "loss": 0.6333, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.29783393501805056, |
| "grad_norm": 2.532862933716995, |
| "learning_rate": 1.940794223826715e-05, |
| "loss": 0.6284, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.30685920577617326, |
| "grad_norm": 2.1667192528112755, |
| "learning_rate": 1.9389891696750906e-05, |
| "loss": 0.6237, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.315884476534296, |
| "grad_norm": 2.1701228962066295, |
| "learning_rate": 1.937184115523466e-05, |
| "loss": 0.6336, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3249097472924188, |
| "grad_norm": 2.273883822591775, |
| "learning_rate": 1.9353790613718413e-05, |
| "loss": 0.6209, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.33393501805054154, |
| "grad_norm": 2.29978243530194, |
| "learning_rate": 1.9335740072202167e-05, |
| "loss": 0.6318, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.34296028880866425, |
| "grad_norm": 2.411589937459258, |
| "learning_rate": 1.931768953068592e-05, |
| "loss": 0.6107, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.351985559566787, |
| "grad_norm": 2.175980233783614, |
| "learning_rate": 1.9299638989169675e-05, |
| "loss": 0.6034, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.36101083032490977, |
| "grad_norm": 2.3182975023815566, |
| "learning_rate": 1.9281588447653432e-05, |
| "loss": 0.6054, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3700361010830325, |
| "grad_norm": 2.301500912368193, |
| "learning_rate": 1.9263537906137186e-05, |
| "loss": 0.608, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.37906137184115524, |
| "grad_norm": 2.3411097096245146, |
| "learning_rate": 1.924548736462094e-05, |
| "loss": 0.6096, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.388086642599278, |
| "grad_norm": 2.3477579646460267, |
| "learning_rate": 1.9227436823104693e-05, |
| "loss": 0.5981, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3971119133574007, |
| "grad_norm": 2.574256614403349, |
| "learning_rate": 1.920938628158845e-05, |
| "loss": 0.6032, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.40613718411552346, |
| "grad_norm": 2.30445852375371, |
| "learning_rate": 1.9191335740072204e-05, |
| "loss": 0.6263, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4151624548736462, |
| "grad_norm": 1.9463153842646943, |
| "learning_rate": 1.9173285198555958e-05, |
| "loss": 0.5888, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.42418772563176893, |
| "grad_norm": 2.2957992259075692, |
| "learning_rate": 1.9155234657039712e-05, |
| "loss": 0.5919, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4332129963898917, |
| "grad_norm": 2.4850310058268397, |
| "learning_rate": 1.913718411552347e-05, |
| "loss": 0.5982, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.44223826714801445, |
| "grad_norm": 2.1484807254693665, |
| "learning_rate": 1.9119133574007223e-05, |
| "loss": 0.5922, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.45126353790613716, |
| "grad_norm": 2.117521935012365, |
| "learning_rate": 1.9101083032490977e-05, |
| "loss": 0.614, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4602888086642599, |
| "grad_norm": 2.129059383234727, |
| "learning_rate": 1.908303249097473e-05, |
| "loss": 0.6065, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4693140794223827, |
| "grad_norm": 2.1304018405195904, |
| "learning_rate": 1.9064981949458485e-05, |
| "loss": 0.5776, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.47833935018050544, |
| "grad_norm": 2.2992759766146973, |
| "learning_rate": 1.904693140794224e-05, |
| "loss": 0.5832, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.48736462093862815, |
| "grad_norm": 2.0969841001906704, |
| "learning_rate": 1.9028880866425992e-05, |
| "loss": 0.6178, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4963898916967509, |
| "grad_norm": 2.2377624378205834, |
| "learning_rate": 1.901083032490975e-05, |
| "loss": 0.5877, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5054151624548736, |
| "grad_norm": 2.2091730376930308, |
| "learning_rate": 1.8992779783393503e-05, |
| "loss": 0.6093, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5144404332129964, |
| "grad_norm": 2.0821252876149274, |
| "learning_rate": 1.897472924187726e-05, |
| "loss": 0.5971, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5234657039711191, |
| "grad_norm": 2.4846088029201923, |
| "learning_rate": 1.8956678700361014e-05, |
| "loss": 0.5894, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5324909747292419, |
| "grad_norm": 2.253250469898687, |
| "learning_rate": 1.8938628158844768e-05, |
| "loss": 0.6035, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5415162454873647, |
| "grad_norm": 2.1890406664703237, |
| "learning_rate": 1.8920577617328522e-05, |
| "loss": 0.5905, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5505415162454874, |
| "grad_norm": 1.830902528715668, |
| "learning_rate": 1.8902527075812276e-05, |
| "loss": 0.5666, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5595667870036101, |
| "grad_norm": 2.2712137897785216, |
| "learning_rate": 1.888447653429603e-05, |
| "loss": 0.5794, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5685920577617328, |
| "grad_norm": 2.064463681190312, |
| "learning_rate": 1.8866425992779783e-05, |
| "loss": 0.5813, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5776173285198556, |
| "grad_norm": 2.0670626484908046, |
| "learning_rate": 1.8848375451263537e-05, |
| "loss": 0.5989, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5866425992779783, |
| "grad_norm": 2.4382023992648563, |
| "learning_rate": 1.8830324909747294e-05, |
| "loss": 0.5844, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5956678700361011, |
| "grad_norm": 2.3559788464517935, |
| "learning_rate": 1.8812274368231048e-05, |
| "loss": 0.5751, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6046931407942239, |
| "grad_norm": 2.0417255214726655, |
| "learning_rate": 1.8794223826714802e-05, |
| "loss": 0.582, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6137184115523465, |
| "grad_norm": 2.0606597313764174, |
| "learning_rate": 1.877617328519856e-05, |
| "loss": 0.5563, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6227436823104693, |
| "grad_norm": 2.1279001628524714, |
| "learning_rate": 1.8758122743682313e-05, |
| "loss": 0.5646, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.631768953068592, |
| "grad_norm": 2.1228151514711318, |
| "learning_rate": 1.8740072202166067e-05, |
| "loss": 0.5687, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6407942238267148, |
| "grad_norm": 2.1178045046697997, |
| "learning_rate": 1.872202166064982e-05, |
| "loss": 0.5808, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6498194945848376, |
| "grad_norm": 2.110658120249641, |
| "learning_rate": 1.8703971119133574e-05, |
| "loss": 0.5576, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6588447653429603, |
| "grad_norm": 2.0620796847472835, |
| "learning_rate": 1.8685920577617328e-05, |
| "loss": 0.5817, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6678700361010831, |
| "grad_norm": 2.2036387339163217, |
| "learning_rate": 1.8667870036101086e-05, |
| "loss": 0.583, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6768953068592057, |
| "grad_norm": 2.169248099645488, |
| "learning_rate": 1.864981949458484e-05, |
| "loss": 0.5711, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6859205776173285, |
| "grad_norm": 2.0070917491256237, |
| "learning_rate": 1.8631768953068593e-05, |
| "loss": 0.5714, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6949458483754513, |
| "grad_norm": 2.412092286853898, |
| "learning_rate": 1.8613718411552347e-05, |
| "loss": 0.558, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.703971119133574, |
| "grad_norm": 2.1573044881131636, |
| "learning_rate": 1.8595667870036104e-05, |
| "loss": 0.5605, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7129963898916968, |
| "grad_norm": 2.0060925615864234, |
| "learning_rate": 1.8577617328519858e-05, |
| "loss": 0.5646, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7220216606498195, |
| "grad_norm": 2.0721270781349856, |
| "learning_rate": 1.8559566787003612e-05, |
| "loss": 0.5591, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7310469314079422, |
| "grad_norm": 1.9740854158027745, |
| "learning_rate": 1.8541516245487366e-05, |
| "loss": 0.5749, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.740072202166065, |
| "grad_norm": 2.1666939121131343, |
| "learning_rate": 1.852346570397112e-05, |
| "loss": 0.5559, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7490974729241877, |
| "grad_norm": 2.006153861447105, |
| "learning_rate": 1.8505415162454877e-05, |
| "loss": 0.5579, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7581227436823105, |
| "grad_norm": 2.1562521533225896, |
| "learning_rate": 1.848736462093863e-05, |
| "loss": 0.5641, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7671480144404332, |
| "grad_norm": 2.4213982076348386, |
| "learning_rate": 1.8469314079422384e-05, |
| "loss": 0.5697, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.776173285198556, |
| "grad_norm": 2.021492521441799, |
| "learning_rate": 1.8451263537906138e-05, |
| "loss": 0.5512, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7851985559566786, |
| "grad_norm": 2.2387067716748903, |
| "learning_rate": 1.8433212996389892e-05, |
| "loss": 0.5567, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7942238267148014, |
| "grad_norm": 2.1182127501600654, |
| "learning_rate": 1.8415162454873646e-05, |
| "loss": 0.5529, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8032490974729242, |
| "grad_norm": 1.8809686502549097, |
| "learning_rate": 1.8397111913357403e-05, |
| "loss": 0.549, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8122743682310469, |
| "grad_norm": 1.9559681017618193, |
| "learning_rate": 1.8379061371841157e-05, |
| "loss": 0.546, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8212996389891697, |
| "grad_norm": 1.8843502716508203, |
| "learning_rate": 1.836101083032491e-05, |
| "loss": 0.5281, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8303249097472925, |
| "grad_norm": 2.0782130238317365, |
| "learning_rate": 1.8342960288808668e-05, |
| "loss": 0.5355, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8393501805054152, |
| "grad_norm": 2.028491033091686, |
| "learning_rate": 1.832490974729242e-05, |
| "loss": 0.5573, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8483754512635379, |
| "grad_norm": 1.997600040747785, |
| "learning_rate": 1.8306859205776175e-05, |
| "loss": 0.5503, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8574007220216606, |
| "grad_norm": 2.086622795789731, |
| "learning_rate": 1.828880866425993e-05, |
| "loss": 0.5271, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8664259927797834, |
| "grad_norm": 2.2126356409155603, |
| "learning_rate": 1.8270758122743683e-05, |
| "loss": 0.5295, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8754512635379061, |
| "grad_norm": 1.9420226465771289, |
| "learning_rate": 1.8252707581227437e-05, |
| "loss": 0.5229, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8844765342960289, |
| "grad_norm": 2.3845783258984605, |
| "learning_rate": 1.823465703971119e-05, |
| "loss": 0.5302, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8935018050541517, |
| "grad_norm": 1.990644749395453, |
| "learning_rate": 1.8216606498194948e-05, |
| "loss": 0.5327, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9025270758122743, |
| "grad_norm": 1.9296515724126473, |
| "learning_rate": 1.81985559566787e-05, |
| "loss": 0.5368, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9115523465703971, |
| "grad_norm": 2.08354639136672, |
| "learning_rate": 1.8180505415162456e-05, |
| "loss": 0.537, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9205776173285198, |
| "grad_norm": 1.8810121127412915, |
| "learning_rate": 1.8162454873646213e-05, |
| "loss": 0.5233, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9296028880866426, |
| "grad_norm": 2.1865339910493, |
| "learning_rate": 1.8144404332129967e-05, |
| "loss": 0.5229, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9386281588447654, |
| "grad_norm": 1.8530594783548222, |
| "learning_rate": 1.812635379061372e-05, |
| "loss": 0.5489, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9476534296028881, |
| "grad_norm": 1.8045811881947107, |
| "learning_rate": 1.8108303249097474e-05, |
| "loss": 0.5216, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9566787003610109, |
| "grad_norm": 2.0216105196649465, |
| "learning_rate": 1.8090252707581228e-05, |
| "loss": 0.5327, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9657039711191335, |
| "grad_norm": 2.070828240474061, |
| "learning_rate": 1.8072202166064982e-05, |
| "loss": 0.5341, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9747292418772563, |
| "grad_norm": 2.118337434473814, |
| "learning_rate": 1.8054151624548736e-05, |
| "loss": 0.5206, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.983754512635379, |
| "grad_norm": 1.9327007846480317, |
| "learning_rate": 1.8036101083032493e-05, |
| "loss": 0.5223, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.9927797833935018, |
| "grad_norm": 1.8102667165760828, |
| "learning_rate": 1.8018050541516247e-05, |
| "loss": 0.5183, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.0018050541516246, |
| "grad_norm": 1.7565629809184389, |
| "learning_rate": 1.8e-05, |
| "loss": 0.5008, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.0108303249097472, |
| "grad_norm": 1.7812661895325377, |
| "learning_rate": 1.7981949458483758e-05, |
| "loss": 0.403, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.01985559566787, |
| "grad_norm": 1.9167006814291294, |
| "learning_rate": 1.796389891696751e-05, |
| "loss": 0.412, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.0288808664259927, |
| "grad_norm": 1.9923575509001286, |
| "learning_rate": 1.7945848375451265e-05, |
| "loss": 0.3926, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0379061371841156, |
| "grad_norm": 1.7192659741327438, |
| "learning_rate": 1.792779783393502e-05, |
| "loss": 0.4072, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.0469314079422383, |
| "grad_norm": 1.8513603883344656, |
| "learning_rate": 1.7909747292418773e-05, |
| "loss": 0.4031, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.055956678700361, |
| "grad_norm": 1.828939711963778, |
| "learning_rate": 1.7891696750902527e-05, |
| "loss": 0.3984, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.0649819494584838, |
| "grad_norm": 1.789210627896697, |
| "learning_rate": 1.7873646209386284e-05, |
| "loss": 0.4054, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0740072202166064, |
| "grad_norm": 1.8182359140012072, |
| "learning_rate": 1.7855595667870038e-05, |
| "loss": 0.4016, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.0830324909747293, |
| "grad_norm": 1.8738010102389915, |
| "learning_rate": 1.783754512635379e-05, |
| "loss": 0.3916, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.092057761732852, |
| "grad_norm": 1.7056071281982275, |
| "learning_rate": 1.7819494584837545e-05, |
| "loss": 0.4005, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.1010830324909748, |
| "grad_norm": 1.7631642754877512, |
| "learning_rate": 1.7801444043321303e-05, |
| "loss": 0.4044, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.1101083032490975, |
| "grad_norm": 1.9174467447157293, |
| "learning_rate": 1.7783393501805056e-05, |
| "loss": 0.3994, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.1191335740072201, |
| "grad_norm": 1.9504105068703066, |
| "learning_rate": 1.776534296028881e-05, |
| "loss": 0.413, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.128158844765343, |
| "grad_norm": 1.7274668622868, |
| "learning_rate": 1.7747292418772564e-05, |
| "loss": 0.4035, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.1371841155234657, |
| "grad_norm": 1.8575861518758143, |
| "learning_rate": 1.772924187725632e-05, |
| "loss": 0.4098, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1462093862815885, |
| "grad_norm": 1.834605129701837, |
| "learning_rate": 1.7711191335740075e-05, |
| "loss": 0.4107, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.1552346570397112, |
| "grad_norm": 1.6992491657802056, |
| "learning_rate": 1.769314079422383e-05, |
| "loss": 0.4104, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.164259927797834, |
| "grad_norm": 1.6426546320530329, |
| "learning_rate": 1.7675090252707583e-05, |
| "loss": 0.4078, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.1732851985559567, |
| "grad_norm": 1.8449792200452055, |
| "learning_rate": 1.7657039711191337e-05, |
| "loss": 0.4138, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.1823104693140793, |
| "grad_norm": 1.7495204322144347, |
| "learning_rate": 1.763898916967509e-05, |
| "loss": 0.4084, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.1913357400722022, |
| "grad_norm": 1.6260627217547752, |
| "learning_rate": 1.7620938628158844e-05, |
| "loss": 0.4197, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.2003610108303249, |
| "grad_norm": 1.803828249935731, |
| "learning_rate": 1.76028880866426e-05, |
| "loss": 0.4157, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.2093862815884477, |
| "grad_norm": 1.6698584933682465, |
| "learning_rate": 1.7584837545126355e-05, |
| "loss": 0.4108, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.2184115523465704, |
| "grad_norm": 1.8547896347467827, |
| "learning_rate": 1.756678700361011e-05, |
| "loss": 0.4049, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.2274368231046933, |
| "grad_norm": 1.9793660391728152, |
| "learning_rate": 1.7548736462093866e-05, |
| "loss": 0.4086, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.236462093862816, |
| "grad_norm": 1.9481106812224744, |
| "learning_rate": 1.753068592057762e-05, |
| "loss": 0.4217, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.2454873646209386, |
| "grad_norm": 1.8589145184235236, |
| "learning_rate": 1.7512635379061374e-05, |
| "loss": 0.4077, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2545126353790614, |
| "grad_norm": 1.7491717689464192, |
| "learning_rate": 1.7494584837545128e-05, |
| "loss": 0.4041, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.263537906137184, |
| "grad_norm": 1.8301618024998683, |
| "learning_rate": 1.747653429602888e-05, |
| "loss": 0.4156, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2725631768953067, |
| "grad_norm": 1.8157533205183023, |
| "learning_rate": 1.7458483754512635e-05, |
| "loss": 0.4066, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.2815884476534296, |
| "grad_norm": 1.618452105653501, |
| "learning_rate": 1.744043321299639e-05, |
| "loss": 0.4045, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.2906137184115525, |
| "grad_norm": 1.801416162193692, |
| "learning_rate": 1.7422382671480146e-05, |
| "loss": 0.4114, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.2996389891696751, |
| "grad_norm": 1.9120529810373577, |
| "learning_rate": 1.74043321299639e-05, |
| "loss": 0.4053, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.3086642599277978, |
| "grad_norm": 1.8649971513309, |
| "learning_rate": 1.7386281588447654e-05, |
| "loss": 0.3994, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.3176895306859207, |
| "grad_norm": 1.5823876746717431, |
| "learning_rate": 1.736823104693141e-05, |
| "loss": 0.405, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3267148014440433, |
| "grad_norm": 1.6565144465938595, |
| "learning_rate": 1.7350180505415165e-05, |
| "loss": 0.3956, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.335740072202166, |
| "grad_norm": 1.7349944474589856, |
| "learning_rate": 1.733212996389892e-05, |
| "loss": 0.4004, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.3447653429602888, |
| "grad_norm": 1.8413333458623544, |
| "learning_rate": 1.7314079422382673e-05, |
| "loss": 0.3933, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.3537906137184115, |
| "grad_norm": 1.5956337270311123, |
| "learning_rate": 1.7296028880866426e-05, |
| "loss": 0.4027, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.3628158844765343, |
| "grad_norm": 1.7395021140734128, |
| "learning_rate": 1.727797833935018e-05, |
| "loss": 0.4071, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.371841155234657, |
| "grad_norm": 1.7983719467870525, |
| "learning_rate": 1.7259927797833937e-05, |
| "loss": 0.404, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.3808664259927799, |
| "grad_norm": 1.7263325979359656, |
| "learning_rate": 1.724187725631769e-05, |
| "loss": 0.3933, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.3898916967509025, |
| "grad_norm": 1.7378004534277407, |
| "learning_rate": 1.7223826714801445e-05, |
| "loss": 0.3839, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.3989169675090252, |
| "grad_norm": 1.8567819478825005, |
| "learning_rate": 1.72057761732852e-05, |
| "loss": 0.4032, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.407942238267148, |
| "grad_norm": 1.752933416288969, |
| "learning_rate": 1.7187725631768956e-05, |
| "loss": 0.3914, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.4169675090252707, |
| "grad_norm": 1.7945976502850631, |
| "learning_rate": 1.716967509025271e-05, |
| "loss": 0.4052, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.4259927797833936, |
| "grad_norm": 1.8440958122894435, |
| "learning_rate": 1.7151624548736464e-05, |
| "loss": 0.3889, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.4350180505415162, |
| "grad_norm": 1.9541732635364462, |
| "learning_rate": 1.7133574007220218e-05, |
| "loss": 0.3897, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.444043321299639, |
| "grad_norm": 1.7128039074068433, |
| "learning_rate": 1.711552346570397e-05, |
| "loss": 0.3949, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4530685920577617, |
| "grad_norm": 1.7805663890604293, |
| "learning_rate": 1.709747292418773e-05, |
| "loss": 0.4046, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.4620938628158844, |
| "grad_norm": 1.7176479703166927, |
| "learning_rate": 1.7079422382671482e-05, |
| "loss": 0.3948, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.4711191335740073, |
| "grad_norm": 1.7476594687945506, |
| "learning_rate": 1.7061371841155236e-05, |
| "loss": 0.3987, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.48014440433213, |
| "grad_norm": 1.7097737410315963, |
| "learning_rate": 1.704332129963899e-05, |
| "loss": 0.3971, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.4891696750902528, |
| "grad_norm": 1.6154261828372305, |
| "learning_rate": 1.7025270758122744e-05, |
| "loss": 0.3965, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.4981949458483754, |
| "grad_norm": 1.6439021077041545, |
| "learning_rate": 1.7007220216606498e-05, |
| "loss": 0.3864, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.5072202166064983, |
| "grad_norm": 1.7335388992906235, |
| "learning_rate": 1.6989169675090255e-05, |
| "loss": 0.3904, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.516245487364621, |
| "grad_norm": 1.9280976394130103, |
| "learning_rate": 1.697111913357401e-05, |
| "loss": 0.3982, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5252707581227436, |
| "grad_norm": 1.6397519533163014, |
| "learning_rate": 1.6953068592057766e-05, |
| "loss": 0.4071, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.5342960288808665, |
| "grad_norm": 1.7403299732030928, |
| "learning_rate": 1.693501805054152e-05, |
| "loss": 0.396, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.5433212996389891, |
| "grad_norm": 1.856917104233432, |
| "learning_rate": 1.6916967509025274e-05, |
| "loss": 0.3965, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.5523465703971118, |
| "grad_norm": 2.0561648148768246, |
| "learning_rate": 1.6898916967509027e-05, |
| "loss": 0.4011, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5613718411552346, |
| "grad_norm": 1.5934247442993748, |
| "learning_rate": 1.688086642599278e-05, |
| "loss": 0.4047, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.5703971119133575, |
| "grad_norm": 1.6742097942966434, |
| "learning_rate": 1.6862815884476535e-05, |
| "loss": 0.3768, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.5794223826714802, |
| "grad_norm": 1.6344395357039199, |
| "learning_rate": 1.684476534296029e-05, |
| "loss": 0.3994, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.5884476534296028, |
| "grad_norm": 1.947535712414049, |
| "learning_rate": 1.6826714801444043e-05, |
| "loss": 0.3859, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.5974729241877257, |
| "grad_norm": 1.8081716367634284, |
| "learning_rate": 1.68086642599278e-05, |
| "loss": 0.3971, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.6064981949458483, |
| "grad_norm": 1.7555061804990388, |
| "learning_rate": 1.6790613718411554e-05, |
| "loss": 0.4033, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.615523465703971, |
| "grad_norm": 1.7977625864566746, |
| "learning_rate": 1.6772563176895307e-05, |
| "loss": 0.3869, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.6245487364620939, |
| "grad_norm": 2.0151295696094156, |
| "learning_rate": 1.6754512635379065e-05, |
| "loss": 0.3896, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6335740072202167, |
| "grad_norm": 1.7117350496546804, |
| "learning_rate": 1.673646209386282e-05, |
| "loss": 0.3931, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.6425992779783394, |
| "grad_norm": 1.5367121125433267, |
| "learning_rate": 1.6718411552346572e-05, |
| "loss": 0.3925, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.651624548736462, |
| "grad_norm": 1.7252709384729956, |
| "learning_rate": 1.6700361010830326e-05, |
| "loss": 0.3813, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.660649819494585, |
| "grad_norm": 1.6424146369682562, |
| "learning_rate": 1.668231046931408e-05, |
| "loss": 0.3959, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.6696750902527075, |
| "grad_norm": 1.7074460801427431, |
| "learning_rate": 1.6664259927797834e-05, |
| "loss": 0.3821, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.6787003610108302, |
| "grad_norm": 1.7779369602208115, |
| "learning_rate": 1.6646209386281588e-05, |
| "loss": 0.387, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.687725631768953, |
| "grad_norm": 1.7587322948042428, |
| "learning_rate": 1.6628158844765345e-05, |
| "loss": 0.3824, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.696750902527076, |
| "grad_norm": 1.719570186327862, |
| "learning_rate": 1.66101083032491e-05, |
| "loss": 0.3968, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.7057761732851986, |
| "grad_norm": 1.758936196148823, |
| "learning_rate": 1.6592057761732852e-05, |
| "loss": 0.4, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.7148014440433212, |
| "grad_norm": 1.7037689600288302, |
| "learning_rate": 1.657400722021661e-05, |
| "loss": 0.3824, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.7238267148014441, |
| "grad_norm": 1.6704621391960432, |
| "learning_rate": 1.6555956678700363e-05, |
| "loss": 0.3904, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.7328519855595668, |
| "grad_norm": 1.6276171413365443, |
| "learning_rate": 1.6537906137184117e-05, |
| "loss": 0.3936, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.7418772563176894, |
| "grad_norm": 1.4915457367830292, |
| "learning_rate": 1.651985559566787e-05, |
| "loss": 0.3878, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.7509025270758123, |
| "grad_norm": 1.6998121121079899, |
| "learning_rate": 1.6501805054151625e-05, |
| "loss": 0.3851, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7599277978339352, |
| "grad_norm": 1.7868333554788238, |
| "learning_rate": 1.6483754512635382e-05, |
| "loss": 0.4016, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.7689530685920578, |
| "grad_norm": 1.9509112307215477, |
| "learning_rate": 1.6465703971119136e-05, |
| "loss": 0.3947, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.7779783393501805, |
| "grad_norm": 1.8313430184945898, |
| "learning_rate": 1.644765342960289e-05, |
| "loss": 0.3883, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.7870036101083033, |
| "grad_norm": 1.69745773450961, |
| "learning_rate": 1.6429602888086644e-05, |
| "loss": 0.3927, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.796028880866426, |
| "grad_norm": 1.5706439056988484, |
| "learning_rate": 1.6411552346570397e-05, |
| "loss": 0.3885, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.8050541516245486, |
| "grad_norm": 1.9749233060644407, |
| "learning_rate": 1.639350180505415e-05, |
| "loss": 0.3911, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.8140794223826715, |
| "grad_norm": 1.835030597426223, |
| "learning_rate": 1.637545126353791e-05, |
| "loss": 0.3942, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.8231046931407944, |
| "grad_norm": 1.5738451037571743, |
| "learning_rate": 1.6357400722021662e-05, |
| "loss": 0.3812, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.8321299638989168, |
| "grad_norm": 1.751135780497672, |
| "learning_rate": 1.6339350180505416e-05, |
| "loss": 0.3862, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.8411552346570397, |
| "grad_norm": 1.864136563331331, |
| "learning_rate": 1.6321299638989173e-05, |
| "loss": 0.3942, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.8501805054151625, |
| "grad_norm": 1.5420248188805685, |
| "learning_rate": 1.6303249097472927e-05, |
| "loss": 0.3828, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.8592057761732852, |
| "grad_norm": 1.6307207463776452, |
| "learning_rate": 1.628519855595668e-05, |
| "loss": 0.3766, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.8682310469314078, |
| "grad_norm": 1.7022785037029124, |
| "learning_rate": 1.6267148014440435e-05, |
| "loss": 0.3847, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.8772563176895307, |
| "grad_norm": 1.6865169590583908, |
| "learning_rate": 1.624909747292419e-05, |
| "loss": 0.3855, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.8862815884476536, |
| "grad_norm": 1.772735245585654, |
| "learning_rate": 1.6231046931407942e-05, |
| "loss": 0.3766, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.895306859205776, |
| "grad_norm": 1.6414783417710321, |
| "learning_rate": 1.6212996389891696e-05, |
| "loss": 0.3769, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.904332129963899, |
| "grad_norm": 1.638546064732281, |
| "learning_rate": 1.6194945848375453e-05, |
| "loss": 0.3811, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.9133574007220218, |
| "grad_norm": 1.7273768032341619, |
| "learning_rate": 1.6176895306859207e-05, |
| "loss": 0.3787, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.9223826714801444, |
| "grad_norm": 1.7610887591542017, |
| "learning_rate": 1.615884476534296e-05, |
| "loss": 0.3901, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.931407942238267, |
| "grad_norm": 1.7492183570516289, |
| "learning_rate": 1.6140794223826718e-05, |
| "loss": 0.385, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.94043321299639, |
| "grad_norm": 1.6697391799649597, |
| "learning_rate": 1.6122743682310472e-05, |
| "loss": 0.3865, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.9494584837545126, |
| "grad_norm": 1.5675976217251384, |
| "learning_rate": 1.6104693140794226e-05, |
| "loss": 0.3757, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.9584837545126352, |
| "grad_norm": 1.6558779934861987, |
| "learning_rate": 1.608664259927798e-05, |
| "loss": 0.379, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.967509025270758, |
| "grad_norm": 1.6873028676221205, |
| "learning_rate": 1.6068592057761733e-05, |
| "loss": 0.3811, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.976534296028881, |
| "grad_norm": 1.6240908295441967, |
| "learning_rate": 1.6050541516245487e-05, |
| "loss": 0.3654, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.9855595667870036, |
| "grad_norm": 1.6615813580574832, |
| "learning_rate": 1.603249097472924e-05, |
| "loss": 0.3796, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.9945848375451263, |
| "grad_norm": 1.686361226213432, |
| "learning_rate": 1.6014440433212998e-05, |
| "loss": 0.3837, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.003610108303249, |
| "grad_norm": 1.4089105927337982, |
| "learning_rate": 1.5996389891696752e-05, |
| "loss": 0.3506, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.012635379061372, |
| "grad_norm": 1.375915950771055, |
| "learning_rate": 1.5978339350180506e-05, |
| "loss": 0.2759, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.0216606498194944, |
| "grad_norm": 1.5521739291638614, |
| "learning_rate": 1.5960288808664263e-05, |
| "loss": 0.2756, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.0306859205776173, |
| "grad_norm": 1.6072972838350046, |
| "learning_rate": 1.5942238267148017e-05, |
| "loss": 0.2673, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.03971119133574, |
| "grad_norm": 1.6652650820556896, |
| "learning_rate": 1.592418772563177e-05, |
| "loss": 0.27, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.0487364620938626, |
| "grad_norm": 1.4210627524187658, |
| "learning_rate": 1.5906137184115525e-05, |
| "loss": 0.2706, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.0577617328519855, |
| "grad_norm": 1.6471286272058623, |
| "learning_rate": 1.588808664259928e-05, |
| "loss": 0.2762, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.0667870036101084, |
| "grad_norm": 1.467229273738129, |
| "learning_rate": 1.5870036101083032e-05, |
| "loss": 0.2828, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.0758122743682312, |
| "grad_norm": 1.4064489033689571, |
| "learning_rate": 1.585198555956679e-05, |
| "loss": 0.2751, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.0848375451263537, |
| "grad_norm": 1.4888952106078595, |
| "learning_rate": 1.5833935018050543e-05, |
| "loss": 0.2785, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.0938628158844765, |
| "grad_norm": 1.5202276448854608, |
| "learning_rate": 1.5815884476534297e-05, |
| "loss": 0.2728, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.1028880866425994, |
| "grad_norm": 1.4911272606877095, |
| "learning_rate": 1.579783393501805e-05, |
| "loss": 0.2735, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.111913357400722, |
| "grad_norm": 1.5345612677088643, |
| "learning_rate": 1.5779783393501805e-05, |
| "loss": 0.2751, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.1209386281588447, |
| "grad_norm": 1.470603227078021, |
| "learning_rate": 1.5761732851985562e-05, |
| "loss": 0.2799, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.1299638989169676, |
| "grad_norm": 1.6228746622020027, |
| "learning_rate": 1.5743682310469316e-05, |
| "loss": 0.2751, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.1389891696750905, |
| "grad_norm": 1.5092478918219132, |
| "learning_rate": 1.572563176895307e-05, |
| "loss": 0.2837, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.148014440433213, |
| "grad_norm": 1.5022801045910057, |
| "learning_rate": 1.5707581227436823e-05, |
| "loss": 0.2806, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.1570397111913358, |
| "grad_norm": 1.5037599452460795, |
| "learning_rate": 1.568953068592058e-05, |
| "loss": 0.2767, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.1660649819494586, |
| "grad_norm": 1.579515606540816, |
| "learning_rate": 1.5671480144404334e-05, |
| "loss": 0.2849, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.175090252707581, |
| "grad_norm": 1.4754220940034453, |
| "learning_rate": 1.5653429602888088e-05, |
| "loss": 0.2791, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.184115523465704, |
| "grad_norm": 1.3959522376935156, |
| "learning_rate": 1.5635379061371842e-05, |
| "loss": 0.2881, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.193140794223827, |
| "grad_norm": 1.5444043202318694, |
| "learning_rate": 1.5617328519855596e-05, |
| "loss": 0.2786, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.2021660649819497, |
| "grad_norm": 1.4479480310561215, |
| "learning_rate": 1.559927797833935e-05, |
| "loss": 0.2797, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.211191335740072, |
| "grad_norm": 1.3937802391741012, |
| "learning_rate": 1.5581227436823107e-05, |
| "loss": 0.2799, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.220216606498195, |
| "grad_norm": 1.4393031837130634, |
| "learning_rate": 1.556317689530686e-05, |
| "loss": 0.2823, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.229241877256318, |
| "grad_norm": 1.6426238576078906, |
| "learning_rate": 1.5545126353790614e-05, |
| "loss": 0.2784, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.2382671480144403, |
| "grad_norm": 1.6072066180424502, |
| "learning_rate": 1.552707581227437e-05, |
| "loss": 0.2872, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.247292418772563, |
| "grad_norm": 1.3689820360945464, |
| "learning_rate": 1.5509025270758125e-05, |
| "loss": 0.2812, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.256317689530686, |
| "grad_norm": 1.363835208538375, |
| "learning_rate": 1.549097472924188e-05, |
| "loss": 0.283, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.265342960288809, |
| "grad_norm": 1.6184406483983944, |
| "learning_rate": 1.5472924187725633e-05, |
| "loss": 0.2819, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.2743682310469313, |
| "grad_norm": 1.452212492826692, |
| "learning_rate": 1.5454873646209387e-05, |
| "loss": 0.2886, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.283393501805054, |
| "grad_norm": 1.3645649562334174, |
| "learning_rate": 1.543682310469314e-05, |
| "loss": 0.2817, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.292418772563177, |
| "grad_norm": 1.503443086552838, |
| "learning_rate": 1.5418772563176895e-05, |
| "loss": 0.2816, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.3014440433212995, |
| "grad_norm": 1.4467084497351497, |
| "learning_rate": 1.5400722021660652e-05, |
| "loss": 0.2857, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.3104693140794224, |
| "grad_norm": 1.3905500897215815, |
| "learning_rate": 1.5382671480144406e-05, |
| "loss": 0.2858, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.3194945848375452, |
| "grad_norm": 1.5392545695875637, |
| "learning_rate": 1.536462093862816e-05, |
| "loss": 0.2828, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.328519855595668, |
| "grad_norm": 1.5670788162913514, |
| "learning_rate": 1.5346570397111917e-05, |
| "loss": 0.2819, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.3375451263537905, |
| "grad_norm": 1.4159881393523641, |
| "learning_rate": 1.532851985559567e-05, |
| "loss": 0.2824, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.3465703971119134, |
| "grad_norm": 1.4831615682921662, |
| "learning_rate": 1.5310469314079424e-05, |
| "loss": 0.2865, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.3555956678700363, |
| "grad_norm": 1.4219138664036257, |
| "learning_rate": 1.5292418772563178e-05, |
| "loss": 0.2868, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.3646209386281587, |
| "grad_norm": 1.512943638670528, |
| "learning_rate": 1.5274368231046932e-05, |
| "loss": 0.2808, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.3736462093862816, |
| "grad_norm": 1.5368411160330724, |
| "learning_rate": 1.5256317689530686e-05, |
| "loss": 0.2818, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.3826714801444044, |
| "grad_norm": 1.4589365443912974, |
| "learning_rate": 1.5238267148014441e-05, |
| "loss": 0.2858, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.3916967509025273, |
| "grad_norm": 4.062153022713927, |
| "learning_rate": 1.5220216606498197e-05, |
| "loss": 0.2859, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.4007220216606497, |
| "grad_norm": 1.5707167014303813, |
| "learning_rate": 1.520216606498195e-05, |
| "loss": 0.2848, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.4097472924187726, |
| "grad_norm": 1.435828117165283, |
| "learning_rate": 1.5184115523465706e-05, |
| "loss": 0.2882, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.4187725631768955, |
| "grad_norm": 1.4275643368334596, |
| "learning_rate": 1.516606498194946e-05, |
| "loss": 0.2867, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.427797833935018, |
| "grad_norm": 1.5650882814331575, |
| "learning_rate": 1.5148014440433214e-05, |
| "loss": 0.2802, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.436823104693141, |
| "grad_norm": 1.6485980287400839, |
| "learning_rate": 1.512996389891697e-05, |
| "loss": 0.2876, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.4458483754512637, |
| "grad_norm": 1.7648222520273187, |
| "learning_rate": 1.5111913357400723e-05, |
| "loss": 0.2918, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.4548736462093865, |
| "grad_norm": 1.3897537891471694, |
| "learning_rate": 1.5093862815884477e-05, |
| "loss": 0.2827, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.463898916967509, |
| "grad_norm": 1.363449811555752, |
| "learning_rate": 1.5075812274368234e-05, |
| "loss": 0.2755, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.472924187725632, |
| "grad_norm": 1.5054780686538907, |
| "learning_rate": 1.5057761732851988e-05, |
| "loss": 0.2818, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.4819494584837547, |
| "grad_norm": 1.4402872956422859, |
| "learning_rate": 1.5039711191335742e-05, |
| "loss": 0.2806, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.490974729241877, |
| "grad_norm": 1.531753267929379, |
| "learning_rate": 1.5021660649819495e-05, |
| "loss": 0.281, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.2873311123663773, |
| "learning_rate": 1.5003610108303251e-05, |
| "loss": 0.2816, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.509025270758123, |
| "grad_norm": 1.3443259078812042, |
| "learning_rate": 1.4985559566787005e-05, |
| "loss": 0.2816, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.5180505415162457, |
| "grad_norm": 1.5434631176695652, |
| "learning_rate": 1.4967509025270759e-05, |
| "loss": 0.2819, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.527075812274368, |
| "grad_norm": 1.362978897038253, |
| "learning_rate": 1.4949458483754512e-05, |
| "loss": 0.2794, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.536101083032491, |
| "grad_norm": 1.3819832377162544, |
| "learning_rate": 1.4931407942238268e-05, |
| "loss": 0.2868, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.5451263537906135, |
| "grad_norm": 1.6555554545565292, |
| "learning_rate": 1.4913357400722023e-05, |
| "loss": 0.2794, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.5541516245487363, |
| "grad_norm": 1.3799356893593522, |
| "learning_rate": 1.4895306859205779e-05, |
| "loss": 0.2895, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.563176895306859, |
| "grad_norm": 1.3978197779714834, |
| "learning_rate": 1.4877256317689533e-05, |
| "loss": 0.2823, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.572202166064982, |
| "grad_norm": 1.5752923039032896, |
| "learning_rate": 1.4859205776173287e-05, |
| "loss": 0.2864, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.581227436823105, |
| "grad_norm": 1.5198542385453242, |
| "learning_rate": 1.484115523465704e-05, |
| "loss": 0.2895, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.5902527075812274, |
| "grad_norm": 1.5294363816313867, |
| "learning_rate": 1.4823104693140796e-05, |
| "loss": 0.2841, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.5992779783393503, |
| "grad_norm": 1.6460098424826168, |
| "learning_rate": 1.480505415162455e-05, |
| "loss": 0.2905, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.6083032490974727, |
| "grad_norm": 1.4396643499754782, |
| "learning_rate": 1.4787003610108304e-05, |
| "loss": 0.2839, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.6173285198555956, |
| "grad_norm": 1.4770734853739884, |
| "learning_rate": 1.4768953068592057e-05, |
| "loss": 0.285, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.6263537906137184, |
| "grad_norm": 1.5925936800627583, |
| "learning_rate": 1.4750902527075815e-05, |
| "loss": 0.2939, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.6353790613718413, |
| "grad_norm": 1.3085799141153367, |
| "learning_rate": 1.4732851985559568e-05, |
| "loss": 0.2755, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.644404332129964, |
| "grad_norm": 1.3774680740883536, |
| "learning_rate": 1.4714801444043322e-05, |
| "loss": 0.2808, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.6534296028880866, |
| "grad_norm": 1.4223794368692813, |
| "learning_rate": 1.4696750902527078e-05, |
| "loss": 0.2767, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.6624548736462095, |
| "grad_norm": 1.579637524626807, |
| "learning_rate": 1.4678700361010832e-05, |
| "loss": 0.2849, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.671480144404332, |
| "grad_norm": 1.393710344111409, |
| "learning_rate": 1.4660649819494585e-05, |
| "loss": 0.2768, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.6805054151624548, |
| "grad_norm": 1.36136102500023, |
| "learning_rate": 1.464259927797834e-05, |
| "loss": 0.2816, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.6895306859205776, |
| "grad_norm": 1.4211824221206752, |
| "learning_rate": 1.4624548736462095e-05, |
| "loss": 0.2856, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.6985559566787005, |
| "grad_norm": 1.4156029806705734, |
| "learning_rate": 1.460649819494585e-05, |
| "loss": 0.2783, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.707581227436823, |
| "grad_norm": 1.523501338124956, |
| "learning_rate": 1.4588447653429606e-05, |
| "loss": 0.2872, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.716606498194946, |
| "grad_norm": 1.472818222499458, |
| "learning_rate": 1.457039711191336e-05, |
| "loss": 0.2806, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.7256317689530687, |
| "grad_norm": 1.3839972224563968, |
| "learning_rate": 1.4552346570397113e-05, |
| "loss": 0.2887, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.734657039711191, |
| "grad_norm": 1.4093341174867682, |
| "learning_rate": 1.4534296028880867e-05, |
| "loss": 0.2812, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.743682310469314, |
| "grad_norm": 1.4227505403261873, |
| "learning_rate": 1.4516245487364623e-05, |
| "loss": 0.2873, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.752707581227437, |
| "grad_norm": 1.532809014546811, |
| "learning_rate": 1.4498194945848376e-05, |
| "loss": 0.2804, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.7617328519855597, |
| "grad_norm": 1.5838121158952596, |
| "learning_rate": 1.448014440433213e-05, |
| "loss": 0.2841, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.770758122743682, |
| "grad_norm": 1.463955244062951, |
| "learning_rate": 1.4462093862815884e-05, |
| "loss": 0.2833, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.779783393501805, |
| "grad_norm": 1.5918158548066845, |
| "learning_rate": 1.4444043321299641e-05, |
| "loss": 0.2815, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.788808664259928, |
| "grad_norm": 1.4805453499007415, |
| "learning_rate": 1.4425992779783395e-05, |
| "loss": 0.2811, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.7978339350180503, |
| "grad_norm": 1.5502318521124943, |
| "learning_rate": 1.4407942238267149e-05, |
| "loss": 0.28, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.806859205776173, |
| "grad_norm": 1.4288682445201122, |
| "learning_rate": 1.4389891696750904e-05, |
| "loss": 0.2868, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.815884476534296, |
| "grad_norm": 1.4010775386857144, |
| "learning_rate": 1.4371841155234658e-05, |
| "loss": 0.2889, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.824909747292419, |
| "grad_norm": 1.4238966947086382, |
| "learning_rate": 1.4353790613718412e-05, |
| "loss": 0.2797, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.8339350180505414, |
| "grad_norm": 1.6701165033755396, |
| "learning_rate": 1.4335740072202166e-05, |
| "loss": 0.2821, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.8429602888086642, |
| "grad_norm": 1.52168185280516, |
| "learning_rate": 1.4317689530685921e-05, |
| "loss": 0.2836, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.851985559566787, |
| "grad_norm": 1.4368504165059217, |
| "learning_rate": 1.4299638989169675e-05, |
| "loss": 0.2821, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.8610108303249095, |
| "grad_norm": 1.5537901610258407, |
| "learning_rate": 1.4281588447653432e-05, |
| "loss": 0.2767, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.8700361010830324, |
| "grad_norm": 1.5418670823823388, |
| "learning_rate": 1.4263537906137186e-05, |
| "loss": 0.286, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.8790613718411553, |
| "grad_norm": 1.346173892816451, |
| "learning_rate": 1.424548736462094e-05, |
| "loss": 0.2855, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.888086642599278, |
| "grad_norm": 1.4152264182563925, |
| "learning_rate": 1.4227436823104694e-05, |
| "loss": 0.2815, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.8971119133574006, |
| "grad_norm": 1.3391376662233245, |
| "learning_rate": 1.420938628158845e-05, |
| "loss": 0.2851, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.9061371841155235, |
| "grad_norm": 1.4435874024341668, |
| "learning_rate": 1.4191335740072203e-05, |
| "loss": 0.2796, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.9151624548736463, |
| "grad_norm": 1.3751003396664772, |
| "learning_rate": 1.4173285198555957e-05, |
| "loss": 0.2825, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.9241877256317688, |
| "grad_norm": 1.5204342697256372, |
| "learning_rate": 1.4155234657039711e-05, |
| "loss": 0.2841, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.9332129963898916, |
| "grad_norm": 1.6012862613909276, |
| "learning_rate": 1.4137184115523468e-05, |
| "loss": 0.2772, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.9422382671480145, |
| "grad_norm": 1.40233355498611, |
| "learning_rate": 1.4119133574007222e-05, |
| "loss": 0.2813, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.9512635379061374, |
| "grad_norm": 1.4678056138681723, |
| "learning_rate": 1.4101083032490976e-05, |
| "loss": 0.2833, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.96028880866426, |
| "grad_norm": 1.5256510122152633, |
| "learning_rate": 1.4083032490974731e-05, |
| "loss": 0.2875, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.9693140794223827, |
| "grad_norm": 1.5409003183834475, |
| "learning_rate": 1.4064981949458485e-05, |
| "loss": 0.2907, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.9783393501805056, |
| "grad_norm": 1.5329424357705386, |
| "learning_rate": 1.4046931407942239e-05, |
| "loss": 0.2795, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.987364620938628, |
| "grad_norm": 1.4730310617789872, |
| "learning_rate": 1.4028880866425993e-05, |
| "loss": 0.2818, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.996389891696751, |
| "grad_norm": 1.51332909920422, |
| "learning_rate": 1.4010830324909748e-05, |
| "loss": 0.2813, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.0054151624548737, |
| "grad_norm": 1.1148635775562197, |
| "learning_rate": 1.3992779783393502e-05, |
| "loss": 0.2436, |
| "step": 1665 |
| }, |
| { |
| "epoch": 3.0144404332129966, |
| "grad_norm": 1.358530663003378, |
| "learning_rate": 1.397472924187726e-05, |
| "loss": 0.2095, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.023465703971119, |
| "grad_norm": 1.3975749965062991, |
| "learning_rate": 1.3956678700361013e-05, |
| "loss": 0.2111, |
| "step": 1675 |
| }, |
| { |
| "epoch": 3.032490974729242, |
| "grad_norm": 1.2464319048586523, |
| "learning_rate": 1.3938628158844767e-05, |
| "loss": 0.2168, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.0415162454873648, |
| "grad_norm": 1.2891229687458905, |
| "learning_rate": 1.392057761732852e-05, |
| "loss": 0.209, |
| "step": 1685 |
| }, |
| { |
| "epoch": 3.050541516245487, |
| "grad_norm": 1.4037828122476248, |
| "learning_rate": 1.3902527075812276e-05, |
| "loss": 0.2153, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.05956678700361, |
| "grad_norm": 1.2235806246519516, |
| "learning_rate": 1.388447653429603e-05, |
| "loss": 0.2096, |
| "step": 1695 |
| }, |
| { |
| "epoch": 3.068592057761733, |
| "grad_norm": 1.378724047451379, |
| "learning_rate": 1.3866425992779784e-05, |
| "loss": 0.2085, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.077617328519856, |
| "grad_norm": 1.382858335186212, |
| "learning_rate": 1.3848375451263538e-05, |
| "loss": 0.2204, |
| "step": 1705 |
| }, |
| { |
| "epoch": 3.0866425992779782, |
| "grad_norm": 1.3137128844249182, |
| "learning_rate": 1.3830324909747293e-05, |
| "loss": 0.2162, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.095667870036101, |
| "grad_norm": 1.2029503152905936, |
| "learning_rate": 1.3812274368231049e-05, |
| "loss": 0.212, |
| "step": 1715 |
| }, |
| { |
| "epoch": 3.104693140794224, |
| "grad_norm": 1.3679968447380255, |
| "learning_rate": 1.3794223826714802e-05, |
| "loss": 0.2112, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.1137184115523464, |
| "grad_norm": 1.4087458694349797, |
| "learning_rate": 1.3776173285198558e-05, |
| "loss": 0.2124, |
| "step": 1725 |
| }, |
| { |
| "epoch": 3.1227436823104693, |
| "grad_norm": 1.2653578136284922, |
| "learning_rate": 1.3758122743682312e-05, |
| "loss": 0.2138, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.131768953068592, |
| "grad_norm": 1.3112584499411382, |
| "learning_rate": 1.3740072202166066e-05, |
| "loss": 0.2163, |
| "step": 1735 |
| }, |
| { |
| "epoch": 3.140794223826715, |
| "grad_norm": 1.4058232641289103, |
| "learning_rate": 1.372202166064982e-05, |
| "loss": 0.2159, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.1498194945848375, |
| "grad_norm": 1.41030881061776, |
| "learning_rate": 1.3703971119133575e-05, |
| "loss": 0.2145, |
| "step": 1745 |
| }, |
| { |
| "epoch": 3.1588447653429603, |
| "grad_norm": 1.5104671605422084, |
| "learning_rate": 1.3685920577617329e-05, |
| "loss": 0.2139, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.167870036101083, |
| "grad_norm": 1.2630974507680133, |
| "learning_rate": 1.3667870036101086e-05, |
| "loss": 0.2157, |
| "step": 1755 |
| }, |
| { |
| "epoch": 3.1768953068592056, |
| "grad_norm": 1.272128771203331, |
| "learning_rate": 1.364981949458484e-05, |
| "loss": 0.2161, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.1859205776173285, |
| "grad_norm": 1.3478004143294164, |
| "learning_rate": 1.3631768953068594e-05, |
| "loss": 0.2163, |
| "step": 1765 |
| }, |
| { |
| "epoch": 3.1949458483754514, |
| "grad_norm": 1.450785422963031, |
| "learning_rate": 1.3613718411552347e-05, |
| "loss": 0.2073, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.2039711191335742, |
| "grad_norm": 1.4737969733507372, |
| "learning_rate": 1.3595667870036103e-05, |
| "loss": 0.214, |
| "step": 1775 |
| }, |
| { |
| "epoch": 3.2129963898916967, |
| "grad_norm": 1.55495138656805, |
| "learning_rate": 1.3577617328519857e-05, |
| "loss": 0.217, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.2220216606498195, |
| "grad_norm": 1.301565356658425, |
| "learning_rate": 1.355956678700361e-05, |
| "loss": 0.2125, |
| "step": 1785 |
| }, |
| { |
| "epoch": 3.2310469314079424, |
| "grad_norm": 1.3526575231198374, |
| "learning_rate": 1.3541516245487364e-05, |
| "loss": 0.2139, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.240072202166065, |
| "grad_norm": 1.3480835110678375, |
| "learning_rate": 1.352346570397112e-05, |
| "loss": 0.2163, |
| "step": 1795 |
| }, |
| { |
| "epoch": 3.2490974729241877, |
| "grad_norm": 1.585271243780268, |
| "learning_rate": 1.3505415162454875e-05, |
| "loss": 0.2172, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.2581227436823106, |
| "grad_norm": 1.3914377175448838, |
| "learning_rate": 1.348736462093863e-05, |
| "loss": 0.2175, |
| "step": 1805 |
| }, |
| { |
| "epoch": 3.2671480144404335, |
| "grad_norm": 1.3096937764073042, |
| "learning_rate": 1.3469314079422385e-05, |
| "loss": 0.2161, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.276173285198556, |
| "grad_norm": 1.4025247600726756, |
| "learning_rate": 1.3451263537906139e-05, |
| "loss": 0.2192, |
| "step": 1815 |
| }, |
| { |
| "epoch": 3.2851985559566788, |
| "grad_norm": 1.341953244519878, |
| "learning_rate": 1.3433212996389892e-05, |
| "loss": 0.2193, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.2942238267148016, |
| "grad_norm": 1.1736050435468526, |
| "learning_rate": 1.3415162454873646e-05, |
| "loss": 0.2169, |
| "step": 1825 |
| }, |
| { |
| "epoch": 3.303249097472924, |
| "grad_norm": 7.343497355928463, |
| "learning_rate": 1.3397111913357402e-05, |
| "loss": 0.2221, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.312274368231047, |
| "grad_norm": 1.371834881522216, |
| "learning_rate": 1.3379061371841155e-05, |
| "loss": 0.2199, |
| "step": 1835 |
| }, |
| { |
| "epoch": 3.32129963898917, |
| "grad_norm": 1.4365047454230295, |
| "learning_rate": 1.336101083032491e-05, |
| "loss": 0.2132, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.3303249097472922, |
| "grad_norm": 1.272421653704436, |
| "learning_rate": 1.3342960288808667e-05, |
| "loss": 0.2184, |
| "step": 1845 |
| }, |
| { |
| "epoch": 3.339350180505415, |
| "grad_norm": 1.4316028096472446, |
| "learning_rate": 1.332490974729242e-05, |
| "loss": 0.2132, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.348375451263538, |
| "grad_norm": 1.2759638728894496, |
| "learning_rate": 1.3306859205776174e-05, |
| "loss": 0.2171, |
| "step": 1855 |
| }, |
| { |
| "epoch": 3.357400722021661, |
| "grad_norm": 1.3243009878900587, |
| "learning_rate": 1.328880866425993e-05, |
| "loss": 0.2187, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.3664259927797833, |
| "grad_norm": 1.357548066354826, |
| "learning_rate": 1.3270758122743683e-05, |
| "loss": 0.218, |
| "step": 1865 |
| }, |
| { |
| "epoch": 3.375451263537906, |
| "grad_norm": 1.3425006805058106, |
| "learning_rate": 1.3252707581227437e-05, |
| "loss": 0.217, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.384476534296029, |
| "grad_norm": 1.285802529815462, |
| "learning_rate": 1.3234657039711191e-05, |
| "loss": 0.216, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.3935018050541514, |
| "grad_norm": 1.2778875094894446, |
| "learning_rate": 1.3216606498194947e-05, |
| "loss": 0.2193, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.4025270758122743, |
| "grad_norm": 1.1908353789550035, |
| "learning_rate": 1.3198555956678702e-05, |
| "loss": 0.2161, |
| "step": 1885 |
| }, |
| { |
| "epoch": 3.411552346570397, |
| "grad_norm": 1.34603474003137, |
| "learning_rate": 1.3180505415162456e-05, |
| "loss": 0.2141, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.4205776173285196, |
| "grad_norm": 1.4297727153398665, |
| "learning_rate": 1.3162454873646211e-05, |
| "loss": 0.2237, |
| "step": 1895 |
| }, |
| { |
| "epoch": 3.4296028880866425, |
| "grad_norm": 1.3837512629017574, |
| "learning_rate": 1.3144404332129965e-05, |
| "loss": 0.2214, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.4386281588447654, |
| "grad_norm": 1.4387141423605057, |
| "learning_rate": 1.3126353790613719e-05, |
| "loss": 0.2187, |
| "step": 1905 |
| }, |
| { |
| "epoch": 3.4476534296028882, |
| "grad_norm": 1.2799805992130007, |
| "learning_rate": 1.3108303249097475e-05, |
| "loss": 0.2179, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.4566787003610107, |
| "grad_norm": 1.4835446521559619, |
| "learning_rate": 1.3090252707581228e-05, |
| "loss": 0.2203, |
| "step": 1915 |
| }, |
| { |
| "epoch": 3.4657039711191335, |
| "grad_norm": 1.341999213127749, |
| "learning_rate": 1.3072202166064982e-05, |
| "loss": 0.2168, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.4747292418772564, |
| "grad_norm": 1.3763087376806546, |
| "learning_rate": 1.3054151624548736e-05, |
| "loss": 0.2149, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.483754512635379, |
| "grad_norm": 1.4007971114232958, |
| "learning_rate": 1.3036101083032493e-05, |
| "loss": 0.2182, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.4927797833935017, |
| "grad_norm": 1.4572957861270215, |
| "learning_rate": 1.3018050541516247e-05, |
| "loss": 0.2147, |
| "step": 1935 |
| }, |
| { |
| "epoch": 3.5018050541516246, |
| "grad_norm": 1.3722288775763722, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.2198, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.5108303249097474, |
| "grad_norm": 1.3099638901670316, |
| "learning_rate": 1.2981949458483756e-05, |
| "loss": 0.2195, |
| "step": 1945 |
| }, |
| { |
| "epoch": 3.51985559566787, |
| "grad_norm": 1.2794841153864642, |
| "learning_rate": 1.296389891696751e-05, |
| "loss": 0.2181, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.5288808664259927, |
| "grad_norm": 1.4143673780025412, |
| "learning_rate": 1.2945848375451264e-05, |
| "loss": 0.2159, |
| "step": 1955 |
| }, |
| { |
| "epoch": 3.5379061371841156, |
| "grad_norm": 1.2691083266416614, |
| "learning_rate": 1.2927797833935018e-05, |
| "loss": 0.2145, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.546931407942238, |
| "grad_norm": 1.6855255358173022, |
| "learning_rate": 1.2909747292418773e-05, |
| "loss": 0.2142, |
| "step": 1965 |
| }, |
| { |
| "epoch": 3.555956678700361, |
| "grad_norm": 1.2577832543255076, |
| "learning_rate": 1.2891696750902527e-05, |
| "loss": 0.2185, |
| "step": 1970 |
| }, |
| { |
| "epoch": 3.564981949458484, |
| "grad_norm": 1.4260534179211517, |
| "learning_rate": 1.2873646209386283e-05, |
| "loss": 0.2227, |
| "step": 1975 |
| }, |
| { |
| "epoch": 3.5740072202166067, |
| "grad_norm": 1.2525086568956194, |
| "learning_rate": 1.2855595667870038e-05, |
| "loss": 0.2193, |
| "step": 1980 |
| }, |
| { |
| "epoch": 3.583032490974729, |
| "grad_norm": 1.2589927076038325, |
| "learning_rate": 1.2837545126353792e-05, |
| "loss": 0.2203, |
| "step": 1985 |
| }, |
| { |
| "epoch": 3.592057761732852, |
| "grad_norm": 1.417146294874885, |
| "learning_rate": 1.2819494584837546e-05, |
| "loss": 0.2172, |
| "step": 1990 |
| }, |
| { |
| "epoch": 3.601083032490975, |
| "grad_norm": 1.201542111015426, |
| "learning_rate": 1.2801444043321301e-05, |
| "loss": 0.2175, |
| "step": 1995 |
| }, |
| { |
| "epoch": 3.6101083032490973, |
| "grad_norm": 1.3003616222573477, |
| "learning_rate": 1.2783393501805055e-05, |
| "loss": 0.2218, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.61913357400722, |
| "grad_norm": 1.2526661517801678, |
| "learning_rate": 1.2765342960288809e-05, |
| "loss": 0.2211, |
| "step": 2005 |
| }, |
| { |
| "epoch": 3.628158844765343, |
| "grad_norm": 1.3562427529698038, |
| "learning_rate": 1.2747292418772563e-05, |
| "loss": 0.2202, |
| "step": 2010 |
| }, |
| { |
| "epoch": 3.637184115523466, |
| "grad_norm": 1.337359110529062, |
| "learning_rate": 1.272924187725632e-05, |
| "loss": 0.2192, |
| "step": 2015 |
| }, |
| { |
| "epoch": 3.6462093862815883, |
| "grad_norm": 1.3247434076106055, |
| "learning_rate": 1.2711191335740074e-05, |
| "loss": 0.2175, |
| "step": 2020 |
| }, |
| { |
| "epoch": 3.655234657039711, |
| "grad_norm": 1.401401481932495, |
| "learning_rate": 1.2693140794223828e-05, |
| "loss": 0.2176, |
| "step": 2025 |
| }, |
| { |
| "epoch": 3.664259927797834, |
| "grad_norm": 1.3989599012921654, |
| "learning_rate": 1.2675090252707583e-05, |
| "loss": 0.2145, |
| "step": 2030 |
| }, |
| { |
| "epoch": 3.6732851985559565, |
| "grad_norm": 1.37942404901644, |
| "learning_rate": 1.2657039711191337e-05, |
| "loss": 0.2176, |
| "step": 2035 |
| }, |
| { |
| "epoch": 3.6823104693140793, |
| "grad_norm": 1.3768859929806074, |
| "learning_rate": 1.263898916967509e-05, |
| "loss": 0.2216, |
| "step": 2040 |
| }, |
| { |
| "epoch": 3.691335740072202, |
| "grad_norm": 1.4119126477817214, |
| "learning_rate": 1.2620938628158845e-05, |
| "loss": 0.2176, |
| "step": 2045 |
| }, |
| { |
| "epoch": 3.700361010830325, |
| "grad_norm": 1.521657968464524, |
| "learning_rate": 1.26028880866426e-05, |
| "loss": 0.2215, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.7093862815884475, |
| "grad_norm": 1.1673555154434911, |
| "learning_rate": 1.2584837545126354e-05, |
| "loss": 0.2207, |
| "step": 2055 |
| }, |
| { |
| "epoch": 3.7184115523465704, |
| "grad_norm": 1.3165831513135962, |
| "learning_rate": 1.256678700361011e-05, |
| "loss": 0.2199, |
| "step": 2060 |
| }, |
| { |
| "epoch": 3.7274368231046933, |
| "grad_norm": 1.2771969687673677, |
| "learning_rate": 1.2548736462093865e-05, |
| "loss": 0.2174, |
| "step": 2065 |
| }, |
| { |
| "epoch": 3.7364620938628157, |
| "grad_norm": 1.3460833673268793, |
| "learning_rate": 1.2530685920577619e-05, |
| "loss": 0.2176, |
| "step": 2070 |
| }, |
| { |
| "epoch": 3.7454873646209386, |
| "grad_norm": 1.321801578603406, |
| "learning_rate": 1.2512635379061373e-05, |
| "loss": 0.2181, |
| "step": 2075 |
| }, |
| { |
| "epoch": 3.7545126353790614, |
| "grad_norm": 1.296115804685079, |
| "learning_rate": 1.2494584837545128e-05, |
| "loss": 0.2174, |
| "step": 2080 |
| }, |
| { |
| "epoch": 3.7635379061371843, |
| "grad_norm": 1.611992163605043, |
| "learning_rate": 1.2476534296028882e-05, |
| "loss": 0.2218, |
| "step": 2085 |
| }, |
| { |
| "epoch": 3.7725631768953067, |
| "grad_norm": 1.333178205245191, |
| "learning_rate": 1.2458483754512636e-05, |
| "loss": 0.2191, |
| "step": 2090 |
| }, |
| { |
| "epoch": 3.7815884476534296, |
| "grad_norm": 1.3160245659773944, |
| "learning_rate": 1.244043321299639e-05, |
| "loss": 0.2103, |
| "step": 2095 |
| }, |
| { |
| "epoch": 3.7906137184115525, |
| "grad_norm": 1.229821228240215, |
| "learning_rate": 1.2422382671480145e-05, |
| "loss": 0.2211, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.799638989169675, |
| "grad_norm": 1.4668142178779533, |
| "learning_rate": 1.24043321299639e-05, |
| "loss": 0.2157, |
| "step": 2105 |
| }, |
| { |
| "epoch": 3.808664259927798, |
| "grad_norm": 1.2767987004558847, |
| "learning_rate": 1.2386281588447654e-05, |
| "loss": 0.2139, |
| "step": 2110 |
| }, |
| { |
| "epoch": 3.8176895306859207, |
| "grad_norm": 1.3419257818618695, |
| "learning_rate": 1.236823104693141e-05, |
| "loss": 0.2177, |
| "step": 2115 |
| }, |
| { |
| "epoch": 3.8267148014440435, |
| "grad_norm": 1.3106254825745933, |
| "learning_rate": 1.2350180505415164e-05, |
| "loss": 0.2188, |
| "step": 2120 |
| }, |
| { |
| "epoch": 3.835740072202166, |
| "grad_norm": 1.291543676501794, |
| "learning_rate": 1.2332129963898918e-05, |
| "loss": 0.2211, |
| "step": 2125 |
| }, |
| { |
| "epoch": 3.844765342960289, |
| "grad_norm": 1.4261998506808886, |
| "learning_rate": 1.2314079422382671e-05, |
| "loss": 0.2197, |
| "step": 2130 |
| }, |
| { |
| "epoch": 3.8537906137184117, |
| "grad_norm": 1.358333414575488, |
| "learning_rate": 1.2296028880866427e-05, |
| "loss": 0.2159, |
| "step": 2135 |
| }, |
| { |
| "epoch": 3.862815884476534, |
| "grad_norm": 1.474955377700595, |
| "learning_rate": 1.227797833935018e-05, |
| "loss": 0.2218, |
| "step": 2140 |
| }, |
| { |
| "epoch": 3.871841155234657, |
| "grad_norm": 1.3720904636423812, |
| "learning_rate": 1.2259927797833938e-05, |
| "loss": 0.2177, |
| "step": 2145 |
| }, |
| { |
| "epoch": 3.88086642599278, |
| "grad_norm": 1.5460881931560433, |
| "learning_rate": 1.2241877256317692e-05, |
| "loss": 0.2202, |
| "step": 2150 |
| }, |
| { |
| "epoch": 3.8898916967509027, |
| "grad_norm": 1.372535903476554, |
| "learning_rate": 1.2223826714801446e-05, |
| "loss": 0.2133, |
| "step": 2155 |
| }, |
| { |
| "epoch": 3.898916967509025, |
| "grad_norm": 1.3380599019325592, |
| "learning_rate": 1.22057761732852e-05, |
| "loss": 0.2156, |
| "step": 2160 |
| }, |
| { |
| "epoch": 3.907942238267148, |
| "grad_norm": 1.5215368016858366, |
| "learning_rate": 1.2187725631768955e-05, |
| "loss": 0.2209, |
| "step": 2165 |
| }, |
| { |
| "epoch": 3.916967509025271, |
| "grad_norm": 1.4103734495459146, |
| "learning_rate": 1.2169675090252709e-05, |
| "loss": 0.2156, |
| "step": 2170 |
| }, |
| { |
| "epoch": 3.9259927797833933, |
| "grad_norm": 1.3438329909754207, |
| "learning_rate": 1.2151624548736462e-05, |
| "loss": 0.2199, |
| "step": 2175 |
| }, |
| { |
| "epoch": 3.935018050541516, |
| "grad_norm": 1.2636238619595532, |
| "learning_rate": 1.2133574007220216e-05, |
| "loss": 0.2197, |
| "step": 2180 |
| }, |
| { |
| "epoch": 3.944043321299639, |
| "grad_norm": 1.1840357472659475, |
| "learning_rate": 1.2115523465703972e-05, |
| "loss": 0.2122, |
| "step": 2185 |
| }, |
| { |
| "epoch": 3.953068592057762, |
| "grad_norm": 1.2040616851708978, |
| "learning_rate": 1.2097472924187727e-05, |
| "loss": 0.2185, |
| "step": 2190 |
| }, |
| { |
| "epoch": 3.9620938628158844, |
| "grad_norm": 1.358963573245228, |
| "learning_rate": 1.2079422382671481e-05, |
| "loss": 0.2153, |
| "step": 2195 |
| }, |
| { |
| "epoch": 3.9711191335740073, |
| "grad_norm": 1.2933453928461196, |
| "learning_rate": 1.2061371841155237e-05, |
| "loss": 0.2157, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.98014440433213, |
| "grad_norm": 1.207608106082354, |
| "learning_rate": 1.204332129963899e-05, |
| "loss": 0.2199, |
| "step": 2205 |
| }, |
| { |
| "epoch": 3.9891696750902526, |
| "grad_norm": 1.3581034288222624, |
| "learning_rate": 1.2025270758122744e-05, |
| "loss": 0.2196, |
| "step": 2210 |
| }, |
| { |
| "epoch": 3.9981949458483754, |
| "grad_norm": 1.508352838653625, |
| "learning_rate": 1.2007220216606498e-05, |
| "loss": 0.2219, |
| "step": 2215 |
| }, |
| { |
| "epoch": 4.007220216606498, |
| "grad_norm": 1.1525808259183898, |
| "learning_rate": 1.1989169675090254e-05, |
| "loss": 0.172, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.016245487364621, |
| "grad_norm": 1.6837096670325025, |
| "learning_rate": 1.1971119133574007e-05, |
| "loss": 0.1473, |
| "step": 2225 |
| }, |
| { |
| "epoch": 4.025270758122744, |
| "grad_norm": 1.3622634803237335, |
| "learning_rate": 1.1953068592057765e-05, |
| "loss": 0.1424, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.034296028880866, |
| "grad_norm": 1.167482488838041, |
| "learning_rate": 1.1935018050541518e-05, |
| "loss": 0.1428, |
| "step": 2235 |
| }, |
| { |
| "epoch": 4.043321299638989, |
| "grad_norm": 1.370361726176911, |
| "learning_rate": 1.1916967509025272e-05, |
| "loss": 0.1385, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.052346570397112, |
| "grad_norm": 1.2641058446971232, |
| "learning_rate": 1.1898916967509026e-05, |
| "loss": 0.1358, |
| "step": 2245 |
| }, |
| { |
| "epoch": 4.061371841155235, |
| "grad_norm": 1.4838069133238292, |
| "learning_rate": 1.1880866425992782e-05, |
| "loss": 0.1369, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.0703971119133575, |
| "grad_norm": 1.411880125524948, |
| "learning_rate": 1.1862815884476535e-05, |
| "loss": 0.1376, |
| "step": 2255 |
| }, |
| { |
| "epoch": 4.07942238267148, |
| "grad_norm": 1.532801179696419, |
| "learning_rate": 1.184476534296029e-05, |
| "loss": 0.144, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.088447653429603, |
| "grad_norm": 1.2722280030975663, |
| "learning_rate": 1.1826714801444043e-05, |
| "loss": 0.1377, |
| "step": 2265 |
| }, |
| { |
| "epoch": 4.097472924187725, |
| "grad_norm": 1.257371634404589, |
| "learning_rate": 1.1808664259927799e-05, |
| "loss": 0.1376, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.106498194945848, |
| "grad_norm": 1.2867994012584483, |
| "learning_rate": 1.1790613718411554e-05, |
| "loss": 0.1438, |
| "step": 2275 |
| }, |
| { |
| "epoch": 4.115523465703971, |
| "grad_norm": 1.3235714594660852, |
| "learning_rate": 1.1772563176895308e-05, |
| "loss": 0.1375, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.124548736462094, |
| "grad_norm": 1.3538854718074433, |
| "learning_rate": 1.1754512635379063e-05, |
| "loss": 0.1434, |
| "step": 2285 |
| }, |
| { |
| "epoch": 4.133574007220217, |
| "grad_norm": 1.3387220340603523, |
| "learning_rate": 1.1736462093862817e-05, |
| "loss": 0.1439, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.14259927797834, |
| "grad_norm": 1.3342275390499512, |
| "learning_rate": 1.1718411552346571e-05, |
| "loss": 0.1423, |
| "step": 2295 |
| }, |
| { |
| "epoch": 4.1516245487364625, |
| "grad_norm": 1.3172292559591101, |
| "learning_rate": 1.1700361010830325e-05, |
| "loss": 0.1397, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.1606498194945845, |
| "grad_norm": 1.5089183076261925, |
| "learning_rate": 1.168231046931408e-05, |
| "loss": 0.1478, |
| "step": 2305 |
| }, |
| { |
| "epoch": 4.169675090252707, |
| "grad_norm": 1.2882037882410693, |
| "learning_rate": 1.1664259927797834e-05, |
| "loss": 0.1441, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.17870036101083, |
| "grad_norm": 1.3275271715737593, |
| "learning_rate": 1.1646209386281588e-05, |
| "loss": 0.1467, |
| "step": 2315 |
| }, |
| { |
| "epoch": 4.187725631768953, |
| "grad_norm": 1.26669118474737, |
| "learning_rate": 1.1628158844765345e-05, |
| "loss": 0.1448, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.196750902527076, |
| "grad_norm": 1.287627972081866, |
| "learning_rate": 1.1610108303249099e-05, |
| "loss": 0.1463, |
| "step": 2325 |
| }, |
| { |
| "epoch": 4.205776173285199, |
| "grad_norm": 1.4698351462515338, |
| "learning_rate": 1.1592057761732853e-05, |
| "loss": 0.1418, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.214801444043322, |
| "grad_norm": 1.214815481124232, |
| "learning_rate": 1.1574007220216608e-05, |
| "loss": 0.1414, |
| "step": 2335 |
| }, |
| { |
| "epoch": 4.223826714801444, |
| "grad_norm": 1.3143493372944082, |
| "learning_rate": 1.1555956678700362e-05, |
| "loss": 0.1434, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.2328519855595665, |
| "grad_norm": 1.312607955074974, |
| "learning_rate": 1.1537906137184116e-05, |
| "loss": 0.144, |
| "step": 2345 |
| }, |
| { |
| "epoch": 4.241877256317689, |
| "grad_norm": 1.2190888161416562, |
| "learning_rate": 1.151985559566787e-05, |
| "loss": 0.1426, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.250902527075812, |
| "grad_norm": 1.3594194563402886, |
| "learning_rate": 1.1501805054151625e-05, |
| "loss": 0.1435, |
| "step": 2355 |
| }, |
| { |
| "epoch": 4.259927797833935, |
| "grad_norm": 1.4782948938985976, |
| "learning_rate": 1.148375451263538e-05, |
| "loss": 0.1454, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.268953068592058, |
| "grad_norm": 1.336811201239736, |
| "learning_rate": 1.1465703971119135e-05, |
| "loss": 0.1454, |
| "step": 2365 |
| }, |
| { |
| "epoch": 4.277978339350181, |
| "grad_norm": 1.2482577868629918, |
| "learning_rate": 1.144765342960289e-05, |
| "loss": 0.1411, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.287003610108303, |
| "grad_norm": 1.2732430927693474, |
| "learning_rate": 1.1429602888086644e-05, |
| "loss": 0.1417, |
| "step": 2375 |
| }, |
| { |
| "epoch": 4.296028880866426, |
| "grad_norm": 1.5099576743037075, |
| "learning_rate": 1.1411552346570398e-05, |
| "loss": 0.1448, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.305054151624549, |
| "grad_norm": 1.289221803177811, |
| "learning_rate": 1.1393501805054152e-05, |
| "loss": 0.147, |
| "step": 2385 |
| }, |
| { |
| "epoch": 4.3140794223826715, |
| "grad_norm": 1.3263143595170082, |
| "learning_rate": 1.1375451263537907e-05, |
| "loss": 0.146, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.323104693140794, |
| "grad_norm": 1.2413770324582891, |
| "learning_rate": 1.1357400722021661e-05, |
| "loss": 0.1457, |
| "step": 2395 |
| }, |
| { |
| "epoch": 4.332129963898917, |
| "grad_norm": 1.2639388626402195, |
| "learning_rate": 1.1339350180505415e-05, |
| "loss": 0.1507, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.34115523465704, |
| "grad_norm": 1.264633141154961, |
| "learning_rate": 1.1321299638989172e-05, |
| "loss": 0.1449, |
| "step": 2405 |
| }, |
| { |
| "epoch": 4.350180505415162, |
| "grad_norm": 1.436154267765744, |
| "learning_rate": 1.1303249097472926e-05, |
| "loss": 0.1507, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.359205776173285, |
| "grad_norm": 1.1582422344120165, |
| "learning_rate": 1.128519855595668e-05, |
| "loss": 0.1484, |
| "step": 2415 |
| }, |
| { |
| "epoch": 4.368231046931408, |
| "grad_norm": 1.279883119070738, |
| "learning_rate": 1.1267148014440435e-05, |
| "loss": 0.1427, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.377256317689531, |
| "grad_norm": 1.3274819211696616, |
| "learning_rate": 1.1249097472924189e-05, |
| "loss": 0.1452, |
| "step": 2425 |
| }, |
| { |
| "epoch": 4.386281588447654, |
| "grad_norm": 1.3298646533007747, |
| "learning_rate": 1.1231046931407943e-05, |
| "loss": 0.1491, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.3953068592057765, |
| "grad_norm": 1.2549287331161112, |
| "learning_rate": 1.1212996389891697e-05, |
| "loss": 0.1489, |
| "step": 2435 |
| }, |
| { |
| "epoch": 4.404332129963899, |
| "grad_norm": 1.400198206798889, |
| "learning_rate": 1.1194945848375452e-05, |
| "loss": 0.1451, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.413357400722021, |
| "grad_norm": 1.3684946718616282, |
| "learning_rate": 1.1176895306859206e-05, |
| "loss": 0.1521, |
| "step": 2445 |
| }, |
| { |
| "epoch": 4.422382671480144, |
| "grad_norm": 1.2087420140469356, |
| "learning_rate": 1.1158844765342961e-05, |
| "loss": 0.1454, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.431407942238267, |
| "grad_norm": 1.2910589959993624, |
| "learning_rate": 1.1140794223826717e-05, |
| "loss": 0.1449, |
| "step": 2455 |
| }, |
| { |
| "epoch": 4.44043321299639, |
| "grad_norm": 1.4447075027189171, |
| "learning_rate": 1.112274368231047e-05, |
| "loss": 0.1443, |
| "step": 2460 |
| }, |
| { |
| "epoch": 4.449458483754513, |
| "grad_norm": 1.3740604349535315, |
| "learning_rate": 1.1104693140794225e-05, |
| "loss": 0.1413, |
| "step": 2465 |
| }, |
| { |
| "epoch": 4.458483754512636, |
| "grad_norm": 1.4050751545666154, |
| "learning_rate": 1.1086642599277978e-05, |
| "loss": 0.1483, |
| "step": 2470 |
| }, |
| { |
| "epoch": 4.467509025270758, |
| "grad_norm": 1.2385281470710559, |
| "learning_rate": 1.1068592057761734e-05, |
| "loss": 0.1485, |
| "step": 2475 |
| }, |
| { |
| "epoch": 4.4765342960288805, |
| "grad_norm": 1.2281174369886623, |
| "learning_rate": 1.1050541516245488e-05, |
| "loss": 0.1485, |
| "step": 2480 |
| }, |
| { |
| "epoch": 4.485559566787003, |
| "grad_norm": 1.2151464275592043, |
| "learning_rate": 1.1032490974729241e-05, |
| "loss": 0.1418, |
| "step": 2485 |
| }, |
| { |
| "epoch": 4.494584837545126, |
| "grad_norm": 1.3791171874873505, |
| "learning_rate": 1.1014440433212999e-05, |
| "loss": 0.1448, |
| "step": 2490 |
| }, |
| { |
| "epoch": 4.503610108303249, |
| "grad_norm": 1.2082141878609713, |
| "learning_rate": 1.0996389891696753e-05, |
| "loss": 0.142, |
| "step": 2495 |
| }, |
| { |
| "epoch": 4.512635379061372, |
| "grad_norm": 1.2284432308372542, |
| "learning_rate": 1.0978339350180506e-05, |
| "loss": 0.1462, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.512635379061372, |
| "eval_loss": 0.11392025649547577, |
| "eval_runtime": 768.0942, |
| "eval_samples_per_second": 17.309, |
| "eval_steps_per_second": 0.721, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.521660649819495, |
| "grad_norm": 1.230111612515777, |
| "learning_rate": 1.0960288808664262e-05, |
| "loss": 0.1425, |
| "step": 2505 |
| }, |
| { |
| "epoch": 4.530685920577618, |
| "grad_norm": 1.3673654833429962, |
| "learning_rate": 1.0942238267148016e-05, |
| "loss": 0.146, |
| "step": 2510 |
| }, |
| { |
| "epoch": 4.53971119133574, |
| "grad_norm": 1.470735178829857, |
| "learning_rate": 1.092418772563177e-05, |
| "loss": 0.1512, |
| "step": 2515 |
| }, |
| { |
| "epoch": 4.548736462093863, |
| "grad_norm": 1.431415105533255, |
| "learning_rate": 1.0906137184115523e-05, |
| "loss": 0.146, |
| "step": 2520 |
| }, |
| { |
| "epoch": 4.5577617328519855, |
| "grad_norm": 1.304958395632544, |
| "learning_rate": 1.0888086642599279e-05, |
| "loss": 0.1405, |
| "step": 2525 |
| }, |
| { |
| "epoch": 4.566787003610108, |
| "grad_norm": 1.202244774918958, |
| "learning_rate": 1.0870036101083033e-05, |
| "loss": 0.1494, |
| "step": 2530 |
| }, |
| { |
| "epoch": 4.575812274368231, |
| "grad_norm": 1.2061190438112863, |
| "learning_rate": 1.0851985559566788e-05, |
| "loss": 0.1454, |
| "step": 2535 |
| }, |
| { |
| "epoch": 4.584837545126354, |
| "grad_norm": 1.2303141902787755, |
| "learning_rate": 1.0833935018050544e-05, |
| "loss": 0.1464, |
| "step": 2540 |
| }, |
| { |
| "epoch": 4.593862815884476, |
| "grad_norm": 1.1973084706979935, |
| "learning_rate": 1.0815884476534297e-05, |
| "loss": 0.1525, |
| "step": 2545 |
| }, |
| { |
| "epoch": 4.602888086642599, |
| "grad_norm": 1.265181420660065, |
| "learning_rate": 1.0797833935018051e-05, |
| "loss": 0.1467, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.611913357400722, |
| "grad_norm": 1.3348119006510952, |
| "learning_rate": 1.0779783393501805e-05, |
| "loss": 0.1503, |
| "step": 2555 |
| }, |
| { |
| "epoch": 4.620938628158845, |
| "grad_norm": 1.3586496560954744, |
| "learning_rate": 1.076173285198556e-05, |
| "loss": 0.1478, |
| "step": 2560 |
| }, |
| { |
| "epoch": 4.629963898916968, |
| "grad_norm": 1.2646064815159468, |
| "learning_rate": 1.0743682310469314e-05, |
| "loss": 0.1506, |
| "step": 2565 |
| }, |
| { |
| "epoch": 4.6389891696750905, |
| "grad_norm": 1.2208996441560755, |
| "learning_rate": 1.0725631768953068e-05, |
| "loss": 0.1489, |
| "step": 2570 |
| }, |
| { |
| "epoch": 4.648014440433213, |
| "grad_norm": 1.2249820178968749, |
| "learning_rate": 1.0707581227436824e-05, |
| "loss": 0.1491, |
| "step": 2575 |
| }, |
| { |
| "epoch": 4.657039711191336, |
| "grad_norm": 1.2788771690689633, |
| "learning_rate": 1.068953068592058e-05, |
| "loss": 0.1468, |
| "step": 2580 |
| }, |
| { |
| "epoch": 4.666064981949458, |
| "grad_norm": 1.235614926954878, |
| "learning_rate": 1.0671480144404333e-05, |
| "loss": 0.1448, |
| "step": 2585 |
| }, |
| { |
| "epoch": 4.675090252707581, |
| "grad_norm": 1.4835623380890686, |
| "learning_rate": 1.0653429602888089e-05, |
| "loss": 0.1473, |
| "step": 2590 |
| }, |
| { |
| "epoch": 4.684115523465704, |
| "grad_norm": 1.3282902129276972, |
| "learning_rate": 1.0635379061371842e-05, |
| "loss": 0.1513, |
| "step": 2595 |
| }, |
| { |
| "epoch": 4.693140794223827, |
| "grad_norm": 1.3495078303520642, |
| "learning_rate": 1.0617328519855596e-05, |
| "loss": 0.1493, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.70216606498195, |
| "grad_norm": 1.3624724519254527, |
| "learning_rate": 1.059927797833935e-05, |
| "loss": 0.1464, |
| "step": 2605 |
| }, |
| { |
| "epoch": 4.7111913357400725, |
| "grad_norm": 1.5149872464672054, |
| "learning_rate": 1.0581227436823106e-05, |
| "loss": 0.1476, |
| "step": 2610 |
| }, |
| { |
| "epoch": 4.7202166064981945, |
| "grad_norm": 1.4140631499929084, |
| "learning_rate": 1.056317689530686e-05, |
| "loss": 0.1467, |
| "step": 2615 |
| }, |
| { |
| "epoch": 4.729241877256317, |
| "grad_norm": 1.3773886206332047, |
| "learning_rate": 1.0545126353790615e-05, |
| "loss": 0.1478, |
| "step": 2620 |
| }, |
| { |
| "epoch": 4.73826714801444, |
| "grad_norm": 1.360291667120557, |
| "learning_rate": 1.052707581227437e-05, |
| "loss": 0.1511, |
| "step": 2625 |
| }, |
| { |
| "epoch": 4.747292418772563, |
| "grad_norm": 2.0327952709868455, |
| "learning_rate": 1.0509025270758124e-05, |
| "loss": 0.1494, |
| "step": 2630 |
| }, |
| { |
| "epoch": 4.756317689530686, |
| "grad_norm": 1.1791100232489107, |
| "learning_rate": 1.0490974729241878e-05, |
| "loss": 0.1489, |
| "step": 2635 |
| }, |
| { |
| "epoch": 4.765342960288809, |
| "grad_norm": 1.3854796935196865, |
| "learning_rate": 1.0472924187725632e-05, |
| "loss": 0.1474, |
| "step": 2640 |
| }, |
| { |
| "epoch": 4.774368231046932, |
| "grad_norm": 1.195069413636359, |
| "learning_rate": 1.0454873646209387e-05, |
| "loss": 0.1473, |
| "step": 2645 |
| }, |
| { |
| "epoch": 4.783393501805055, |
| "grad_norm": 1.1422586625889126, |
| "learning_rate": 1.0436823104693141e-05, |
| "loss": 0.1492, |
| "step": 2650 |
| }, |
| { |
| "epoch": 4.792418772563177, |
| "grad_norm": 1.307147062583015, |
| "learning_rate": 1.0418772563176895e-05, |
| "loss": 0.1512, |
| "step": 2655 |
| }, |
| { |
| "epoch": 4.8014440433212995, |
| "grad_norm": 1.3584746863088188, |
| "learning_rate": 1.040072202166065e-05, |
| "loss": 0.1457, |
| "step": 2660 |
| }, |
| { |
| "epoch": 4.810469314079422, |
| "grad_norm": 1.3606216007098004, |
| "learning_rate": 1.0382671480144406e-05, |
| "loss": 0.1523, |
| "step": 2665 |
| }, |
| { |
| "epoch": 4.819494584837545, |
| "grad_norm": 1.2830676853953231, |
| "learning_rate": 1.036462093862816e-05, |
| "loss": 0.1468, |
| "step": 2670 |
| }, |
| { |
| "epoch": 4.828519855595668, |
| "grad_norm": 1.4025094885250153, |
| "learning_rate": 1.0346570397111915e-05, |
| "loss": 0.1531, |
| "step": 2675 |
| }, |
| { |
| "epoch": 4.837545126353791, |
| "grad_norm": 1.2538328025809764, |
| "learning_rate": 1.0328519855595669e-05, |
| "loss": 0.1511, |
| "step": 2680 |
| }, |
| { |
| "epoch": 4.846570397111913, |
| "grad_norm": 1.2447983687959376, |
| "learning_rate": 1.0310469314079423e-05, |
| "loss": 0.1482, |
| "step": 2685 |
| }, |
| { |
| "epoch": 4.855595667870036, |
| "grad_norm": 1.259843451978081, |
| "learning_rate": 1.0292418772563177e-05, |
| "loss": 0.1488, |
| "step": 2690 |
| }, |
| { |
| "epoch": 4.864620938628159, |
| "grad_norm": 1.2570447719051847, |
| "learning_rate": 1.0274368231046932e-05, |
| "loss": 0.1486, |
| "step": 2695 |
| }, |
| { |
| "epoch": 4.873646209386282, |
| "grad_norm": 1.329856783565704, |
| "learning_rate": 1.0256317689530686e-05, |
| "loss": 0.1529, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.882671480144404, |
| "grad_norm": 1.2638883131188237, |
| "learning_rate": 1.023826714801444e-05, |
| "loss": 0.1491, |
| "step": 2705 |
| }, |
| { |
| "epoch": 4.891696750902527, |
| "grad_norm": 1.308091101546035, |
| "learning_rate": 1.0220216606498197e-05, |
| "loss": 0.1444, |
| "step": 2710 |
| }, |
| { |
| "epoch": 4.90072202166065, |
| "grad_norm": 1.401420044812705, |
| "learning_rate": 1.0202166064981951e-05, |
| "loss": 0.1496, |
| "step": 2715 |
| }, |
| { |
| "epoch": 4.909747292418773, |
| "grad_norm": 1.1802217133046131, |
| "learning_rate": 1.0184115523465705e-05, |
| "loss": 0.1447, |
| "step": 2720 |
| }, |
| { |
| "epoch": 4.918772563176895, |
| "grad_norm": 1.418350121725291, |
| "learning_rate": 1.016606498194946e-05, |
| "loss": 0.1507, |
| "step": 2725 |
| }, |
| { |
| "epoch": 4.927797833935018, |
| "grad_norm": 1.3201116853524673, |
| "learning_rate": 1.0148014440433214e-05, |
| "loss": 0.1486, |
| "step": 2730 |
| }, |
| { |
| "epoch": 4.936823104693141, |
| "grad_norm": 1.3651415755311056, |
| "learning_rate": 1.0129963898916968e-05, |
| "loss": 0.1482, |
| "step": 2735 |
| }, |
| { |
| "epoch": 4.945848375451264, |
| "grad_norm": 1.2638304079285558, |
| "learning_rate": 1.0111913357400722e-05, |
| "loss": 0.1473, |
| "step": 2740 |
| }, |
| { |
| "epoch": 4.9548736462093865, |
| "grad_norm": 1.1922484627616543, |
| "learning_rate": 1.0093862815884477e-05, |
| "loss": 0.1515, |
| "step": 2745 |
| }, |
| { |
| "epoch": 4.963898916967509, |
| "grad_norm": 1.32172317810071, |
| "learning_rate": 1.0075812274368233e-05, |
| "loss": 0.151, |
| "step": 2750 |
| }, |
| { |
| "epoch": 4.972924187725631, |
| "grad_norm": 1.3124037260863468, |
| "learning_rate": 1.0057761732851987e-05, |
| "loss": 0.1458, |
| "step": 2755 |
| }, |
| { |
| "epoch": 4.981949458483754, |
| "grad_norm": 1.308966677769924, |
| "learning_rate": 1.0039711191335742e-05, |
| "loss": 0.1466, |
| "step": 2760 |
| }, |
| { |
| "epoch": 4.990974729241877, |
| "grad_norm": 1.3394659011449825, |
| "learning_rate": 1.0021660649819496e-05, |
| "loss": 0.1488, |
| "step": 2765 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.1211076532163786, |
| "learning_rate": 1.000361010830325e-05, |
| "loss": 0.1451, |
| "step": 2770 |
| }, |
| { |
| "epoch": 5.009025270758123, |
| "grad_norm": 0.8762321663139153, |
| "learning_rate": 9.985559566787004e-06, |
| "loss": 0.0838, |
| "step": 2775 |
| }, |
| { |
| "epoch": 5.018050541516246, |
| "grad_norm": 1.1947630523435986, |
| "learning_rate": 9.967509025270759e-06, |
| "loss": 0.0791, |
| "step": 2780 |
| }, |
| { |
| "epoch": 5.027075812274369, |
| "grad_norm": 1.1957279068028621, |
| "learning_rate": 9.949458483754515e-06, |
| "loss": 0.0762, |
| "step": 2785 |
| }, |
| { |
| "epoch": 5.036101083032491, |
| "grad_norm": 1.0406919344632632, |
| "learning_rate": 9.931407942238268e-06, |
| "loss": 0.0789, |
| "step": 2790 |
| }, |
| { |
| "epoch": 5.0451263537906135, |
| "grad_norm": 1.0243376437328686, |
| "learning_rate": 9.913357400722022e-06, |
| "loss": 0.0782, |
| "step": 2795 |
| }, |
| { |
| "epoch": 5.054151624548736, |
| "grad_norm": 1.1064381648099426, |
| "learning_rate": 9.895306859205776e-06, |
| "loss": 0.0777, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.063176895306859, |
| "grad_norm": 0.9669535500625326, |
| "learning_rate": 9.877256317689532e-06, |
| "loss": 0.0756, |
| "step": 2805 |
| }, |
| { |
| "epoch": 5.072202166064982, |
| "grad_norm": 1.0367193436042796, |
| "learning_rate": 9.859205776173287e-06, |
| "loss": 0.0759, |
| "step": 2810 |
| }, |
| { |
| "epoch": 5.081227436823105, |
| "grad_norm": 1.0786119261971507, |
| "learning_rate": 9.84115523465704e-06, |
| "loss": 0.0759, |
| "step": 2815 |
| }, |
| { |
| "epoch": 5.090252707581228, |
| "grad_norm": 1.000167422197771, |
| "learning_rate": 9.823104693140795e-06, |
| "loss": 0.0785, |
| "step": 2820 |
| }, |
| { |
| "epoch": 5.09927797833935, |
| "grad_norm": 0.9542524209315384, |
| "learning_rate": 9.805054151624548e-06, |
| "loss": 0.0773, |
| "step": 2825 |
| }, |
| { |
| "epoch": 5.108303249097473, |
| "grad_norm": 0.9683260308018407, |
| "learning_rate": 9.787003610108304e-06, |
| "loss": 0.0773, |
| "step": 2830 |
| }, |
| { |
| "epoch": 5.117328519855596, |
| "grad_norm": 1.0346987353569643, |
| "learning_rate": 9.768953068592058e-06, |
| "loss": 0.0748, |
| "step": 2835 |
| }, |
| { |
| "epoch": 5.126353790613718, |
| "grad_norm": 1.1793270223432109, |
| "learning_rate": 9.750902527075813e-06, |
| "loss": 0.078, |
| "step": 2840 |
| }, |
| { |
| "epoch": 5.135379061371841, |
| "grad_norm": 0.8930953181531934, |
| "learning_rate": 9.732851985559567e-06, |
| "loss": 0.0772, |
| "step": 2845 |
| }, |
| { |
| "epoch": 5.144404332129964, |
| "grad_norm": 1.023338270678483, |
| "learning_rate": 9.714801444043323e-06, |
| "loss": 0.0781, |
| "step": 2850 |
| }, |
| { |
| "epoch": 5.153429602888087, |
| "grad_norm": 0.946465899056401, |
| "learning_rate": 9.696750902527076e-06, |
| "loss": 0.0766, |
| "step": 2855 |
| }, |
| { |
| "epoch": 5.162454873646209, |
| "grad_norm": 0.9856919879257939, |
| "learning_rate": 9.67870036101083e-06, |
| "loss": 0.078, |
| "step": 2860 |
| }, |
| { |
| "epoch": 5.171480144404332, |
| "grad_norm": 0.9817835099066485, |
| "learning_rate": 9.660649819494586e-06, |
| "loss": 0.0778, |
| "step": 2865 |
| }, |
| { |
| "epoch": 5.180505415162455, |
| "grad_norm": 0.9867957619852264, |
| "learning_rate": 9.642599277978341e-06, |
| "loss": 0.0794, |
| "step": 2870 |
| }, |
| { |
| "epoch": 5.189530685920578, |
| "grad_norm": 0.9281749841484341, |
| "learning_rate": 9.624548736462095e-06, |
| "loss": 0.079, |
| "step": 2875 |
| }, |
| { |
| "epoch": 5.1985559566787005, |
| "grad_norm": 1.1418877678658672, |
| "learning_rate": 9.606498194945849e-06, |
| "loss": 0.0816, |
| "step": 2880 |
| }, |
| { |
| "epoch": 5.207581227436823, |
| "grad_norm": 1.2801357990196476, |
| "learning_rate": 9.588447653429603e-06, |
| "loss": 0.0802, |
| "step": 2885 |
| }, |
| { |
| "epoch": 5.216606498194946, |
| "grad_norm": 1.08414475669863, |
| "learning_rate": 9.570397111913358e-06, |
| "loss": 0.0826, |
| "step": 2890 |
| }, |
| { |
| "epoch": 5.225631768953068, |
| "grad_norm": 1.0307284905851641, |
| "learning_rate": 9.552346570397114e-06, |
| "loss": 0.082, |
| "step": 2895 |
| }, |
| { |
| "epoch": 5.234657039711191, |
| "grad_norm": 1.0409677907783568, |
| "learning_rate": 9.534296028880868e-06, |
| "loss": 0.0794, |
| "step": 2900 |
| }, |
| { |
| "epoch": 5.243682310469314, |
| "grad_norm": 1.0620698441964218, |
| "learning_rate": 9.516245487364621e-06, |
| "loss": 0.0805, |
| "step": 2905 |
| }, |
| { |
| "epoch": 5.252707581227437, |
| "grad_norm": 1.074425732291813, |
| "learning_rate": 9.498194945848375e-06, |
| "loss": 0.078, |
| "step": 2910 |
| }, |
| { |
| "epoch": 5.26173285198556, |
| "grad_norm": 1.026175971232318, |
| "learning_rate": 9.48014440433213e-06, |
| "loss": 0.0791, |
| "step": 2915 |
| }, |
| { |
| "epoch": 5.270758122743683, |
| "grad_norm": 1.067615156649838, |
| "learning_rate": 9.462093862815885e-06, |
| "loss": 0.0801, |
| "step": 2920 |
| }, |
| { |
| "epoch": 5.2797833935018055, |
| "grad_norm": 1.0009258120547495, |
| "learning_rate": 9.44404332129964e-06, |
| "loss": 0.0799, |
| "step": 2925 |
| }, |
| { |
| "epoch": 5.2888086642599275, |
| "grad_norm": 0.997000627917853, |
| "learning_rate": 9.425992779783394e-06, |
| "loss": 0.0801, |
| "step": 2930 |
| }, |
| { |
| "epoch": 5.29783393501805, |
| "grad_norm": 1.0650332846055963, |
| "learning_rate": 9.40794223826715e-06, |
| "loss": 0.083, |
| "step": 2935 |
| }, |
| { |
| "epoch": 5.306859205776173, |
| "grad_norm": 1.0505624675010425, |
| "learning_rate": 9.389891696750903e-06, |
| "loss": 0.0806, |
| "step": 2940 |
| }, |
| { |
| "epoch": 5.315884476534296, |
| "grad_norm": 1.0721048636152144, |
| "learning_rate": 9.371841155234657e-06, |
| "loss": 0.0835, |
| "step": 2945 |
| }, |
| { |
| "epoch": 5.324909747292419, |
| "grad_norm": 1.133446760779764, |
| "learning_rate": 9.353790613718413e-06, |
| "loss": 0.0832, |
| "step": 2950 |
| }, |
| { |
| "epoch": 5.333935018050542, |
| "grad_norm": 0.9772797348599754, |
| "learning_rate": 9.335740072202168e-06, |
| "loss": 0.0829, |
| "step": 2955 |
| }, |
| { |
| "epoch": 5.342960288808664, |
| "grad_norm": 1.1072279559914036, |
| "learning_rate": 9.317689530685922e-06, |
| "loss": 0.0849, |
| "step": 2960 |
| }, |
| { |
| "epoch": 5.351985559566787, |
| "grad_norm": 1.1628299433020886, |
| "learning_rate": 9.299638989169676e-06, |
| "loss": 0.0786, |
| "step": 2965 |
| }, |
| { |
| "epoch": 5.3610108303249095, |
| "grad_norm": 1.098783237460958, |
| "learning_rate": 9.28158844765343e-06, |
| "loss": 0.0811, |
| "step": 2970 |
| }, |
| { |
| "epoch": 5.370036101083032, |
| "grad_norm": 1.0282918449980682, |
| "learning_rate": 9.263537906137185e-06, |
| "loss": 0.0807, |
| "step": 2975 |
| }, |
| { |
| "epoch": 5.379061371841155, |
| "grad_norm": 0.9904645159012198, |
| "learning_rate": 9.24548736462094e-06, |
| "loss": 0.0826, |
| "step": 2980 |
| }, |
| { |
| "epoch": 5.388086642599278, |
| "grad_norm": 0.9548744748187918, |
| "learning_rate": 9.227436823104694e-06, |
| "loss": 0.0848, |
| "step": 2985 |
| }, |
| { |
| "epoch": 5.397111913357401, |
| "grad_norm": 0.9743172447638601, |
| "learning_rate": 9.209386281588448e-06, |
| "loss": 0.0811, |
| "step": 2990 |
| }, |
| { |
| "epoch": 5.406137184115524, |
| "grad_norm": 1.0785358862799732, |
| "learning_rate": 9.191335740072202e-06, |
| "loss": 0.0799, |
| "step": 2995 |
| }, |
| { |
| "epoch": 5.415162454873646, |
| "grad_norm": 1.1287010196459963, |
| "learning_rate": 9.173285198555957e-06, |
| "loss": 0.0848, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.424187725631769, |
| "grad_norm": 1.2004903366809976, |
| "learning_rate": 9.155234657039711e-06, |
| "loss": 0.0829, |
| "step": 3005 |
| }, |
| { |
| "epoch": 5.433212996389892, |
| "grad_norm": 1.0193224933306848, |
| "learning_rate": 9.137184115523467e-06, |
| "loss": 0.0845, |
| "step": 3010 |
| }, |
| { |
| "epoch": 5.4422382671480145, |
| "grad_norm": 1.0644266825718822, |
| "learning_rate": 9.11913357400722e-06, |
| "loss": 0.0839, |
| "step": 3015 |
| }, |
| { |
| "epoch": 5.451263537906137, |
| "grad_norm": 0.8258848911520923, |
| "learning_rate": 9.101083032490976e-06, |
| "loss": 0.0822, |
| "step": 3020 |
| }, |
| { |
| "epoch": 5.46028880866426, |
| "grad_norm": 0.9412075089089998, |
| "learning_rate": 9.08303249097473e-06, |
| "loss": 0.0796, |
| "step": 3025 |
| }, |
| { |
| "epoch": 5.469314079422382, |
| "grad_norm": 1.1565753144937303, |
| "learning_rate": 9.064981949458484e-06, |
| "loss": 0.084, |
| "step": 3030 |
| }, |
| { |
| "epoch": 5.478339350180505, |
| "grad_norm": 1.131138454580635, |
| "learning_rate": 9.04693140794224e-06, |
| "loss": 0.0838, |
| "step": 3035 |
| }, |
| { |
| "epoch": 5.487364620938628, |
| "grad_norm": 0.9492827416842319, |
| "learning_rate": 9.028880866425993e-06, |
| "loss": 0.0813, |
| "step": 3040 |
| }, |
| { |
| "epoch": 5.496389891696751, |
| "grad_norm": 0.9559475828227638, |
| "learning_rate": 9.010830324909749e-06, |
| "loss": 0.0816, |
| "step": 3045 |
| }, |
| { |
| "epoch": 5.505415162454874, |
| "grad_norm": 0.9227409169217686, |
| "learning_rate": 8.992779783393502e-06, |
| "loss": 0.0831, |
| "step": 3050 |
| }, |
| { |
| "epoch": 5.514440433212997, |
| "grad_norm": 1.0194412684846976, |
| "learning_rate": 8.974729241877256e-06, |
| "loss": 0.0832, |
| "step": 3055 |
| }, |
| { |
| "epoch": 5.5234657039711195, |
| "grad_norm": 1.0830453157290356, |
| "learning_rate": 8.956678700361012e-06, |
| "loss": 0.0834, |
| "step": 3060 |
| }, |
| { |
| "epoch": 5.532490974729242, |
| "grad_norm": 1.2132106015297117, |
| "learning_rate": 8.938628158844767e-06, |
| "loss": 0.0851, |
| "step": 3065 |
| }, |
| { |
| "epoch": 5.541516245487364, |
| "grad_norm": 1.0126228133692035, |
| "learning_rate": 8.920577617328521e-06, |
| "loss": 0.085, |
| "step": 3070 |
| }, |
| { |
| "epoch": 5.550541516245487, |
| "grad_norm": 1.2031735728430073, |
| "learning_rate": 8.902527075812275e-06, |
| "loss": 0.0822, |
| "step": 3075 |
| }, |
| { |
| "epoch": 5.55956678700361, |
| "grad_norm": 1.0701261920334157, |
| "learning_rate": 8.884476534296029e-06, |
| "loss": 0.0832, |
| "step": 3080 |
| }, |
| { |
| "epoch": 5.568592057761733, |
| "grad_norm": 1.0580816761482588, |
| "learning_rate": 8.866425992779784e-06, |
| "loss": 0.0796, |
| "step": 3085 |
| }, |
| { |
| "epoch": 5.577617328519856, |
| "grad_norm": 1.1940482533499739, |
| "learning_rate": 8.84837545126354e-06, |
| "loss": 0.0811, |
| "step": 3090 |
| }, |
| { |
| "epoch": 5.586642599277979, |
| "grad_norm": 1.165235309648367, |
| "learning_rate": 8.830324909747294e-06, |
| "loss": 0.0863, |
| "step": 3095 |
| }, |
| { |
| "epoch": 5.595667870036101, |
| "grad_norm": 0.9249917161400244, |
| "learning_rate": 8.812274368231047e-06, |
| "loss": 0.0826, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.6046931407942235, |
| "grad_norm": 0.9717526930385568, |
| "learning_rate": 8.794223826714801e-06, |
| "loss": 0.0831, |
| "step": 3105 |
| }, |
| { |
| "epoch": 5.613718411552346, |
| "grad_norm": 0.9703434417026183, |
| "learning_rate": 8.776173285198557e-06, |
| "loss": 0.0825, |
| "step": 3110 |
| }, |
| { |
| "epoch": 5.622743682310469, |
| "grad_norm": 1.1028319154775599, |
| "learning_rate": 8.75812274368231e-06, |
| "loss": 0.0836, |
| "step": 3115 |
| }, |
| { |
| "epoch": 5.631768953068592, |
| "grad_norm": 1.0334242813746233, |
| "learning_rate": 8.740072202166066e-06, |
| "loss": 0.0839, |
| "step": 3120 |
| }, |
| { |
| "epoch": 5.640794223826715, |
| "grad_norm": 0.952976186597617, |
| "learning_rate": 8.72202166064982e-06, |
| "loss": 0.0829, |
| "step": 3125 |
| }, |
| { |
| "epoch": 5.649819494584838, |
| "grad_norm": 1.1742650529657839, |
| "learning_rate": 8.703971119133575e-06, |
| "loss": 0.0851, |
| "step": 3130 |
| }, |
| { |
| "epoch": 5.658844765342961, |
| "grad_norm": 1.0818057279178166, |
| "learning_rate": 8.68592057761733e-06, |
| "loss": 0.0843, |
| "step": 3135 |
| }, |
| { |
| "epoch": 5.667870036101083, |
| "grad_norm": 1.0218551853468192, |
| "learning_rate": 8.667870036101083e-06, |
| "loss": 0.0835, |
| "step": 3140 |
| }, |
| { |
| "epoch": 5.676895306859206, |
| "grad_norm": 0.9122852593914, |
| "learning_rate": 8.649819494584839e-06, |
| "loss": 0.0823, |
| "step": 3145 |
| }, |
| { |
| "epoch": 5.6859205776173285, |
| "grad_norm": 0.9335847424506468, |
| "learning_rate": 8.631768953068594e-06, |
| "loss": 0.0816, |
| "step": 3150 |
| }, |
| { |
| "epoch": 5.694945848375451, |
| "grad_norm": 1.0829119759659693, |
| "learning_rate": 8.613718411552348e-06, |
| "loss": 0.082, |
| "step": 3155 |
| }, |
| { |
| "epoch": 5.703971119133574, |
| "grad_norm": 0.8918149119424851, |
| "learning_rate": 8.595667870036102e-06, |
| "loss": 0.0827, |
| "step": 3160 |
| }, |
| { |
| "epoch": 5.712996389891697, |
| "grad_norm": 0.9986993160612925, |
| "learning_rate": 8.577617328519855e-06, |
| "loss": 0.0821, |
| "step": 3165 |
| }, |
| { |
| "epoch": 5.722021660649819, |
| "grad_norm": 1.0234360957137028, |
| "learning_rate": 8.559566787003611e-06, |
| "loss": 0.0835, |
| "step": 3170 |
| }, |
| { |
| "epoch": 5.731046931407942, |
| "grad_norm": 1.0744486767713939, |
| "learning_rate": 8.541516245487366e-06, |
| "loss": 0.0861, |
| "step": 3175 |
| }, |
| { |
| "epoch": 5.740072202166065, |
| "grad_norm": 1.140075564988142, |
| "learning_rate": 8.52346570397112e-06, |
| "loss": 0.0869, |
| "step": 3180 |
| }, |
| { |
| "epoch": 5.749097472924188, |
| "grad_norm": 1.088662389971363, |
| "learning_rate": 8.505415162454874e-06, |
| "loss": 0.0828, |
| "step": 3185 |
| }, |
| { |
| "epoch": 5.758122743682311, |
| "grad_norm": 1.0811807303803094, |
| "learning_rate": 8.487364620938628e-06, |
| "loss": 0.0847, |
| "step": 3190 |
| }, |
| { |
| "epoch": 5.7671480144404335, |
| "grad_norm": 0.9808903181684402, |
| "learning_rate": 8.469314079422383e-06, |
| "loss": 0.0814, |
| "step": 3195 |
| }, |
| { |
| "epoch": 5.776173285198556, |
| "grad_norm": 0.9528918043651443, |
| "learning_rate": 8.451263537906137e-06, |
| "loss": 0.0842, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.785198555956678, |
| "grad_norm": 1.0415470416861194, |
| "learning_rate": 8.433212996389893e-06, |
| "loss": 0.0852, |
| "step": 3205 |
| }, |
| { |
| "epoch": 5.794223826714801, |
| "grad_norm": 0.973061380539213, |
| "learning_rate": 8.415162454873647e-06, |
| "loss": 0.0823, |
| "step": 3210 |
| }, |
| { |
| "epoch": 5.803249097472924, |
| "grad_norm": 0.9918845339236286, |
| "learning_rate": 8.397111913357402e-06, |
| "loss": 0.0855, |
| "step": 3215 |
| }, |
| { |
| "epoch": 5.812274368231047, |
| "grad_norm": 1.0771655009336334, |
| "learning_rate": 8.379061371841156e-06, |
| "loss": 0.0834, |
| "step": 3220 |
| }, |
| { |
| "epoch": 5.82129963898917, |
| "grad_norm": 1.2069079266806046, |
| "learning_rate": 8.36101083032491e-06, |
| "loss": 0.086, |
| "step": 3225 |
| }, |
| { |
| "epoch": 5.830324909747293, |
| "grad_norm": 1.1089608320239726, |
| "learning_rate": 8.342960288808665e-06, |
| "loss": 0.0853, |
| "step": 3230 |
| }, |
| { |
| "epoch": 5.8393501805054155, |
| "grad_norm": 1.0591489304507902, |
| "learning_rate": 8.324909747292419e-06, |
| "loss": 0.0851, |
| "step": 3235 |
| }, |
| { |
| "epoch": 5.8483754512635375, |
| "grad_norm": 0.9769104219070994, |
| "learning_rate": 8.306859205776175e-06, |
| "loss": 0.0854, |
| "step": 3240 |
| }, |
| { |
| "epoch": 5.85740072202166, |
| "grad_norm": 1.0564740427808548, |
| "learning_rate": 8.288808664259928e-06, |
| "loss": 0.0848, |
| "step": 3245 |
| }, |
| { |
| "epoch": 5.866425992779783, |
| "grad_norm": 1.1856181761239502, |
| "learning_rate": 8.270758122743682e-06, |
| "loss": 0.0844, |
| "step": 3250 |
| }, |
| { |
| "epoch": 5.875451263537906, |
| "grad_norm": 1.1377866343272012, |
| "learning_rate": 8.252707581227438e-06, |
| "loss": 0.083, |
| "step": 3255 |
| }, |
| { |
| "epoch": 5.884476534296029, |
| "grad_norm": 0.960741526303003, |
| "learning_rate": 8.234657039711193e-06, |
| "loss": 0.0866, |
| "step": 3260 |
| }, |
| { |
| "epoch": 5.893501805054152, |
| "grad_norm": 1.042073925207735, |
| "learning_rate": 8.216606498194947e-06, |
| "loss": 0.0823, |
| "step": 3265 |
| }, |
| { |
| "epoch": 5.902527075812275, |
| "grad_norm": 1.1046114797773166, |
| "learning_rate": 8.198555956678701e-06, |
| "loss": 0.0849, |
| "step": 3270 |
| }, |
| { |
| "epoch": 5.911552346570397, |
| "grad_norm": 0.9693673702377407, |
| "learning_rate": 8.180505415162455e-06, |
| "loss": 0.0853, |
| "step": 3275 |
| }, |
| { |
| "epoch": 5.92057761732852, |
| "grad_norm": 0.9935728695450117, |
| "learning_rate": 8.16245487364621e-06, |
| "loss": 0.0821, |
| "step": 3280 |
| }, |
| { |
| "epoch": 5.9296028880866425, |
| "grad_norm": 1.0804926319490278, |
| "learning_rate": 8.144404332129964e-06, |
| "loss": 0.0835, |
| "step": 3285 |
| }, |
| { |
| "epoch": 5.938628158844765, |
| "grad_norm": 1.064140844986384, |
| "learning_rate": 8.12635379061372e-06, |
| "loss": 0.0844, |
| "step": 3290 |
| }, |
| { |
| "epoch": 5.947653429602888, |
| "grad_norm": 1.0950234731974577, |
| "learning_rate": 8.108303249097473e-06, |
| "loss": 0.0856, |
| "step": 3295 |
| }, |
| { |
| "epoch": 5.956678700361011, |
| "grad_norm": 0.8673519333585892, |
| "learning_rate": 8.090252707581227e-06, |
| "loss": 0.083, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.965703971119133, |
| "grad_norm": 1.0637539730176666, |
| "learning_rate": 8.072202166064983e-06, |
| "loss": 0.0876, |
| "step": 3305 |
| }, |
| { |
| "epoch": 5.974729241877256, |
| "grad_norm": 1.1094983157547202, |
| "learning_rate": 8.054151624548736e-06, |
| "loss": 0.0824, |
| "step": 3310 |
| }, |
| { |
| "epoch": 5.983754512635379, |
| "grad_norm": 1.0059308649225067, |
| "learning_rate": 8.036101083032492e-06, |
| "loss": 0.083, |
| "step": 3315 |
| }, |
| { |
| "epoch": 5.992779783393502, |
| "grad_norm": 1.0366600463191211, |
| "learning_rate": 8.018050541516246e-06, |
| "loss": 0.0823, |
| "step": 3320 |
| }, |
| { |
| "epoch": 6.001805054151625, |
| "grad_norm": 0.8077933037227134, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0794, |
| "step": 3325 |
| }, |
| { |
| "epoch": 6.0108303249097474, |
| "grad_norm": 0.6607413873878891, |
| "learning_rate": 7.981949458483755e-06, |
| "loss": 0.0592, |
| "step": 3330 |
| }, |
| { |
| "epoch": 6.01985559566787, |
| "grad_norm": 0.8591048311669046, |
| "learning_rate": 7.963898916967509e-06, |
| "loss": 0.0581, |
| "step": 3335 |
| }, |
| { |
| "epoch": 6.028880866425993, |
| "grad_norm": 0.70812448877148, |
| "learning_rate": 7.945848375451264e-06, |
| "loss": 0.0557, |
| "step": 3340 |
| }, |
| { |
| "epoch": 6.037906137184115, |
| "grad_norm": 0.7669486224905381, |
| "learning_rate": 7.92779783393502e-06, |
| "loss": 0.0556, |
| "step": 3345 |
| }, |
| { |
| "epoch": 6.046931407942238, |
| "grad_norm": 0.8323807855804514, |
| "learning_rate": 7.909747292418774e-06, |
| "loss": 0.0556, |
| "step": 3350 |
| }, |
| { |
| "epoch": 6.055956678700361, |
| "grad_norm": 0.7520520480227791, |
| "learning_rate": 7.891696750902528e-06, |
| "loss": 0.0565, |
| "step": 3355 |
| }, |
| { |
| "epoch": 6.064981949458484, |
| "grad_norm": 0.7627341484745689, |
| "learning_rate": 7.873646209386281e-06, |
| "loss": 0.0577, |
| "step": 3360 |
| }, |
| { |
| "epoch": 6.074007220216607, |
| "grad_norm": 0.6284399763085183, |
| "learning_rate": 7.855595667870037e-06, |
| "loss": 0.0559, |
| "step": 3365 |
| }, |
| { |
| "epoch": 6.0830324909747295, |
| "grad_norm": 0.6933321046620422, |
| "learning_rate": 7.83754512635379e-06, |
| "loss": 0.057, |
| "step": 3370 |
| }, |
| { |
| "epoch": 6.092057761732852, |
| "grad_norm": 0.7124947102302089, |
| "learning_rate": 7.819494584837546e-06, |
| "loss": 0.057, |
| "step": 3375 |
| }, |
| { |
| "epoch": 6.101083032490974, |
| "grad_norm": 0.6774361061625748, |
| "learning_rate": 7.8014440433213e-06, |
| "loss": 0.0558, |
| "step": 3380 |
| }, |
| { |
| "epoch": 6.110108303249097, |
| "grad_norm": 0.6796571497120569, |
| "learning_rate": 7.783393501805054e-06, |
| "loss": 0.0552, |
| "step": 3385 |
| }, |
| { |
| "epoch": 6.11913357400722, |
| "grad_norm": 0.7096655971296874, |
| "learning_rate": 7.76534296028881e-06, |
| "loss": 0.0566, |
| "step": 3390 |
| }, |
| { |
| "epoch": 6.128158844765343, |
| "grad_norm": 0.6734447095516544, |
| "learning_rate": 7.747292418772563e-06, |
| "loss": 0.0561, |
| "step": 3395 |
| }, |
| { |
| "epoch": 6.137184115523466, |
| "grad_norm": 0.7849122123015055, |
| "learning_rate": 7.729241877256319e-06, |
| "loss": 0.0571, |
| "step": 3400 |
| }, |
| { |
| "epoch": 6.146209386281589, |
| "grad_norm": 0.8010538023816386, |
| "learning_rate": 7.711191335740073e-06, |
| "loss": 0.0559, |
| "step": 3405 |
| }, |
| { |
| "epoch": 6.155234657039712, |
| "grad_norm": 0.7974825006602668, |
| "learning_rate": 7.693140794223828e-06, |
| "loss": 0.0558, |
| "step": 3410 |
| }, |
| { |
| "epoch": 6.164259927797834, |
| "grad_norm": 0.738675744252563, |
| "learning_rate": 7.675090252707582e-06, |
| "loss": 0.0589, |
| "step": 3415 |
| }, |
| { |
| "epoch": 6.1732851985559565, |
| "grad_norm": 0.7350542546764174, |
| "learning_rate": 7.657039711191336e-06, |
| "loss": 0.0579, |
| "step": 3420 |
| }, |
| { |
| "epoch": 6.182310469314079, |
| "grad_norm": 0.7619517736710059, |
| "learning_rate": 7.638989169675091e-06, |
| "loss": 0.0583, |
| "step": 3425 |
| }, |
| { |
| "epoch": 6.191335740072202, |
| "grad_norm": 0.7031956779944963, |
| "learning_rate": 7.620938628158845e-06, |
| "loss": 0.0573, |
| "step": 3430 |
| }, |
| { |
| "epoch": 6.200361010830325, |
| "grad_norm": 0.7075380742871121, |
| "learning_rate": 7.6028880866426006e-06, |
| "loss": 0.0578, |
| "step": 3435 |
| }, |
| { |
| "epoch": 6.209386281588448, |
| "grad_norm": 0.7290441002589563, |
| "learning_rate": 7.584837545126354e-06, |
| "loss": 0.0575, |
| "step": 3440 |
| }, |
| { |
| "epoch": 6.21841155234657, |
| "grad_norm": 0.7207806459527201, |
| "learning_rate": 7.566787003610109e-06, |
| "loss": 0.0567, |
| "step": 3445 |
| }, |
| { |
| "epoch": 6.227436823104693, |
| "grad_norm": 0.6314466279187362, |
| "learning_rate": 7.548736462093863e-06, |
| "loss": 0.0571, |
| "step": 3450 |
| }, |
| { |
| "epoch": 6.236462093862816, |
| "grad_norm": 0.6887561278193176, |
| "learning_rate": 7.530685920577618e-06, |
| "loss": 0.0593, |
| "step": 3455 |
| }, |
| { |
| "epoch": 6.245487364620939, |
| "grad_norm": 0.6206250853652553, |
| "learning_rate": 7.512635379061373e-06, |
| "loss": 0.0577, |
| "step": 3460 |
| }, |
| { |
| "epoch": 6.254512635379061, |
| "grad_norm": 0.6522866377346127, |
| "learning_rate": 7.494584837545127e-06, |
| "loss": 0.0566, |
| "step": 3465 |
| }, |
| { |
| "epoch": 6.263537906137184, |
| "grad_norm": 0.8267081288646022, |
| "learning_rate": 7.4765342960288815e-06, |
| "loss": 0.058, |
| "step": 3470 |
| }, |
| { |
| "epoch": 6.272563176895307, |
| "grad_norm": 0.7317740303239466, |
| "learning_rate": 7.458483754512636e-06, |
| "loss": 0.0578, |
| "step": 3475 |
| }, |
| { |
| "epoch": 6.28158844765343, |
| "grad_norm": 0.7147086385332849, |
| "learning_rate": 7.440433212996391e-06, |
| "loss": 0.0581, |
| "step": 3480 |
| }, |
| { |
| "epoch": 6.290613718411552, |
| "grad_norm": 0.8128890990175704, |
| "learning_rate": 7.422382671480145e-06, |
| "loss": 0.0582, |
| "step": 3485 |
| }, |
| { |
| "epoch": 6.299638989169675, |
| "grad_norm": 0.733774553010267, |
| "learning_rate": 7.404332129963899e-06, |
| "loss": 0.0589, |
| "step": 3490 |
| }, |
| { |
| "epoch": 6.308664259927798, |
| "grad_norm": 0.6543459576191735, |
| "learning_rate": 7.386281588447653e-06, |
| "loss": 0.0586, |
| "step": 3495 |
| }, |
| { |
| "epoch": 6.317689530685921, |
| "grad_norm": 0.7327266175801325, |
| "learning_rate": 7.368231046931409e-06, |
| "loss": 0.0568, |
| "step": 3500 |
| }, |
| { |
| "epoch": 6.3267148014440435, |
| "grad_norm": 0.6332768442841004, |
| "learning_rate": 7.350180505415163e-06, |
| "loss": 0.0588, |
| "step": 3505 |
| }, |
| { |
| "epoch": 6.335740072202166, |
| "grad_norm": 0.7514342787787303, |
| "learning_rate": 7.332129963898917e-06, |
| "loss": 0.0561, |
| "step": 3510 |
| }, |
| { |
| "epoch": 6.344765342960288, |
| "grad_norm": 0.9126804192240373, |
| "learning_rate": 7.314079422382672e-06, |
| "loss": 0.06, |
| "step": 3515 |
| }, |
| { |
| "epoch": 6.353790613718411, |
| "grad_norm": 0.7488940592798972, |
| "learning_rate": 7.296028880866427e-06, |
| "loss": 0.0589, |
| "step": 3520 |
| }, |
| { |
| "epoch": 6.362815884476534, |
| "grad_norm": 0.7822412626745566, |
| "learning_rate": 7.277978339350181e-06, |
| "loss": 0.057, |
| "step": 3525 |
| }, |
| { |
| "epoch": 6.371841155234657, |
| "grad_norm": 0.641895512790488, |
| "learning_rate": 7.259927797833936e-06, |
| "loss": 0.0566, |
| "step": 3530 |
| }, |
| { |
| "epoch": 6.38086642599278, |
| "grad_norm": 0.610560993405513, |
| "learning_rate": 7.24187725631769e-06, |
| "loss": 0.0568, |
| "step": 3535 |
| }, |
| { |
| "epoch": 6.389891696750903, |
| "grad_norm": 0.6706724032290002, |
| "learning_rate": 7.223826714801445e-06, |
| "loss": 0.0606, |
| "step": 3540 |
| }, |
| { |
| "epoch": 6.398916967509026, |
| "grad_norm": 0.658753045191665, |
| "learning_rate": 7.2057761732852e-06, |
| "loss": 0.0593, |
| "step": 3545 |
| }, |
| { |
| "epoch": 6.4079422382671485, |
| "grad_norm": 0.6654660097704281, |
| "learning_rate": 7.187725631768954e-06, |
| "loss": 0.0585, |
| "step": 3550 |
| }, |
| { |
| "epoch": 6.4169675090252705, |
| "grad_norm": 0.7183651068540791, |
| "learning_rate": 7.169675090252708e-06, |
| "loss": 0.0583, |
| "step": 3555 |
| }, |
| { |
| "epoch": 6.425992779783393, |
| "grad_norm": 0.8088470667398641, |
| "learning_rate": 7.151624548736462e-06, |
| "loss": 0.0588, |
| "step": 3560 |
| }, |
| { |
| "epoch": 6.435018050541516, |
| "grad_norm": 0.8263381826886101, |
| "learning_rate": 7.133574007220218e-06, |
| "loss": 0.0593, |
| "step": 3565 |
| }, |
| { |
| "epoch": 6.444043321299639, |
| "grad_norm": 0.7405826430937665, |
| "learning_rate": 7.115523465703971e-06, |
| "loss": 0.0578, |
| "step": 3570 |
| }, |
| { |
| "epoch": 6.453068592057762, |
| "grad_norm": 0.7185281820450516, |
| "learning_rate": 7.097472924187726e-06, |
| "loss": 0.0579, |
| "step": 3575 |
| }, |
| { |
| "epoch": 6.462093862815885, |
| "grad_norm": 0.7382936760799543, |
| "learning_rate": 7.07942238267148e-06, |
| "loss": 0.0575, |
| "step": 3580 |
| }, |
| { |
| "epoch": 6.471119133574007, |
| "grad_norm": 0.6877004421197578, |
| "learning_rate": 7.061371841155235e-06, |
| "loss": 0.0573, |
| "step": 3585 |
| }, |
| { |
| "epoch": 6.48014440433213, |
| "grad_norm": 0.6861100161163733, |
| "learning_rate": 7.04332129963899e-06, |
| "loss": 0.0599, |
| "step": 3590 |
| }, |
| { |
| "epoch": 6.4891696750902526, |
| "grad_norm": 0.6794004024109824, |
| "learning_rate": 7.025270758122744e-06, |
| "loss": 0.059, |
| "step": 3595 |
| }, |
| { |
| "epoch": 6.498194945848375, |
| "grad_norm": 0.8104554628255708, |
| "learning_rate": 7.0072202166064985e-06, |
| "loss": 0.0587, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.507220216606498, |
| "grad_norm": 0.8305032099557856, |
| "learning_rate": 6.989169675090254e-06, |
| "loss": 0.0585, |
| "step": 3605 |
| }, |
| { |
| "epoch": 6.516245487364621, |
| "grad_norm": 0.8023646626140253, |
| "learning_rate": 6.971119133574008e-06, |
| "loss": 0.0595, |
| "step": 3610 |
| }, |
| { |
| "epoch": 6.525270758122744, |
| "grad_norm": 0.765073355799892, |
| "learning_rate": 6.9530685920577625e-06, |
| "loss": 0.0584, |
| "step": 3615 |
| }, |
| { |
| "epoch": 6.534296028880867, |
| "grad_norm": 0.7404552154431031, |
| "learning_rate": 6.935018050541516e-06, |
| "loss": 0.0577, |
| "step": 3620 |
| }, |
| { |
| "epoch": 6.543321299638989, |
| "grad_norm": 0.7405781286739906, |
| "learning_rate": 6.916967509025271e-06, |
| "loss": 0.0574, |
| "step": 3625 |
| }, |
| { |
| "epoch": 6.552346570397112, |
| "grad_norm": 0.7793217854667968, |
| "learning_rate": 6.8989169675090265e-06, |
| "loss": 0.0605, |
| "step": 3630 |
| }, |
| { |
| "epoch": 6.561371841155235, |
| "grad_norm": 0.5845887643829276, |
| "learning_rate": 6.88086642599278e-06, |
| "loss": 0.0599, |
| "step": 3635 |
| }, |
| { |
| "epoch": 6.5703971119133575, |
| "grad_norm": 0.7145137796672513, |
| "learning_rate": 6.862815884476535e-06, |
| "loss": 0.0585, |
| "step": 3640 |
| }, |
| { |
| "epoch": 6.57942238267148, |
| "grad_norm": 0.7417399070524405, |
| "learning_rate": 6.844765342960289e-06, |
| "loss": 0.0587, |
| "step": 3645 |
| }, |
| { |
| "epoch": 6.588447653429603, |
| "grad_norm": 0.6846077356552561, |
| "learning_rate": 6.826714801444044e-06, |
| "loss": 0.0588, |
| "step": 3650 |
| }, |
| { |
| "epoch": 6.597472924187725, |
| "grad_norm": 0.6999561458466752, |
| "learning_rate": 6.808664259927798e-06, |
| "loss": 0.0595, |
| "step": 3655 |
| }, |
| { |
| "epoch": 6.606498194945848, |
| "grad_norm": 0.6567498870286701, |
| "learning_rate": 6.790613718411553e-06, |
| "loss": 0.0581, |
| "step": 3660 |
| }, |
| { |
| "epoch": 6.615523465703971, |
| "grad_norm": 0.7350564747052517, |
| "learning_rate": 6.7725631768953075e-06, |
| "loss": 0.0596, |
| "step": 3665 |
| }, |
| { |
| "epoch": 6.624548736462094, |
| "grad_norm": 0.6574574010183412, |
| "learning_rate": 6.754512635379062e-06, |
| "loss": 0.0584, |
| "step": 3670 |
| }, |
| { |
| "epoch": 6.633574007220217, |
| "grad_norm": 0.754103084530502, |
| "learning_rate": 6.736462093862817e-06, |
| "loss": 0.0597, |
| "step": 3675 |
| }, |
| { |
| "epoch": 6.64259927797834, |
| "grad_norm": 0.6204823250912029, |
| "learning_rate": 6.718411552346571e-06, |
| "loss": 0.0594, |
| "step": 3680 |
| }, |
| { |
| "epoch": 6.6516245487364625, |
| "grad_norm": 0.6526250315655968, |
| "learning_rate": 6.700361010830325e-06, |
| "loss": 0.0586, |
| "step": 3685 |
| }, |
| { |
| "epoch": 6.6606498194945845, |
| "grad_norm": 0.6759108282695475, |
| "learning_rate": 6.682310469314079e-06, |
| "loss": 0.0598, |
| "step": 3690 |
| }, |
| { |
| "epoch": 6.669675090252707, |
| "grad_norm": 0.6281015940025027, |
| "learning_rate": 6.664259927797835e-06, |
| "loss": 0.0583, |
| "step": 3695 |
| }, |
| { |
| "epoch": 6.67870036101083, |
| "grad_norm": 0.8112328410048235, |
| "learning_rate": 6.646209386281589e-06, |
| "loss": 0.0581, |
| "step": 3700 |
| }, |
| { |
| "epoch": 6.687725631768953, |
| "grad_norm": 0.8294177729044503, |
| "learning_rate": 6.628158844765343e-06, |
| "loss": 0.0588, |
| "step": 3705 |
| }, |
| { |
| "epoch": 6.696750902527076, |
| "grad_norm": 0.6837556241925122, |
| "learning_rate": 6.610108303249098e-06, |
| "loss": 0.0584, |
| "step": 3710 |
| }, |
| { |
| "epoch": 6.705776173285199, |
| "grad_norm": 0.6977698116292954, |
| "learning_rate": 6.592057761732853e-06, |
| "loss": 0.0607, |
| "step": 3715 |
| }, |
| { |
| "epoch": 6.714801444043322, |
| "grad_norm": 0.8094312645469506, |
| "learning_rate": 6.574007220216607e-06, |
| "loss": 0.0583, |
| "step": 3720 |
| }, |
| { |
| "epoch": 6.723826714801444, |
| "grad_norm": 0.6015350505674695, |
| "learning_rate": 6.555956678700362e-06, |
| "loss": 0.0584, |
| "step": 3725 |
| }, |
| { |
| "epoch": 6.7328519855595665, |
| "grad_norm": 0.7144030665488617, |
| "learning_rate": 6.5379061371841156e-06, |
| "loss": 0.0586, |
| "step": 3730 |
| }, |
| { |
| "epoch": 6.741877256317689, |
| "grad_norm": 0.7376393032962922, |
| "learning_rate": 6.519855595667871e-06, |
| "loss": 0.0591, |
| "step": 3735 |
| }, |
| { |
| "epoch": 6.750902527075812, |
| "grad_norm": 0.8824089248862236, |
| "learning_rate": 6.501805054151626e-06, |
| "loss": 0.0613, |
| "step": 3740 |
| }, |
| { |
| "epoch": 6.759927797833935, |
| "grad_norm": 0.6572712036921975, |
| "learning_rate": 6.4837545126353796e-06, |
| "loss": 0.0584, |
| "step": 3745 |
| }, |
| { |
| "epoch": 6.768953068592058, |
| "grad_norm": 0.6525445173798352, |
| "learning_rate": 6.465703971119134e-06, |
| "loss": 0.0599, |
| "step": 3750 |
| }, |
| { |
| "epoch": 6.777978339350181, |
| "grad_norm": 0.7744924607467405, |
| "learning_rate": 6.447653429602888e-06, |
| "loss": 0.0603, |
| "step": 3755 |
| }, |
| { |
| "epoch": 6.787003610108303, |
| "grad_norm": 0.7489114246161626, |
| "learning_rate": 6.4296028880866436e-06, |
| "loss": 0.0593, |
| "step": 3760 |
| }, |
| { |
| "epoch": 6.796028880866426, |
| "grad_norm": 0.645441657942143, |
| "learning_rate": 6.411552346570397e-06, |
| "loss": 0.0601, |
| "step": 3765 |
| }, |
| { |
| "epoch": 6.805054151624549, |
| "grad_norm": 0.7628853557562308, |
| "learning_rate": 6.393501805054152e-06, |
| "loss": 0.059, |
| "step": 3770 |
| }, |
| { |
| "epoch": 6.8140794223826715, |
| "grad_norm": 0.6961794188939842, |
| "learning_rate": 6.375451263537906e-06, |
| "loss": 0.0589, |
| "step": 3775 |
| }, |
| { |
| "epoch": 6.823104693140794, |
| "grad_norm": 0.732051818736521, |
| "learning_rate": 6.357400722021661e-06, |
| "loss": 0.059, |
| "step": 3780 |
| }, |
| { |
| "epoch": 6.832129963898917, |
| "grad_norm": 0.6559311687860191, |
| "learning_rate": 6.339350180505416e-06, |
| "loss": 0.0583, |
| "step": 3785 |
| }, |
| { |
| "epoch": 6.841155234657039, |
| "grad_norm": 0.6593139916118618, |
| "learning_rate": 6.32129963898917e-06, |
| "loss": 0.0584, |
| "step": 3790 |
| }, |
| { |
| "epoch": 6.850180505415162, |
| "grad_norm": 0.6976834804145813, |
| "learning_rate": 6.3032490974729245e-06, |
| "loss": 0.0592, |
| "step": 3795 |
| }, |
| { |
| "epoch": 6.859205776173285, |
| "grad_norm": 0.9247025540887917, |
| "learning_rate": 6.28519855595668e-06, |
| "loss": 0.0579, |
| "step": 3800 |
| }, |
| { |
| "epoch": 6.868231046931408, |
| "grad_norm": 0.8326401902247998, |
| "learning_rate": 6.267148014440434e-06, |
| "loss": 0.0604, |
| "step": 3805 |
| }, |
| { |
| "epoch": 6.877256317689531, |
| "grad_norm": 0.5853865156076931, |
| "learning_rate": 6.2490974729241885e-06, |
| "loss": 0.0594, |
| "step": 3810 |
| }, |
| { |
| "epoch": 6.886281588447654, |
| "grad_norm": 0.6292032096557062, |
| "learning_rate": 6.231046931407942e-06, |
| "loss": 0.0573, |
| "step": 3815 |
| }, |
| { |
| "epoch": 6.8953068592057765, |
| "grad_norm": 0.7744076548642933, |
| "learning_rate": 6.212996389891697e-06, |
| "loss": 0.0607, |
| "step": 3820 |
| }, |
| { |
| "epoch": 6.904332129963899, |
| "grad_norm": 0.7914463992742894, |
| "learning_rate": 6.1949458483754525e-06, |
| "loss": 0.0606, |
| "step": 3825 |
| }, |
| { |
| "epoch": 6.913357400722021, |
| "grad_norm": 0.701945083370995, |
| "learning_rate": 6.176895306859206e-06, |
| "loss": 0.0587, |
| "step": 3830 |
| }, |
| { |
| "epoch": 6.922382671480144, |
| "grad_norm": 0.6696484276570565, |
| "learning_rate": 6.158844765342961e-06, |
| "loss": 0.0608, |
| "step": 3835 |
| }, |
| { |
| "epoch": 6.931407942238267, |
| "grad_norm": 0.7132432488001278, |
| "learning_rate": 6.140794223826715e-06, |
| "loss": 0.0576, |
| "step": 3840 |
| }, |
| { |
| "epoch": 6.94043321299639, |
| "grad_norm": 0.6745043956786805, |
| "learning_rate": 6.12274368231047e-06, |
| "loss": 0.0597, |
| "step": 3845 |
| }, |
| { |
| "epoch": 6.949458483754513, |
| "grad_norm": 0.7735109912206191, |
| "learning_rate": 6.104693140794224e-06, |
| "loss": 0.058, |
| "step": 3850 |
| }, |
| { |
| "epoch": 6.958483754512636, |
| "grad_norm": 0.8204235477419153, |
| "learning_rate": 6.086642599277979e-06, |
| "loss": 0.0611, |
| "step": 3855 |
| }, |
| { |
| "epoch": 6.967509025270758, |
| "grad_norm": 0.6940007350939205, |
| "learning_rate": 6.068592057761733e-06, |
| "loss": 0.0572, |
| "step": 3860 |
| }, |
| { |
| "epoch": 6.9765342960288805, |
| "grad_norm": 0.7293292682342528, |
| "learning_rate": 6.050541516245488e-06, |
| "loss": 0.0595, |
| "step": 3865 |
| }, |
| { |
| "epoch": 6.985559566787003, |
| "grad_norm": 0.7122810615357342, |
| "learning_rate": 6.032490974729243e-06, |
| "loss": 0.0581, |
| "step": 3870 |
| }, |
| { |
| "epoch": 6.994584837545126, |
| "grad_norm": 0.6537994119990527, |
| "learning_rate": 6.014440433212997e-06, |
| "loss": 0.0598, |
| "step": 3875 |
| }, |
| { |
| "epoch": 7.003610108303249, |
| "grad_norm": 0.45575554548627367, |
| "learning_rate": 5.996389891696751e-06, |
| "loss": 0.0554, |
| "step": 3880 |
| }, |
| { |
| "epoch": 7.012635379061372, |
| "grad_norm": 0.5070177525024102, |
| "learning_rate": 5.978339350180505e-06, |
| "loss": 0.0472, |
| "step": 3885 |
| }, |
| { |
| "epoch": 7.021660649819495, |
| "grad_norm": 0.5588320341624766, |
| "learning_rate": 5.960288808664261e-06, |
| "loss": 0.0476, |
| "step": 3890 |
| }, |
| { |
| "epoch": 7.030685920577618, |
| "grad_norm": 0.4730456078169544, |
| "learning_rate": 5.942238267148015e-06, |
| "loss": 0.0465, |
| "step": 3895 |
| }, |
| { |
| "epoch": 7.03971119133574, |
| "grad_norm": 0.4802501819942531, |
| "learning_rate": 5.924187725631769e-06, |
| "loss": 0.0467, |
| "step": 3900 |
| }, |
| { |
| "epoch": 7.048736462093863, |
| "grad_norm": 0.43376711913832905, |
| "learning_rate": 5.906137184115524e-06, |
| "loss": 0.046, |
| "step": 3905 |
| }, |
| { |
| "epoch": 7.0577617328519855, |
| "grad_norm": 0.5467463303002804, |
| "learning_rate": 5.888086642599279e-06, |
| "loss": 0.0472, |
| "step": 3910 |
| }, |
| { |
| "epoch": 7.066787003610108, |
| "grad_norm": 0.5482465563214539, |
| "learning_rate": 5.870036101083033e-06, |
| "loss": 0.0484, |
| "step": 3915 |
| }, |
| { |
| "epoch": 7.075812274368231, |
| "grad_norm": 0.47912292874862494, |
| "learning_rate": 5.851985559566788e-06, |
| "loss": 0.0464, |
| "step": 3920 |
| }, |
| { |
| "epoch": 7.084837545126354, |
| "grad_norm": 0.5614426575228361, |
| "learning_rate": 5.8339350180505415e-06, |
| "loss": 0.0473, |
| "step": 3925 |
| }, |
| { |
| "epoch": 7.093862815884476, |
| "grad_norm": 0.45529148018104104, |
| "learning_rate": 5.815884476534297e-06, |
| "loss": 0.0475, |
| "step": 3930 |
| }, |
| { |
| "epoch": 7.102888086642599, |
| "grad_norm": 0.4788344545919543, |
| "learning_rate": 5.797833935018051e-06, |
| "loss": 0.0479, |
| "step": 3935 |
| }, |
| { |
| "epoch": 7.111913357400722, |
| "grad_norm": 0.46173140924255596, |
| "learning_rate": 5.7797833935018055e-06, |
| "loss": 0.0473, |
| "step": 3940 |
| }, |
| { |
| "epoch": 7.120938628158845, |
| "grad_norm": 0.5390906257209109, |
| "learning_rate": 5.761732851985559e-06, |
| "loss": 0.0485, |
| "step": 3945 |
| }, |
| { |
| "epoch": 7.129963898916968, |
| "grad_norm": 0.5227509249629044, |
| "learning_rate": 5.743682310469314e-06, |
| "loss": 0.0467, |
| "step": 3950 |
| }, |
| { |
| "epoch": 7.1389891696750905, |
| "grad_norm": 0.48947576478003363, |
| "learning_rate": 5.7256317689530695e-06, |
| "loss": 0.0472, |
| "step": 3955 |
| }, |
| { |
| "epoch": 7.148014440433213, |
| "grad_norm": 0.49244408024963654, |
| "learning_rate": 5.707581227436823e-06, |
| "loss": 0.0483, |
| "step": 3960 |
| }, |
| { |
| "epoch": 7.157039711191336, |
| "grad_norm": 0.5222815328881174, |
| "learning_rate": 5.689530685920578e-06, |
| "loss": 0.048, |
| "step": 3965 |
| }, |
| { |
| "epoch": 7.166064981949458, |
| "grad_norm": 0.5817714524678111, |
| "learning_rate": 5.671480144404332e-06, |
| "loss": 0.0473, |
| "step": 3970 |
| }, |
| { |
| "epoch": 7.175090252707581, |
| "grad_norm": 0.5925655806700871, |
| "learning_rate": 5.653429602888087e-06, |
| "loss": 0.0497, |
| "step": 3975 |
| }, |
| { |
| "epoch": 7.184115523465704, |
| "grad_norm": 0.49153223210393315, |
| "learning_rate": 5.635379061371842e-06, |
| "loss": 0.0488, |
| "step": 3980 |
| }, |
| { |
| "epoch": 7.193140794223827, |
| "grad_norm": 0.47623000392936515, |
| "learning_rate": 5.617328519855596e-06, |
| "loss": 0.0483, |
| "step": 3985 |
| }, |
| { |
| "epoch": 7.20216606498195, |
| "grad_norm": 0.5429876236829767, |
| "learning_rate": 5.5992779783393505e-06, |
| "loss": 0.0475, |
| "step": 3990 |
| }, |
| { |
| "epoch": 7.2111913357400725, |
| "grad_norm": 0.5392076103305925, |
| "learning_rate": 5.581227436823106e-06, |
| "loss": 0.0473, |
| "step": 3995 |
| }, |
| { |
| "epoch": 7.2202166064981945, |
| "grad_norm": 0.4442145984166065, |
| "learning_rate": 5.56317689530686e-06, |
| "loss": 0.0477, |
| "step": 4000 |
| }, |
| { |
| "epoch": 7.229241877256317, |
| "grad_norm": 0.4474040313759183, |
| "learning_rate": 5.5451263537906145e-06, |
| "loss": 0.0478, |
| "step": 4005 |
| }, |
| { |
| "epoch": 7.23826714801444, |
| "grad_norm": 0.49719158373571726, |
| "learning_rate": 5.527075812274368e-06, |
| "loss": 0.0471, |
| "step": 4010 |
| }, |
| { |
| "epoch": 7.247292418772563, |
| "grad_norm": 0.5110277695420385, |
| "learning_rate": 5.509025270758123e-06, |
| "loss": 0.0472, |
| "step": 4015 |
| }, |
| { |
| "epoch": 7.256317689530686, |
| "grad_norm": 0.551234875347394, |
| "learning_rate": 5.490974729241878e-06, |
| "loss": 0.0482, |
| "step": 4020 |
| }, |
| { |
| "epoch": 7.265342960288809, |
| "grad_norm": 0.5164595334266263, |
| "learning_rate": 5.472924187725632e-06, |
| "loss": 0.0483, |
| "step": 4025 |
| }, |
| { |
| "epoch": 7.274368231046932, |
| "grad_norm": 0.5120155623092252, |
| "learning_rate": 5.454873646209387e-06, |
| "loss": 0.0473, |
| "step": 4030 |
| }, |
| { |
| "epoch": 7.283393501805054, |
| "grad_norm": 0.7138533885415025, |
| "learning_rate": 5.436823104693141e-06, |
| "loss": 0.0491, |
| "step": 4035 |
| }, |
| { |
| "epoch": 7.292418772563177, |
| "grad_norm": 0.5256186511567283, |
| "learning_rate": 5.418772563176896e-06, |
| "loss": 0.0482, |
| "step": 4040 |
| }, |
| { |
| "epoch": 7.3014440433212995, |
| "grad_norm": 0.5308368446047028, |
| "learning_rate": 5.40072202166065e-06, |
| "loss": 0.0483, |
| "step": 4045 |
| }, |
| { |
| "epoch": 7.310469314079422, |
| "grad_norm": 0.6340076056397649, |
| "learning_rate": 5.382671480144405e-06, |
| "loss": 0.0497, |
| "step": 4050 |
| }, |
| { |
| "epoch": 7.319494584837545, |
| "grad_norm": 0.5515989056962669, |
| "learning_rate": 5.3646209386281586e-06, |
| "loss": 0.0485, |
| "step": 4055 |
| }, |
| { |
| "epoch": 7.328519855595668, |
| "grad_norm": 0.5670979301767023, |
| "learning_rate": 5.346570397111914e-06, |
| "loss": 0.049, |
| "step": 4060 |
| }, |
| { |
| "epoch": 7.337545126353791, |
| "grad_norm": 0.5155332846058096, |
| "learning_rate": 5.328519855595669e-06, |
| "loss": 0.0491, |
| "step": 4065 |
| }, |
| { |
| "epoch": 7.346570397111913, |
| "grad_norm": 0.4824925439743894, |
| "learning_rate": 5.3104693140794226e-06, |
| "loss": 0.0478, |
| "step": 4070 |
| }, |
| { |
| "epoch": 7.355595667870036, |
| "grad_norm": 0.5632294248317786, |
| "learning_rate": 5.292418772563177e-06, |
| "loss": 0.0487, |
| "step": 4075 |
| }, |
| { |
| "epoch": 7.364620938628159, |
| "grad_norm": 0.5912408794489219, |
| "learning_rate": 5.274368231046931e-06, |
| "loss": 0.0494, |
| "step": 4080 |
| }, |
| { |
| "epoch": 7.373646209386282, |
| "grad_norm": 0.6610618876210643, |
| "learning_rate": 5.2563176895306866e-06, |
| "loss": 0.0484, |
| "step": 4085 |
| }, |
| { |
| "epoch": 7.382671480144404, |
| "grad_norm": 0.5715318430573592, |
| "learning_rate": 5.238267148014441e-06, |
| "loss": 0.0484, |
| "step": 4090 |
| }, |
| { |
| "epoch": 7.391696750902527, |
| "grad_norm": 0.5661862605906709, |
| "learning_rate": 5.220216606498195e-06, |
| "loss": 0.0486, |
| "step": 4095 |
| }, |
| { |
| "epoch": 7.40072202166065, |
| "grad_norm": 0.9029651147919706, |
| "learning_rate": 5.20216606498195e-06, |
| "loss": 0.0508, |
| "step": 4100 |
| }, |
| { |
| "epoch": 7.409747292418772, |
| "grad_norm": 0.5165409614311138, |
| "learning_rate": 5.184115523465705e-06, |
| "loss": 0.0495, |
| "step": 4105 |
| }, |
| { |
| "epoch": 7.418772563176895, |
| "grad_norm": 0.5306583449154315, |
| "learning_rate": 5.166064981949459e-06, |
| "loss": 0.0491, |
| "step": 4110 |
| }, |
| { |
| "epoch": 7.427797833935018, |
| "grad_norm": 0.450162478445713, |
| "learning_rate": 5.148014440433214e-06, |
| "loss": 0.0481, |
| "step": 4115 |
| }, |
| { |
| "epoch": 7.436823104693141, |
| "grad_norm": 0.5832168434645013, |
| "learning_rate": 5.1299638989169675e-06, |
| "loss": 0.0484, |
| "step": 4120 |
| }, |
| { |
| "epoch": 7.445848375451264, |
| "grad_norm": 0.6015879512932737, |
| "learning_rate": 5.111913357400723e-06, |
| "loss": 0.0483, |
| "step": 4125 |
| }, |
| { |
| "epoch": 7.4548736462093865, |
| "grad_norm": 0.516158408533585, |
| "learning_rate": 5.093862815884477e-06, |
| "loss": 0.0502, |
| "step": 4130 |
| }, |
| { |
| "epoch": 7.463898916967509, |
| "grad_norm": 0.5739166956674056, |
| "learning_rate": 5.0758122743682315e-06, |
| "loss": 0.049, |
| "step": 4135 |
| }, |
| { |
| "epoch": 7.472924187725631, |
| "grad_norm": 0.5545335828483756, |
| "learning_rate": 5.057761732851985e-06, |
| "loss": 0.0493, |
| "step": 4140 |
| }, |
| { |
| "epoch": 7.481949458483754, |
| "grad_norm": 0.47988775413231505, |
| "learning_rate": 5.03971119133574e-06, |
| "loss": 0.0481, |
| "step": 4145 |
| }, |
| { |
| "epoch": 7.490974729241877, |
| "grad_norm": 0.4135154941397547, |
| "learning_rate": 5.0216606498194955e-06, |
| "loss": 0.0485, |
| "step": 4150 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.584778293406958, |
| "learning_rate": 5.003610108303249e-06, |
| "loss": 0.0496, |
| "step": 4155 |
| }, |
| { |
| "epoch": 7.509025270758123, |
| "grad_norm": 0.6596870903880235, |
| "learning_rate": 4.985559566787004e-06, |
| "loss": 0.0509, |
| "step": 4160 |
| }, |
| { |
| "epoch": 7.518050541516246, |
| "grad_norm": 0.5258493182778312, |
| "learning_rate": 4.967509025270759e-06, |
| "loss": 0.0479, |
| "step": 4165 |
| }, |
| { |
| "epoch": 7.527075812274369, |
| "grad_norm": 0.5240346323436935, |
| "learning_rate": 4.949458483754513e-06, |
| "loss": 0.0486, |
| "step": 4170 |
| }, |
| { |
| "epoch": 7.536101083032491, |
| "grad_norm": 0.5698373146810196, |
| "learning_rate": 4.931407942238268e-06, |
| "loss": 0.0498, |
| "step": 4175 |
| }, |
| { |
| "epoch": 7.5451263537906135, |
| "grad_norm": 0.5168274629904006, |
| "learning_rate": 4.913357400722022e-06, |
| "loss": 0.0484, |
| "step": 4180 |
| }, |
| { |
| "epoch": 7.554151624548736, |
| "grad_norm": 0.4967245847978526, |
| "learning_rate": 4.8953068592057764e-06, |
| "loss": 0.0494, |
| "step": 4185 |
| }, |
| { |
| "epoch": 7.563176895306859, |
| "grad_norm": 0.5296559595656957, |
| "learning_rate": 4.877256317689531e-06, |
| "loss": 0.0487, |
| "step": 4190 |
| }, |
| { |
| "epoch": 7.572202166064982, |
| "grad_norm": 0.4681797361268003, |
| "learning_rate": 4.859205776173286e-06, |
| "loss": 0.0492, |
| "step": 4195 |
| }, |
| { |
| "epoch": 7.581227436823105, |
| "grad_norm": 0.5837080132512812, |
| "learning_rate": 4.8411552346570404e-06, |
| "loss": 0.0505, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.590252707581227, |
| "grad_norm": 0.6465877829891032, |
| "learning_rate": 4.823104693140795e-06, |
| "loss": 0.0491, |
| "step": 4205 |
| }, |
| { |
| "epoch": 7.59927797833935, |
| "grad_norm": 0.5542060145314779, |
| "learning_rate": 4.805054151624549e-06, |
| "loss": 0.0502, |
| "step": 4210 |
| }, |
| { |
| "epoch": 7.608303249097473, |
| "grad_norm": 0.544169659697146, |
| "learning_rate": 4.787003610108304e-06, |
| "loss": 0.049, |
| "step": 4215 |
| }, |
| { |
| "epoch": 7.617328519855596, |
| "grad_norm": 0.4766240683764691, |
| "learning_rate": 4.768953068592058e-06, |
| "loss": 0.0488, |
| "step": 4220 |
| }, |
| { |
| "epoch": 7.626353790613718, |
| "grad_norm": 0.5857310047242031, |
| "learning_rate": 4.750902527075812e-06, |
| "loss": 0.0484, |
| "step": 4225 |
| }, |
| { |
| "epoch": 7.635379061371841, |
| "grad_norm": 0.6584297092461749, |
| "learning_rate": 4.7328519855595676e-06, |
| "loss": 0.0504, |
| "step": 4230 |
| }, |
| { |
| "epoch": 7.644404332129964, |
| "grad_norm": 0.6165622003595669, |
| "learning_rate": 4.714801444043321e-06, |
| "loss": 0.0497, |
| "step": 4235 |
| }, |
| { |
| "epoch": 7.653429602888087, |
| "grad_norm": 0.5411935348913237, |
| "learning_rate": 4.696750902527076e-06, |
| "loss": 0.0493, |
| "step": 4240 |
| }, |
| { |
| "epoch": 7.662454873646209, |
| "grad_norm": 0.5757635354988099, |
| "learning_rate": 4.678700361010831e-06, |
| "loss": 0.0487, |
| "step": 4245 |
| }, |
| { |
| "epoch": 7.671480144404332, |
| "grad_norm": 0.4789345734543781, |
| "learning_rate": 4.660649819494585e-06, |
| "loss": 0.0493, |
| "step": 4250 |
| }, |
| { |
| "epoch": 7.680505415162455, |
| "grad_norm": 0.5729021031172669, |
| "learning_rate": 4.64259927797834e-06, |
| "loss": 0.0494, |
| "step": 4255 |
| }, |
| { |
| "epoch": 7.689530685920578, |
| "grad_norm": 0.5131076676913804, |
| "learning_rate": 4.624548736462095e-06, |
| "loss": 0.0486, |
| "step": 4260 |
| }, |
| { |
| "epoch": 7.6985559566787005, |
| "grad_norm": 0.5950116178035108, |
| "learning_rate": 4.6064981949458485e-06, |
| "loss": 0.0492, |
| "step": 4265 |
| }, |
| { |
| "epoch": 7.707581227436823, |
| "grad_norm": 0.5524155549870725, |
| "learning_rate": 4.588447653429603e-06, |
| "loss": 0.049, |
| "step": 4270 |
| }, |
| { |
| "epoch": 7.716606498194945, |
| "grad_norm": 0.5270091994399845, |
| "learning_rate": 4.570397111913358e-06, |
| "loss": 0.0488, |
| "step": 4275 |
| }, |
| { |
| "epoch": 7.725631768953068, |
| "grad_norm": 0.46706887147243975, |
| "learning_rate": 4.552346570397112e-06, |
| "loss": 0.0499, |
| "step": 4280 |
| }, |
| { |
| "epoch": 7.734657039711191, |
| "grad_norm": 0.598860629124346, |
| "learning_rate": 4.534296028880867e-06, |
| "loss": 0.049, |
| "step": 4285 |
| }, |
| { |
| "epoch": 7.743682310469314, |
| "grad_norm": 0.4792268975334193, |
| "learning_rate": 4.516245487364621e-06, |
| "loss": 0.0504, |
| "step": 4290 |
| }, |
| { |
| "epoch": 7.752707581227437, |
| "grad_norm": 0.5423376274444353, |
| "learning_rate": 4.498194945848376e-06, |
| "loss": 0.0487, |
| "step": 4295 |
| }, |
| { |
| "epoch": 7.76173285198556, |
| "grad_norm": 0.5483231957576472, |
| "learning_rate": 4.48014440433213e-06, |
| "loss": 0.0492, |
| "step": 4300 |
| }, |
| { |
| "epoch": 7.770758122743683, |
| "grad_norm": 0.5272610680544871, |
| "learning_rate": 4.462093862815885e-06, |
| "loss": 0.0498, |
| "step": 4305 |
| }, |
| { |
| "epoch": 7.7797833935018055, |
| "grad_norm": 0.5746187590999563, |
| "learning_rate": 4.444043321299639e-06, |
| "loss": 0.0476, |
| "step": 4310 |
| }, |
| { |
| "epoch": 7.7888086642599275, |
| "grad_norm": 0.5034631208906547, |
| "learning_rate": 4.425992779783394e-06, |
| "loss": 0.0499, |
| "step": 4315 |
| }, |
| { |
| "epoch": 7.79783393501805, |
| "grad_norm": 0.4802523837756993, |
| "learning_rate": 4.407942238267148e-06, |
| "loss": 0.0491, |
| "step": 4320 |
| }, |
| { |
| "epoch": 7.806859205776173, |
| "grad_norm": 0.5995873657958978, |
| "learning_rate": 4.389891696750903e-06, |
| "loss": 0.0503, |
| "step": 4325 |
| }, |
| { |
| "epoch": 7.815884476534296, |
| "grad_norm": 0.5877452980751235, |
| "learning_rate": 4.3718411552346575e-06, |
| "loss": 0.0484, |
| "step": 4330 |
| }, |
| { |
| "epoch": 7.824909747292419, |
| "grad_norm": 0.5912813010558485, |
| "learning_rate": 4.353790613718412e-06, |
| "loss": 0.0506, |
| "step": 4335 |
| }, |
| { |
| "epoch": 7.833935018050542, |
| "grad_norm": 0.5241916120291473, |
| "learning_rate": 4.335740072202167e-06, |
| "loss": 0.0498, |
| "step": 4340 |
| }, |
| { |
| "epoch": 7.842960288808664, |
| "grad_norm": 0.6235491325166662, |
| "learning_rate": 4.317689530685921e-06, |
| "loss": 0.0493, |
| "step": 4345 |
| }, |
| { |
| "epoch": 7.851985559566787, |
| "grad_norm": 0.4918990115478395, |
| "learning_rate": 4.299638989169675e-06, |
| "loss": 0.0488, |
| "step": 4350 |
| }, |
| { |
| "epoch": 7.8610108303249095, |
| "grad_norm": 0.5015566746050467, |
| "learning_rate": 4.28158844765343e-06, |
| "loss": 0.0492, |
| "step": 4355 |
| }, |
| { |
| "epoch": 7.870036101083032, |
| "grad_norm": 0.5897991332222726, |
| "learning_rate": 4.263537906137185e-06, |
| "loss": 0.0499, |
| "step": 4360 |
| }, |
| { |
| "epoch": 7.879061371841155, |
| "grad_norm": 0.5044541687620931, |
| "learning_rate": 4.245487364620938e-06, |
| "loss": 0.0496, |
| "step": 4365 |
| }, |
| { |
| "epoch": 7.888086642599278, |
| "grad_norm": 0.5298007896462054, |
| "learning_rate": 4.227436823104694e-06, |
| "loss": 0.0499, |
| "step": 4370 |
| }, |
| { |
| "epoch": 7.897111913357401, |
| "grad_norm": 0.6508152344907792, |
| "learning_rate": 4.209386281588448e-06, |
| "loss": 0.0499, |
| "step": 4375 |
| }, |
| { |
| "epoch": 7.906137184115524, |
| "grad_norm": 0.4952820870375711, |
| "learning_rate": 4.191335740072202e-06, |
| "loss": 0.0486, |
| "step": 4380 |
| }, |
| { |
| "epoch": 7.915162454873646, |
| "grad_norm": 0.534088933198461, |
| "learning_rate": 4.173285198555957e-06, |
| "loss": 0.0497, |
| "step": 4385 |
| }, |
| { |
| "epoch": 7.924187725631769, |
| "grad_norm": 0.5456898452361849, |
| "learning_rate": 4.155234657039712e-06, |
| "loss": 0.0498, |
| "step": 4390 |
| }, |
| { |
| "epoch": 7.933212996389892, |
| "grad_norm": 0.5206741989861254, |
| "learning_rate": 4.137184115523466e-06, |
| "loss": 0.0503, |
| "step": 4395 |
| }, |
| { |
| "epoch": 7.9422382671480145, |
| "grad_norm": 0.5130632228899183, |
| "learning_rate": 4.119133574007221e-06, |
| "loss": 0.0503, |
| "step": 4400 |
| }, |
| { |
| "epoch": 7.951263537906137, |
| "grad_norm": 0.5076154969319394, |
| "learning_rate": 4.101083032490975e-06, |
| "loss": 0.0497, |
| "step": 4405 |
| }, |
| { |
| "epoch": 7.96028880866426, |
| "grad_norm": 0.4532751901805038, |
| "learning_rate": 4.0830324909747296e-06, |
| "loss": 0.0486, |
| "step": 4410 |
| }, |
| { |
| "epoch": 7.969314079422382, |
| "grad_norm": 0.5569018909973789, |
| "learning_rate": 4.064981949458484e-06, |
| "loss": 0.0495, |
| "step": 4415 |
| }, |
| { |
| "epoch": 7.978339350180505, |
| "grad_norm": 0.5691379588931518, |
| "learning_rate": 4.046931407942238e-06, |
| "loss": 0.0491, |
| "step": 4420 |
| }, |
| { |
| "epoch": 7.987364620938628, |
| "grad_norm": 0.5087632373984906, |
| "learning_rate": 4.0288808664259935e-06, |
| "loss": 0.0507, |
| "step": 4425 |
| }, |
| { |
| "epoch": 7.996389891696751, |
| "grad_norm": 0.5438187646707812, |
| "learning_rate": 4.010830324909747e-06, |
| "loss": 0.0509, |
| "step": 4430 |
| }, |
| { |
| "epoch": 8.005415162454874, |
| "grad_norm": 0.46810462572343003, |
| "learning_rate": 3.992779783393502e-06, |
| "loss": 0.0467, |
| "step": 4435 |
| }, |
| { |
| "epoch": 8.014440433212997, |
| "grad_norm": 0.5110390059676326, |
| "learning_rate": 3.974729241877257e-06, |
| "loss": 0.0439, |
| "step": 4440 |
| }, |
| { |
| "epoch": 8.02346570397112, |
| "grad_norm": 0.5350833689826751, |
| "learning_rate": 3.956678700361011e-06, |
| "loss": 0.0443, |
| "step": 4445 |
| }, |
| { |
| "epoch": 8.032490974729242, |
| "grad_norm": 0.5246123933567166, |
| "learning_rate": 3.938628158844765e-06, |
| "loss": 0.0436, |
| "step": 4450 |
| }, |
| { |
| "epoch": 8.041516245487365, |
| "grad_norm": 0.5240110675258801, |
| "learning_rate": 3.920577617328521e-06, |
| "loss": 0.0438, |
| "step": 4455 |
| }, |
| { |
| "epoch": 8.050541516245488, |
| "grad_norm": 0.4508238887199392, |
| "learning_rate": 3.9025270758122745e-06, |
| "loss": 0.0434, |
| "step": 4460 |
| }, |
| { |
| "epoch": 8.059566787003611, |
| "grad_norm": 0.4587378398564632, |
| "learning_rate": 3.884476534296029e-06, |
| "loss": 0.0433, |
| "step": 4465 |
| }, |
| { |
| "epoch": 8.068592057761732, |
| "grad_norm": 0.4536888628696346, |
| "learning_rate": 3.866425992779784e-06, |
| "loss": 0.044, |
| "step": 4470 |
| }, |
| { |
| "epoch": 8.077617328519855, |
| "grad_norm": 0.489763842575816, |
| "learning_rate": 3.848375451263538e-06, |
| "loss": 0.0439, |
| "step": 4475 |
| }, |
| { |
| "epoch": 8.086642599277978, |
| "grad_norm": 0.4841182353897231, |
| "learning_rate": 3.830324909747293e-06, |
| "loss": 0.0447, |
| "step": 4480 |
| }, |
| { |
| "epoch": 8.0956678700361, |
| "grad_norm": 0.43276031492594386, |
| "learning_rate": 3.812274368231047e-06, |
| "loss": 0.0436, |
| "step": 4485 |
| }, |
| { |
| "epoch": 8.104693140794224, |
| "grad_norm": 0.43236829754729605, |
| "learning_rate": 3.7942238267148016e-06, |
| "loss": 0.0442, |
| "step": 4490 |
| }, |
| { |
| "epoch": 8.113718411552346, |
| "grad_norm": 0.5146544780130509, |
| "learning_rate": 3.776173285198556e-06, |
| "loss": 0.0444, |
| "step": 4495 |
| }, |
| { |
| "epoch": 8.12274368231047, |
| "grad_norm": 0.5155356346240472, |
| "learning_rate": 3.758122743682311e-06, |
| "loss": 0.0447, |
| "step": 4500 |
| }, |
| { |
| "epoch": 8.131768953068592, |
| "grad_norm": 0.4360981669075826, |
| "learning_rate": 3.740072202166065e-06, |
| "loss": 0.0444, |
| "step": 4505 |
| }, |
| { |
| "epoch": 8.140794223826715, |
| "grad_norm": 0.5047195091481486, |
| "learning_rate": 3.72202166064982e-06, |
| "loss": 0.0453, |
| "step": 4510 |
| }, |
| { |
| "epoch": 8.149819494584838, |
| "grad_norm": 0.4206990761101632, |
| "learning_rate": 3.703971119133574e-06, |
| "loss": 0.0455, |
| "step": 4515 |
| }, |
| { |
| "epoch": 8.15884476534296, |
| "grad_norm": 0.4082036544428832, |
| "learning_rate": 3.685920577617329e-06, |
| "loss": 0.0446, |
| "step": 4520 |
| }, |
| { |
| "epoch": 8.167870036101084, |
| "grad_norm": 0.4584532743829287, |
| "learning_rate": 3.6678700361010834e-06, |
| "loss": 0.0437, |
| "step": 4525 |
| }, |
| { |
| "epoch": 8.176895306859207, |
| "grad_norm": 0.5033458410420543, |
| "learning_rate": 3.649819494584838e-06, |
| "loss": 0.0443, |
| "step": 4530 |
| }, |
| { |
| "epoch": 8.18592057761733, |
| "grad_norm": 0.4791798460117816, |
| "learning_rate": 3.6317689530685923e-06, |
| "loss": 0.0445, |
| "step": 4535 |
| }, |
| { |
| "epoch": 8.19494584837545, |
| "grad_norm": 0.4196203455463231, |
| "learning_rate": 3.6137184115523466e-06, |
| "loss": 0.0442, |
| "step": 4540 |
| }, |
| { |
| "epoch": 8.203971119133573, |
| "grad_norm": 0.41063069847051686, |
| "learning_rate": 3.5956678700361012e-06, |
| "loss": 0.0445, |
| "step": 4545 |
| }, |
| { |
| "epoch": 8.212996389891696, |
| "grad_norm": 0.45170926636173403, |
| "learning_rate": 3.5776173285198555e-06, |
| "loss": 0.0445, |
| "step": 4550 |
| }, |
| { |
| "epoch": 8.222021660649819, |
| "grad_norm": 0.4365171717286736, |
| "learning_rate": 3.5595667870036106e-06, |
| "loss": 0.0439, |
| "step": 4555 |
| }, |
| { |
| "epoch": 8.231046931407942, |
| "grad_norm": 0.44704281997750017, |
| "learning_rate": 3.541516245487365e-06, |
| "loss": 0.0451, |
| "step": 4560 |
| }, |
| { |
| "epoch": 8.240072202166065, |
| "grad_norm": 0.44891926809808913, |
| "learning_rate": 3.5234657039711195e-06, |
| "loss": 0.0444, |
| "step": 4565 |
| }, |
| { |
| "epoch": 8.249097472924188, |
| "grad_norm": 0.4965534347892108, |
| "learning_rate": 3.5054151624548737e-06, |
| "loss": 0.045, |
| "step": 4570 |
| }, |
| { |
| "epoch": 8.25812274368231, |
| "grad_norm": 0.4926232662322707, |
| "learning_rate": 3.487364620938629e-06, |
| "loss": 0.0458, |
| "step": 4575 |
| }, |
| { |
| "epoch": 8.267148014440433, |
| "grad_norm": 0.4655262777572625, |
| "learning_rate": 3.469314079422383e-06, |
| "loss": 0.0447, |
| "step": 4580 |
| }, |
| { |
| "epoch": 8.276173285198556, |
| "grad_norm": 0.42335405356873596, |
| "learning_rate": 3.4512635379061377e-06, |
| "loss": 0.0448, |
| "step": 4585 |
| }, |
| { |
| "epoch": 8.28519855595668, |
| "grad_norm": 0.4222745585103908, |
| "learning_rate": 3.433212996389892e-06, |
| "loss": 0.0447, |
| "step": 4590 |
| }, |
| { |
| "epoch": 8.294223826714802, |
| "grad_norm": 0.5301226189403145, |
| "learning_rate": 3.4151624548736466e-06, |
| "loss": 0.0446, |
| "step": 4595 |
| }, |
| { |
| "epoch": 8.303249097472925, |
| "grad_norm": 0.42819391239433835, |
| "learning_rate": 3.397111913357401e-06, |
| "loss": 0.0452, |
| "step": 4600 |
| }, |
| { |
| "epoch": 8.312274368231048, |
| "grad_norm": 0.514730540749935, |
| "learning_rate": 3.379061371841155e-06, |
| "loss": 0.0448, |
| "step": 4605 |
| }, |
| { |
| "epoch": 8.321299638989169, |
| "grad_norm": 0.3997116268939232, |
| "learning_rate": 3.36101083032491e-06, |
| "loss": 0.046, |
| "step": 4610 |
| }, |
| { |
| "epoch": 8.330324909747292, |
| "grad_norm": 0.5325835981272248, |
| "learning_rate": 3.3429602888086644e-06, |
| "loss": 0.0445, |
| "step": 4615 |
| }, |
| { |
| "epoch": 8.339350180505415, |
| "grad_norm": 0.4871783345754918, |
| "learning_rate": 3.324909747292419e-06, |
| "loss": 0.0436, |
| "step": 4620 |
| }, |
| { |
| "epoch": 8.348375451263538, |
| "grad_norm": 0.44483140119486997, |
| "learning_rate": 3.3068592057761733e-06, |
| "loss": 0.0457, |
| "step": 4625 |
| }, |
| { |
| "epoch": 8.35740072202166, |
| "grad_norm": 0.4988117583696254, |
| "learning_rate": 3.288808664259928e-06, |
| "loss": 0.0451, |
| "step": 4630 |
| }, |
| { |
| "epoch": 8.366425992779783, |
| "grad_norm": 0.498516790859318, |
| "learning_rate": 3.2707581227436822e-06, |
| "loss": 0.0453, |
| "step": 4635 |
| }, |
| { |
| "epoch": 8.375451263537906, |
| "grad_norm": 0.4871818511915015, |
| "learning_rate": 3.2527075812274373e-06, |
| "loss": 0.0449, |
| "step": 4640 |
| }, |
| { |
| "epoch": 8.384476534296029, |
| "grad_norm": 0.4644645285652418, |
| "learning_rate": 3.2346570397111916e-06, |
| "loss": 0.045, |
| "step": 4645 |
| }, |
| { |
| "epoch": 8.393501805054152, |
| "grad_norm": 0.48904484199047815, |
| "learning_rate": 3.2166064981949462e-06, |
| "loss": 0.0458, |
| "step": 4650 |
| }, |
| { |
| "epoch": 8.402527075812275, |
| "grad_norm": 0.49191409648820944, |
| "learning_rate": 3.1985559566787005e-06, |
| "loss": 0.0449, |
| "step": 4655 |
| }, |
| { |
| "epoch": 8.411552346570398, |
| "grad_norm": 0.42381190140416636, |
| "learning_rate": 3.1805054151624556e-06, |
| "loss": 0.0451, |
| "step": 4660 |
| }, |
| { |
| "epoch": 8.42057761732852, |
| "grad_norm": 0.5141446233668461, |
| "learning_rate": 3.16245487364621e-06, |
| "loss": 0.0448, |
| "step": 4665 |
| }, |
| { |
| "epoch": 8.429602888086643, |
| "grad_norm": 0.43988353370949224, |
| "learning_rate": 3.1444043321299645e-06, |
| "loss": 0.0449, |
| "step": 4670 |
| }, |
| { |
| "epoch": 8.438628158844764, |
| "grad_norm": 0.5124332256581936, |
| "learning_rate": 3.1263537906137187e-06, |
| "loss": 0.046, |
| "step": 4675 |
| }, |
| { |
| "epoch": 8.447653429602887, |
| "grad_norm": 0.44304572226308614, |
| "learning_rate": 3.108303249097473e-06, |
| "loss": 0.0464, |
| "step": 4680 |
| }, |
| { |
| "epoch": 8.45667870036101, |
| "grad_norm": 0.5470624111508224, |
| "learning_rate": 3.0902527075812276e-06, |
| "loss": 0.0454, |
| "step": 4685 |
| }, |
| { |
| "epoch": 8.465703971119133, |
| "grad_norm": 0.4030173134885733, |
| "learning_rate": 3.072202166064982e-06, |
| "loss": 0.0453, |
| "step": 4690 |
| }, |
| { |
| "epoch": 8.474729241877256, |
| "grad_norm": 0.48989039284185976, |
| "learning_rate": 3.054151624548737e-06, |
| "loss": 0.0456, |
| "step": 4695 |
| }, |
| { |
| "epoch": 8.483754512635379, |
| "grad_norm": 0.44373432641009264, |
| "learning_rate": 3.036101083032491e-06, |
| "loss": 0.0458, |
| "step": 4700 |
| }, |
| { |
| "epoch": 8.492779783393502, |
| "grad_norm": 0.4167509709185524, |
| "learning_rate": 3.018050541516246e-06, |
| "loss": 0.0445, |
| "step": 4705 |
| }, |
| { |
| "epoch": 8.501805054151625, |
| "grad_norm": 0.5383282538033065, |
| "learning_rate": 3e-06, |
| "loss": 0.0467, |
| "step": 4710 |
| }, |
| { |
| "epoch": 8.510830324909747, |
| "grad_norm": 0.528737608029945, |
| "learning_rate": 2.9819494584837547e-06, |
| "loss": 0.0457, |
| "step": 4715 |
| }, |
| { |
| "epoch": 8.51985559566787, |
| "grad_norm": 0.508286793685187, |
| "learning_rate": 2.9638989169675094e-06, |
| "loss": 0.0466, |
| "step": 4720 |
| }, |
| { |
| "epoch": 8.528880866425993, |
| "grad_norm": 0.43030142221157835, |
| "learning_rate": 2.945848375451264e-06, |
| "loss": 0.0458, |
| "step": 4725 |
| }, |
| { |
| "epoch": 8.537906137184116, |
| "grad_norm": 0.5345650309751631, |
| "learning_rate": 2.9277978339350183e-06, |
| "loss": 0.0451, |
| "step": 4730 |
| }, |
| { |
| "epoch": 8.546931407942239, |
| "grad_norm": 0.4211221353833985, |
| "learning_rate": 2.909747292418773e-06, |
| "loss": 0.0453, |
| "step": 4735 |
| }, |
| { |
| "epoch": 8.555956678700362, |
| "grad_norm": 0.45522058199277166, |
| "learning_rate": 2.8916967509025272e-06, |
| "loss": 0.0446, |
| "step": 4740 |
| }, |
| { |
| "epoch": 8.564981949458485, |
| "grad_norm": 0.44570872798295774, |
| "learning_rate": 2.8736462093862815e-06, |
| "loss": 0.0455, |
| "step": 4745 |
| }, |
| { |
| "epoch": 8.574007220216606, |
| "grad_norm": 0.4552227500869339, |
| "learning_rate": 2.8555956678700365e-06, |
| "loss": 0.0449, |
| "step": 4750 |
| }, |
| { |
| "epoch": 8.583032490974729, |
| "grad_norm": 0.5005592666887394, |
| "learning_rate": 2.8375451263537908e-06, |
| "loss": 0.0463, |
| "step": 4755 |
| }, |
| { |
| "epoch": 8.592057761732852, |
| "grad_norm": 0.42833298285006977, |
| "learning_rate": 2.8194945848375454e-06, |
| "loss": 0.0457, |
| "step": 4760 |
| }, |
| { |
| "epoch": 8.601083032490974, |
| "grad_norm": 0.5140646854966842, |
| "learning_rate": 2.8014440433212997e-06, |
| "loss": 0.0471, |
| "step": 4765 |
| }, |
| { |
| "epoch": 8.610108303249097, |
| "grad_norm": 0.5892042283245157, |
| "learning_rate": 2.7833935018050544e-06, |
| "loss": 0.0452, |
| "step": 4770 |
| }, |
| { |
| "epoch": 8.61913357400722, |
| "grad_norm": 0.452012962570369, |
| "learning_rate": 2.7653429602888086e-06, |
| "loss": 0.0448, |
| "step": 4775 |
| }, |
| { |
| "epoch": 8.628158844765343, |
| "grad_norm": 0.4396464198294571, |
| "learning_rate": 2.7472924187725637e-06, |
| "loss": 0.0461, |
| "step": 4780 |
| }, |
| { |
| "epoch": 8.637184115523466, |
| "grad_norm": 0.41842531603143573, |
| "learning_rate": 2.729241877256318e-06, |
| "loss": 0.0451, |
| "step": 4785 |
| }, |
| { |
| "epoch": 8.646209386281589, |
| "grad_norm": 0.41898202699131626, |
| "learning_rate": 2.7111913357400726e-06, |
| "loss": 0.0457, |
| "step": 4790 |
| }, |
| { |
| "epoch": 8.655234657039712, |
| "grad_norm": 0.5196610269445232, |
| "learning_rate": 2.693140794223827e-06, |
| "loss": 0.0452, |
| "step": 4795 |
| }, |
| { |
| "epoch": 8.664259927797834, |
| "grad_norm": 0.46936311363162797, |
| "learning_rate": 2.675090252707582e-06, |
| "loss": 0.0455, |
| "step": 4800 |
| }, |
| { |
| "epoch": 8.673285198555957, |
| "grad_norm": 0.4740159197363855, |
| "learning_rate": 2.657039711191336e-06, |
| "loss": 0.0447, |
| "step": 4805 |
| }, |
| { |
| "epoch": 8.68231046931408, |
| "grad_norm": 0.43074612764563486, |
| "learning_rate": 2.6389891696750904e-06, |
| "loss": 0.0456, |
| "step": 4810 |
| }, |
| { |
| "epoch": 8.691335740072201, |
| "grad_norm": 0.5425044634950232, |
| "learning_rate": 2.620938628158845e-06, |
| "loss": 0.0466, |
| "step": 4815 |
| }, |
| { |
| "epoch": 8.700361010830324, |
| "grad_norm": 0.5081339659232526, |
| "learning_rate": 2.6028880866425993e-06, |
| "loss": 0.046, |
| "step": 4820 |
| }, |
| { |
| "epoch": 8.709386281588447, |
| "grad_norm": 0.4651038570514386, |
| "learning_rate": 2.584837545126354e-06, |
| "loss": 0.0463, |
| "step": 4825 |
| }, |
| { |
| "epoch": 8.71841155234657, |
| "grad_norm": 0.49355797519718886, |
| "learning_rate": 2.566787003610108e-06, |
| "loss": 0.0457, |
| "step": 4830 |
| }, |
| { |
| "epoch": 8.727436823104693, |
| "grad_norm": 0.46746434525998604, |
| "learning_rate": 2.5487364620938633e-06, |
| "loss": 0.0461, |
| "step": 4835 |
| }, |
| { |
| "epoch": 8.736462093862816, |
| "grad_norm": 0.4210937803108333, |
| "learning_rate": 2.5306859205776175e-06, |
| "loss": 0.0449, |
| "step": 4840 |
| }, |
| { |
| "epoch": 8.745487364620939, |
| "grad_norm": 0.45489349719230204, |
| "learning_rate": 2.512635379061372e-06, |
| "loss": 0.0451, |
| "step": 4845 |
| }, |
| { |
| "epoch": 8.754512635379061, |
| "grad_norm": 0.49590457857103976, |
| "learning_rate": 2.4945848375451264e-06, |
| "loss": 0.0462, |
| "step": 4850 |
| }, |
| { |
| "epoch": 8.763537906137184, |
| "grad_norm": 0.41135841968863646, |
| "learning_rate": 2.476534296028881e-06, |
| "loss": 0.0459, |
| "step": 4855 |
| }, |
| { |
| "epoch": 8.772563176895307, |
| "grad_norm": 0.5862358329269642, |
| "learning_rate": 2.4584837545126353e-06, |
| "loss": 0.046, |
| "step": 4860 |
| }, |
| { |
| "epoch": 8.78158844765343, |
| "grad_norm": 0.4869407752123959, |
| "learning_rate": 2.44043321299639e-06, |
| "loss": 0.046, |
| "step": 4865 |
| }, |
| { |
| "epoch": 8.790613718411553, |
| "grad_norm": 0.4541073046578143, |
| "learning_rate": 2.4223826714801447e-06, |
| "loss": 0.0452, |
| "step": 4870 |
| }, |
| { |
| "epoch": 8.799638989169676, |
| "grad_norm": 0.506564617854228, |
| "learning_rate": 2.4043321299638993e-06, |
| "loss": 0.0467, |
| "step": 4875 |
| }, |
| { |
| "epoch": 8.808664259927799, |
| "grad_norm": 0.5487992800276584, |
| "learning_rate": 2.3862815884476536e-06, |
| "loss": 0.0461, |
| "step": 4880 |
| }, |
| { |
| "epoch": 8.81768953068592, |
| "grad_norm": 0.5911995859863417, |
| "learning_rate": 2.3682310469314082e-06, |
| "loss": 0.046, |
| "step": 4885 |
| }, |
| { |
| "epoch": 8.826714801444043, |
| "grad_norm": 0.49900273240686943, |
| "learning_rate": 2.350180505415163e-06, |
| "loss": 0.046, |
| "step": 4890 |
| }, |
| { |
| "epoch": 8.835740072202166, |
| "grad_norm": 0.48948946203921534, |
| "learning_rate": 2.332129963898917e-06, |
| "loss": 0.0458, |
| "step": 4895 |
| }, |
| { |
| "epoch": 8.844765342960288, |
| "grad_norm": 0.4148924662584737, |
| "learning_rate": 2.314079422382672e-06, |
| "loss": 0.0461, |
| "step": 4900 |
| }, |
| { |
| "epoch": 8.853790613718411, |
| "grad_norm": 0.5404388584683619, |
| "learning_rate": 2.296028880866426e-06, |
| "loss": 0.046, |
| "step": 4905 |
| }, |
| { |
| "epoch": 8.862815884476534, |
| "grad_norm": 0.4321254910131908, |
| "learning_rate": 2.2779783393501807e-06, |
| "loss": 0.0454, |
| "step": 4910 |
| }, |
| { |
| "epoch": 8.871841155234657, |
| "grad_norm": 0.5949655513323762, |
| "learning_rate": 2.259927797833935e-06, |
| "loss": 0.046, |
| "step": 4915 |
| }, |
| { |
| "epoch": 8.88086642599278, |
| "grad_norm": 0.4378486480608059, |
| "learning_rate": 2.2418772563176896e-06, |
| "loss": 0.0452, |
| "step": 4920 |
| }, |
| { |
| "epoch": 8.889891696750903, |
| "grad_norm": 0.5870449555902121, |
| "learning_rate": 2.2238267148014443e-06, |
| "loss": 0.0459, |
| "step": 4925 |
| }, |
| { |
| "epoch": 8.898916967509026, |
| "grad_norm": 0.5683809822588126, |
| "learning_rate": 2.2057761732851985e-06, |
| "loss": 0.0462, |
| "step": 4930 |
| }, |
| { |
| "epoch": 8.907942238267148, |
| "grad_norm": 0.46521601312012895, |
| "learning_rate": 2.187725631768953e-06, |
| "loss": 0.0467, |
| "step": 4935 |
| }, |
| { |
| "epoch": 8.916967509025271, |
| "grad_norm": 0.47477683194093706, |
| "learning_rate": 2.169675090252708e-06, |
| "loss": 0.0462, |
| "step": 4940 |
| }, |
| { |
| "epoch": 8.925992779783394, |
| "grad_norm": 0.4804957099911855, |
| "learning_rate": 2.1516245487364625e-06, |
| "loss": 0.0454, |
| "step": 4945 |
| }, |
| { |
| "epoch": 8.935018050541515, |
| "grad_norm": 0.4571767863844299, |
| "learning_rate": 2.1335740072202168e-06, |
| "loss": 0.0453, |
| "step": 4950 |
| }, |
| { |
| "epoch": 8.944043321299638, |
| "grad_norm": 0.5601651443669259, |
| "learning_rate": 2.1155234657039714e-06, |
| "loss": 0.0463, |
| "step": 4955 |
| }, |
| { |
| "epoch": 8.953068592057761, |
| "grad_norm": 0.5500455060473453, |
| "learning_rate": 2.097472924187726e-06, |
| "loss": 0.0463, |
| "step": 4960 |
| }, |
| { |
| "epoch": 8.962093862815884, |
| "grad_norm": 0.5232043738599569, |
| "learning_rate": 2.0794223826714803e-06, |
| "loss": 0.0464, |
| "step": 4965 |
| }, |
| { |
| "epoch": 8.971119133574007, |
| "grad_norm": 0.4969509637225851, |
| "learning_rate": 2.0613718411552346e-06, |
| "loss": 0.0467, |
| "step": 4970 |
| }, |
| { |
| "epoch": 8.98014440433213, |
| "grad_norm": 0.46415374324212516, |
| "learning_rate": 2.0433212996389892e-06, |
| "loss": 0.0464, |
| "step": 4975 |
| }, |
| { |
| "epoch": 8.989169675090253, |
| "grad_norm": 0.495736189400118, |
| "learning_rate": 2.025270758122744e-06, |
| "loss": 0.0459, |
| "step": 4980 |
| }, |
| { |
| "epoch": 8.998194945848375, |
| "grad_norm": 0.5020092748082658, |
| "learning_rate": 2.007220216606498e-06, |
| "loss": 0.0458, |
| "step": 4985 |
| }, |
| { |
| "epoch": 9.007220216606498, |
| "grad_norm": 0.4534175561356577, |
| "learning_rate": 1.989169675090253e-06, |
| "loss": 0.0428, |
| "step": 4990 |
| }, |
| { |
| "epoch": 9.016245487364621, |
| "grad_norm": 0.39468719173310207, |
| "learning_rate": 1.9711191335740075e-06, |
| "loss": 0.0425, |
| "step": 4995 |
| }, |
| { |
| "epoch": 9.025270758122744, |
| "grad_norm": 0.5163608939567661, |
| "learning_rate": 1.9530685920577617e-06, |
| "loss": 0.0423, |
| "step": 5000 |
| }, |
| { |
| "epoch": 9.025270758122744, |
| "eval_loss": 0.04193449020385742, |
| "eval_runtime": 759.583, |
| "eval_samples_per_second": 17.503, |
| "eval_steps_per_second": 0.729, |
| "step": 5000 |
| }, |
| { |
| "epoch": 9.034296028880867, |
| "grad_norm": 0.4599758437234501, |
| "learning_rate": 1.9350180505415164e-06, |
| "loss": 0.0422, |
| "step": 5005 |
| }, |
| { |
| "epoch": 9.04332129963899, |
| "grad_norm": 0.49964626450423844, |
| "learning_rate": 1.916967509025271e-06, |
| "loss": 0.0427, |
| "step": 5010 |
| }, |
| { |
| "epoch": 9.052346570397113, |
| "grad_norm": 0.4178907774622966, |
| "learning_rate": 1.8989169675090255e-06, |
| "loss": 0.0417, |
| "step": 5015 |
| }, |
| { |
| "epoch": 9.061371841155236, |
| "grad_norm": 0.39048192594633196, |
| "learning_rate": 1.88086642599278e-06, |
| "loss": 0.042, |
| "step": 5020 |
| }, |
| { |
| "epoch": 9.070397111913357, |
| "grad_norm": 0.4791548976955057, |
| "learning_rate": 1.8628158844765346e-06, |
| "loss": 0.0412, |
| "step": 5025 |
| }, |
| { |
| "epoch": 9.07942238267148, |
| "grad_norm": 0.4638926347317585, |
| "learning_rate": 1.844765342960289e-06, |
| "loss": 0.0423, |
| "step": 5030 |
| }, |
| { |
| "epoch": 9.088447653429602, |
| "grad_norm": 0.5289808337845963, |
| "learning_rate": 1.8267148014440433e-06, |
| "loss": 0.0421, |
| "step": 5035 |
| }, |
| { |
| "epoch": 9.097472924187725, |
| "grad_norm": 0.48767336384018667, |
| "learning_rate": 1.808664259927798e-06, |
| "loss": 0.0421, |
| "step": 5040 |
| }, |
| { |
| "epoch": 9.106498194945848, |
| "grad_norm": 0.4589026757647638, |
| "learning_rate": 1.7906137184115524e-06, |
| "loss": 0.0427, |
| "step": 5045 |
| }, |
| { |
| "epoch": 9.115523465703971, |
| "grad_norm": 0.46264566303543775, |
| "learning_rate": 1.7725631768953069e-06, |
| "loss": 0.0422, |
| "step": 5050 |
| }, |
| { |
| "epoch": 9.124548736462094, |
| "grad_norm": 0.4410983802446593, |
| "learning_rate": 1.7545126353790615e-06, |
| "loss": 0.0412, |
| "step": 5055 |
| }, |
| { |
| "epoch": 9.133574007220217, |
| "grad_norm": 0.4202311860157241, |
| "learning_rate": 1.736462093862816e-06, |
| "loss": 0.0421, |
| "step": 5060 |
| }, |
| { |
| "epoch": 9.14259927797834, |
| "grad_norm": 0.41239420931705556, |
| "learning_rate": 1.7184115523465706e-06, |
| "loss": 0.0426, |
| "step": 5065 |
| }, |
| { |
| "epoch": 9.151624548736462, |
| "grad_norm": 0.48539351529631136, |
| "learning_rate": 1.700361010830325e-06, |
| "loss": 0.0424, |
| "step": 5070 |
| }, |
| { |
| "epoch": 9.160649819494585, |
| "grad_norm": 0.4427031394702614, |
| "learning_rate": 1.6823104693140795e-06, |
| "loss": 0.0426, |
| "step": 5075 |
| }, |
| { |
| "epoch": 9.169675090252708, |
| "grad_norm": 0.4752210878563376, |
| "learning_rate": 1.6642599277978342e-06, |
| "loss": 0.0421, |
| "step": 5080 |
| }, |
| { |
| "epoch": 9.178700361010831, |
| "grad_norm": 0.5234039982082958, |
| "learning_rate": 1.6462093862815887e-06, |
| "loss": 0.0433, |
| "step": 5085 |
| }, |
| { |
| "epoch": 9.187725631768952, |
| "grad_norm": 0.4515971877704036, |
| "learning_rate": 1.6281588447653431e-06, |
| "loss": 0.0433, |
| "step": 5090 |
| }, |
| { |
| "epoch": 9.196750902527075, |
| "grad_norm": 0.6669560841097782, |
| "learning_rate": 1.6101083032490978e-06, |
| "loss": 0.0426, |
| "step": 5095 |
| }, |
| { |
| "epoch": 9.205776173285198, |
| "grad_norm": 0.4598456495040656, |
| "learning_rate": 1.592057761732852e-06, |
| "loss": 0.0423, |
| "step": 5100 |
| }, |
| { |
| "epoch": 9.21480144404332, |
| "grad_norm": 0.4965666374110849, |
| "learning_rate": 1.5740072202166065e-06, |
| "loss": 0.0428, |
| "step": 5105 |
| }, |
| { |
| "epoch": 9.223826714801444, |
| "grad_norm": 0.37975291809275413, |
| "learning_rate": 1.5559566787003611e-06, |
| "loss": 0.0429, |
| "step": 5110 |
| }, |
| { |
| "epoch": 9.232851985559567, |
| "grad_norm": 0.4716411802408934, |
| "learning_rate": 1.5379061371841156e-06, |
| "loss": 0.0428, |
| "step": 5115 |
| }, |
| { |
| "epoch": 9.24187725631769, |
| "grad_norm": 0.4327838862415194, |
| "learning_rate": 1.51985559566787e-06, |
| "loss": 0.0433, |
| "step": 5120 |
| }, |
| { |
| "epoch": 9.250902527075812, |
| "grad_norm": 0.5003874549105873, |
| "learning_rate": 1.5018050541516247e-06, |
| "loss": 0.0422, |
| "step": 5125 |
| }, |
| { |
| "epoch": 9.259927797833935, |
| "grad_norm": 0.49004225732796214, |
| "learning_rate": 1.4837545126353792e-06, |
| "loss": 0.0431, |
| "step": 5130 |
| }, |
| { |
| "epoch": 9.268953068592058, |
| "grad_norm": 0.41580487545033434, |
| "learning_rate": 1.4657039711191336e-06, |
| "loss": 0.0423, |
| "step": 5135 |
| }, |
| { |
| "epoch": 9.277978339350181, |
| "grad_norm": 0.6027983626577104, |
| "learning_rate": 1.4476534296028883e-06, |
| "loss": 0.0439, |
| "step": 5140 |
| }, |
| { |
| "epoch": 9.287003610108304, |
| "grad_norm": 0.4598712206248585, |
| "learning_rate": 1.4296028880866427e-06, |
| "loss": 0.0429, |
| "step": 5145 |
| }, |
| { |
| "epoch": 9.296028880866427, |
| "grad_norm": 0.49342736984840635, |
| "learning_rate": 1.4115523465703974e-06, |
| "loss": 0.0426, |
| "step": 5150 |
| }, |
| { |
| "epoch": 9.30505415162455, |
| "grad_norm": 0.49176684451178787, |
| "learning_rate": 1.3935018050541518e-06, |
| "loss": 0.0427, |
| "step": 5155 |
| }, |
| { |
| "epoch": 9.314079422382672, |
| "grad_norm": 0.45945014096327996, |
| "learning_rate": 1.3754512635379063e-06, |
| "loss": 0.0429, |
| "step": 5160 |
| }, |
| { |
| "epoch": 9.323104693140793, |
| "grad_norm": 0.4114132503638751, |
| "learning_rate": 1.357400722021661e-06, |
| "loss": 0.0428, |
| "step": 5165 |
| }, |
| { |
| "epoch": 9.332129963898916, |
| "grad_norm": 0.42473969764225256, |
| "learning_rate": 1.3393501805054152e-06, |
| "loss": 0.0431, |
| "step": 5170 |
| }, |
| { |
| "epoch": 9.34115523465704, |
| "grad_norm": 0.48677947222160384, |
| "learning_rate": 1.3212996389891696e-06, |
| "loss": 0.0427, |
| "step": 5175 |
| }, |
| { |
| "epoch": 9.350180505415162, |
| "grad_norm": 0.46858607602832114, |
| "learning_rate": 1.3032490974729243e-06, |
| "loss": 0.043, |
| "step": 5180 |
| }, |
| { |
| "epoch": 9.359205776173285, |
| "grad_norm": 0.42070686139078084, |
| "learning_rate": 1.2851985559566788e-06, |
| "loss": 0.043, |
| "step": 5185 |
| }, |
| { |
| "epoch": 9.368231046931408, |
| "grad_norm": 0.5082683923945276, |
| "learning_rate": 1.2671480144404332e-06, |
| "loss": 0.0432, |
| "step": 5190 |
| }, |
| { |
| "epoch": 9.37725631768953, |
| "grad_norm": 0.49146765440804957, |
| "learning_rate": 1.2490974729241879e-06, |
| "loss": 0.0433, |
| "step": 5195 |
| }, |
| { |
| "epoch": 9.386281588447654, |
| "grad_norm": 0.4942629872457007, |
| "learning_rate": 1.2310469314079423e-06, |
| "loss": 0.0432, |
| "step": 5200 |
| }, |
| { |
| "epoch": 9.395306859205776, |
| "grad_norm": 0.5421985341404022, |
| "learning_rate": 1.2129963898916968e-06, |
| "loss": 0.0434, |
| "step": 5205 |
| }, |
| { |
| "epoch": 9.4043321299639, |
| "grad_norm": 0.5216299465972287, |
| "learning_rate": 1.1949458483754514e-06, |
| "loss": 0.0427, |
| "step": 5210 |
| }, |
| { |
| "epoch": 9.413357400722022, |
| "grad_norm": 0.41742965195734916, |
| "learning_rate": 1.176895306859206e-06, |
| "loss": 0.0435, |
| "step": 5215 |
| }, |
| { |
| "epoch": 9.422382671480145, |
| "grad_norm": 0.50664802662452, |
| "learning_rate": 1.1588447653429604e-06, |
| "loss": 0.0427, |
| "step": 5220 |
| }, |
| { |
| "epoch": 9.431407942238268, |
| "grad_norm": 0.44313965879945716, |
| "learning_rate": 1.1407942238267148e-06, |
| "loss": 0.0435, |
| "step": 5225 |
| }, |
| { |
| "epoch": 9.440433212996389, |
| "grad_norm": 0.5131645452817757, |
| "learning_rate": 1.1227436823104695e-06, |
| "loss": 0.0434, |
| "step": 5230 |
| }, |
| { |
| "epoch": 9.449458483754512, |
| "grad_norm": 0.4474063447070478, |
| "learning_rate": 1.104693140794224e-06, |
| "loss": 0.0428, |
| "step": 5235 |
| }, |
| { |
| "epoch": 9.458483754512635, |
| "grad_norm": 0.4729265352475666, |
| "learning_rate": 1.0866425992779784e-06, |
| "loss": 0.0434, |
| "step": 5240 |
| }, |
| { |
| "epoch": 9.467509025270758, |
| "grad_norm": 0.45958551411695614, |
| "learning_rate": 1.068592057761733e-06, |
| "loss": 0.0432, |
| "step": 5245 |
| }, |
| { |
| "epoch": 9.47653429602888, |
| "grad_norm": 0.41039837943400087, |
| "learning_rate": 1.0505415162454875e-06, |
| "loss": 0.0439, |
| "step": 5250 |
| }, |
| { |
| "epoch": 9.485559566787003, |
| "grad_norm": 0.44753217417346286, |
| "learning_rate": 1.032490974729242e-06, |
| "loss": 0.042, |
| "step": 5255 |
| }, |
| { |
| "epoch": 9.494584837545126, |
| "grad_norm": 0.4139276562020777, |
| "learning_rate": 1.0144404332129964e-06, |
| "loss": 0.0427, |
| "step": 5260 |
| }, |
| { |
| "epoch": 9.50361010830325, |
| "grad_norm": 0.6251275698046171, |
| "learning_rate": 9.96389891696751e-07, |
| "loss": 0.0436, |
| "step": 5265 |
| }, |
| { |
| "epoch": 9.512635379061372, |
| "grad_norm": 0.4501260605118548, |
| "learning_rate": 9.783393501805055e-07, |
| "loss": 0.0431, |
| "step": 5270 |
| }, |
| { |
| "epoch": 9.521660649819495, |
| "grad_norm": 0.3980490709756831, |
| "learning_rate": 9.6028880866426e-07, |
| "loss": 0.0435, |
| "step": 5275 |
| }, |
| { |
| "epoch": 9.530685920577618, |
| "grad_norm": 0.46391281584553307, |
| "learning_rate": 9.422382671480146e-07, |
| "loss": 0.0436, |
| "step": 5280 |
| }, |
| { |
| "epoch": 9.53971119133574, |
| "grad_norm": 0.4127369030638665, |
| "learning_rate": 9.24187725631769e-07, |
| "loss": 0.043, |
| "step": 5285 |
| }, |
| { |
| "epoch": 9.548736462093864, |
| "grad_norm": 0.40324535228510755, |
| "learning_rate": 9.061371841155235e-07, |
| "loss": 0.0424, |
| "step": 5290 |
| }, |
| { |
| "epoch": 9.557761732851986, |
| "grad_norm": 0.4319791954661702, |
| "learning_rate": 8.880866425992781e-07, |
| "loss": 0.0435, |
| "step": 5295 |
| }, |
| { |
| "epoch": 9.566787003610107, |
| "grad_norm": 0.46271459456155417, |
| "learning_rate": 8.700361010830325e-07, |
| "loss": 0.0437, |
| "step": 5300 |
| }, |
| { |
| "epoch": 9.57581227436823, |
| "grad_norm": 0.427126926635068, |
| "learning_rate": 8.519855595667871e-07, |
| "loss": 0.0434, |
| "step": 5305 |
| }, |
| { |
| "epoch": 9.584837545126353, |
| "grad_norm": 0.4691427501429953, |
| "learning_rate": 8.339350180505417e-07, |
| "loss": 0.0437, |
| "step": 5310 |
| }, |
| { |
| "epoch": 9.593862815884476, |
| "grad_norm": 0.45679217355753204, |
| "learning_rate": 8.15884476534296e-07, |
| "loss": 0.0435, |
| "step": 5315 |
| }, |
| { |
| "epoch": 9.602888086642599, |
| "grad_norm": 0.48200222966829426, |
| "learning_rate": 7.978339350180506e-07, |
| "loss": 0.0437, |
| "step": 5320 |
| }, |
| { |
| "epoch": 9.611913357400722, |
| "grad_norm": 0.4901938411136157, |
| "learning_rate": 7.797833935018051e-07, |
| "loss": 0.044, |
| "step": 5325 |
| }, |
| { |
| "epoch": 9.620938628158845, |
| "grad_norm": 0.5165751415632741, |
| "learning_rate": 7.617328519855597e-07, |
| "loss": 0.0438, |
| "step": 5330 |
| }, |
| { |
| "epoch": 9.629963898916968, |
| "grad_norm": 0.5120078231958552, |
| "learning_rate": 7.436823104693141e-07, |
| "loss": 0.0427, |
| "step": 5335 |
| }, |
| { |
| "epoch": 9.63898916967509, |
| "grad_norm": 0.40973988656758753, |
| "learning_rate": 7.256317689530687e-07, |
| "loss": 0.0431, |
| "step": 5340 |
| }, |
| { |
| "epoch": 9.648014440433213, |
| "grad_norm": 0.4582389620066886, |
| "learning_rate": 7.075812274368232e-07, |
| "loss": 0.0433, |
| "step": 5345 |
| }, |
| { |
| "epoch": 9.657039711191336, |
| "grad_norm": 0.552631347988738, |
| "learning_rate": 6.895306859205776e-07, |
| "loss": 0.044, |
| "step": 5350 |
| }, |
| { |
| "epoch": 9.666064981949459, |
| "grad_norm": 0.3783877672799792, |
| "learning_rate": 6.714801444043322e-07, |
| "loss": 0.0435, |
| "step": 5355 |
| }, |
| { |
| "epoch": 9.675090252707582, |
| "grad_norm": 0.48138412749053733, |
| "learning_rate": 6.534296028880867e-07, |
| "loss": 0.0432, |
| "step": 5360 |
| }, |
| { |
| "epoch": 9.684115523465703, |
| "grad_norm": 0.4338898356243751, |
| "learning_rate": 6.353790613718413e-07, |
| "loss": 0.0433, |
| "step": 5365 |
| }, |
| { |
| "epoch": 9.693140794223826, |
| "grad_norm": 0.5225550209175859, |
| "learning_rate": 6.173285198555957e-07, |
| "loss": 0.0437, |
| "step": 5370 |
| }, |
| { |
| "epoch": 9.702166064981949, |
| "grad_norm": 0.5167671561018974, |
| "learning_rate": 5.992779783393502e-07, |
| "loss": 0.0434, |
| "step": 5375 |
| }, |
| { |
| "epoch": 9.711191335740072, |
| "grad_norm": 0.4115427231842469, |
| "learning_rate": 5.812274368231047e-07, |
| "loss": 0.0439, |
| "step": 5380 |
| }, |
| { |
| "epoch": 9.720216606498195, |
| "grad_norm": 0.45107850138578787, |
| "learning_rate": 5.631768953068593e-07, |
| "loss": 0.0439, |
| "step": 5385 |
| }, |
| { |
| "epoch": 9.729241877256317, |
| "grad_norm": 0.4342115174653207, |
| "learning_rate": 5.451263537906137e-07, |
| "loss": 0.0429, |
| "step": 5390 |
| }, |
| { |
| "epoch": 9.73826714801444, |
| "grad_norm": 0.4398151357748213, |
| "learning_rate": 5.270758122743683e-07, |
| "loss": 0.0432, |
| "step": 5395 |
| }, |
| { |
| "epoch": 9.747292418772563, |
| "grad_norm": 0.4242422950084841, |
| "learning_rate": 5.090252707581228e-07, |
| "loss": 0.0428, |
| "step": 5400 |
| }, |
| { |
| "epoch": 9.756317689530686, |
| "grad_norm": 0.46470583012028227, |
| "learning_rate": 4.909747292418773e-07, |
| "loss": 0.0431, |
| "step": 5405 |
| }, |
| { |
| "epoch": 9.765342960288809, |
| "grad_norm": 0.41760352928012284, |
| "learning_rate": 4.729241877256318e-07, |
| "loss": 0.0439, |
| "step": 5410 |
| }, |
| { |
| "epoch": 9.774368231046932, |
| "grad_norm": 0.43617126444221704, |
| "learning_rate": 4.548736462093863e-07, |
| "loss": 0.0429, |
| "step": 5415 |
| }, |
| { |
| "epoch": 9.783393501805055, |
| "grad_norm": 0.44372565623267207, |
| "learning_rate": 4.368231046931409e-07, |
| "loss": 0.0435, |
| "step": 5420 |
| }, |
| { |
| "epoch": 9.792418772563177, |
| "grad_norm": 0.4428259601085559, |
| "learning_rate": 4.1877256317689533e-07, |
| "loss": 0.0436, |
| "step": 5425 |
| }, |
| { |
| "epoch": 9.8014440433213, |
| "grad_norm": 0.5278115547439892, |
| "learning_rate": 4.0072202166064984e-07, |
| "loss": 0.0433, |
| "step": 5430 |
| }, |
| { |
| "epoch": 9.810469314079423, |
| "grad_norm": 0.4527700640650528, |
| "learning_rate": 3.826714801444044e-07, |
| "loss": 0.043, |
| "step": 5435 |
| }, |
| { |
| "epoch": 9.819494584837544, |
| "grad_norm": 0.401226496888323, |
| "learning_rate": 3.6462093862815885e-07, |
| "loss": 0.0428, |
| "step": 5440 |
| }, |
| { |
| "epoch": 9.828519855595667, |
| "grad_norm": 0.49440429063887187, |
| "learning_rate": 3.465703971119134e-07, |
| "loss": 0.0428, |
| "step": 5445 |
| }, |
| { |
| "epoch": 9.83754512635379, |
| "grad_norm": 0.5076032586052756, |
| "learning_rate": 3.285198555956679e-07, |
| "loss": 0.044, |
| "step": 5450 |
| }, |
| { |
| "epoch": 9.846570397111913, |
| "grad_norm": 0.43280796148087763, |
| "learning_rate": 3.104693140794224e-07, |
| "loss": 0.0432, |
| "step": 5455 |
| }, |
| { |
| "epoch": 9.855595667870036, |
| "grad_norm": 0.5015245155705008, |
| "learning_rate": 2.924187725631769e-07, |
| "loss": 0.0438, |
| "step": 5460 |
| }, |
| { |
| "epoch": 9.864620938628159, |
| "grad_norm": 0.5286579403439075, |
| "learning_rate": 2.743682310469314e-07, |
| "loss": 0.0434, |
| "step": 5465 |
| }, |
| { |
| "epoch": 9.873646209386282, |
| "grad_norm": 0.4224487204668749, |
| "learning_rate": 2.5631768953068593e-07, |
| "loss": 0.043, |
| "step": 5470 |
| }, |
| { |
| "epoch": 9.882671480144404, |
| "grad_norm": 0.45900406854255277, |
| "learning_rate": 2.3826714801444044e-07, |
| "loss": 0.0437, |
| "step": 5475 |
| }, |
| { |
| "epoch": 9.891696750902527, |
| "grad_norm": 0.45858140540922404, |
| "learning_rate": 2.2021660649819497e-07, |
| "loss": 0.0433, |
| "step": 5480 |
| }, |
| { |
| "epoch": 9.90072202166065, |
| "grad_norm": 0.530002579562083, |
| "learning_rate": 2.0216606498194947e-07, |
| "loss": 0.0434, |
| "step": 5485 |
| }, |
| { |
| "epoch": 9.909747292418773, |
| "grad_norm": 0.5370134546660643, |
| "learning_rate": 1.84115523465704e-07, |
| "loss": 0.0437, |
| "step": 5490 |
| }, |
| { |
| "epoch": 9.918772563176896, |
| "grad_norm": 0.4902526885586167, |
| "learning_rate": 1.660649819494585e-07, |
| "loss": 0.0439, |
| "step": 5495 |
| }, |
| { |
| "epoch": 9.927797833935019, |
| "grad_norm": 0.4649182165247798, |
| "learning_rate": 1.4801444043321301e-07, |
| "loss": 0.0444, |
| "step": 5500 |
| }, |
| { |
| "epoch": 9.93682310469314, |
| "grad_norm": 0.5575325439198506, |
| "learning_rate": 1.2996389891696752e-07, |
| "loss": 0.0424, |
| "step": 5505 |
| }, |
| { |
| "epoch": 9.945848375451263, |
| "grad_norm": 0.45452667158004884, |
| "learning_rate": 1.1191335740072203e-07, |
| "loss": 0.0437, |
| "step": 5510 |
| }, |
| { |
| "epoch": 9.954873646209386, |
| "grad_norm": 0.5203515708243794, |
| "learning_rate": 9.386281588447654e-08, |
| "loss": 0.0439, |
| "step": 5515 |
| }, |
| { |
| "epoch": 9.963898916967509, |
| "grad_norm": 0.45178261726772573, |
| "learning_rate": 7.581227436823105e-08, |
| "loss": 0.0432, |
| "step": 5520 |
| }, |
| { |
| "epoch": 9.972924187725631, |
| "grad_norm": 0.4688905694517184, |
| "learning_rate": 5.776173285198556e-08, |
| "loss": 0.0434, |
| "step": 5525 |
| }, |
| { |
| "epoch": 9.981949458483754, |
| "grad_norm": 0.43677959065978206, |
| "learning_rate": 3.971119133574008e-08, |
| "loss": 0.0437, |
| "step": 5530 |
| }, |
| { |
| "epoch": 9.990974729241877, |
| "grad_norm": 0.49453568586672814, |
| "learning_rate": 2.1660649819494588e-08, |
| "loss": 0.0437, |
| "step": 5535 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.4332227379502704, |
| "learning_rate": 3.6101083032490975e-09, |
| "loss": 0.0433, |
| "step": 5540 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 5540, |
| "total_flos": 613623860887552.0, |
| "train_loss": 0.19332283668838685, |
| "train_runtime": 35523.5631, |
| "train_samples_per_second": 3.743, |
| "train_steps_per_second": 0.156 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 5540, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 613623860887552.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|