| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 58914, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005092297899427116, | |
| "grad_norm": 3.7509827613830566, | |
| "learning_rate": 4.9500000000000004e-05, | |
| "loss": 3.2642, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.010184595798854232, | |
| "grad_norm": 3.509660005569458, | |
| "learning_rate": 4.991583636549121e-05, | |
| "loss": 3.014, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.015276893698281349, | |
| "grad_norm": 3.0620908737182617, | |
| "learning_rate": 4.983082259326011e-05, | |
| "loss": 2.901, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.020369191597708464, | |
| "grad_norm": 2.637498617172241, | |
| "learning_rate": 4.974580882102901e-05, | |
| "loss": 2.8888, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.025461489497135583, | |
| "grad_norm": 2.582336902618408, | |
| "learning_rate": 4.966079504879791e-05, | |
| "loss": 2.8137, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.030553787396562698, | |
| "grad_norm": 2.575382709503174, | |
| "learning_rate": 4.957578127656681e-05, | |
| "loss": 2.8131, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03564608529598982, | |
| "grad_norm": 2.2707173824310303, | |
| "learning_rate": 4.94907675043357e-05, | |
| "loss": 2.7753, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.04073838319541693, | |
| "grad_norm": 2.1095917224884033, | |
| "learning_rate": 4.94057537321046e-05, | |
| "loss": 2.7512, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04583068109484405, | |
| "grad_norm": 1.7593672275543213, | |
| "learning_rate": 4.93207399598735e-05, | |
| "loss": 2.7556, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.050922978994271166, | |
| "grad_norm": 2.2801873683929443, | |
| "learning_rate": 4.92357261876424e-05, | |
| "loss": 2.7417, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.056015276893698285, | |
| "grad_norm": 1.9631321430206299, | |
| "learning_rate": 4.91507124154113e-05, | |
| "loss": 2.7122, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.061107574793125397, | |
| "grad_norm": 1.6080312728881836, | |
| "learning_rate": 4.90656986431802e-05, | |
| "loss": 2.687, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.06619987269255251, | |
| "grad_norm": 2.1147282123565674, | |
| "learning_rate": 4.89806848709491e-05, | |
| "loss": 2.6617, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.07129217059197963, | |
| "grad_norm": 1.905120849609375, | |
| "learning_rate": 4.889567109871799e-05, | |
| "loss": 2.6655, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.07638446849140675, | |
| "grad_norm": 1.6756385564804077, | |
| "learning_rate": 4.881065732648689e-05, | |
| "loss": 2.6286, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.08147676639083386, | |
| "grad_norm": 1.8816139698028564, | |
| "learning_rate": 4.872564355425579e-05, | |
| "loss": 2.6414, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.08656906429026098, | |
| "grad_norm": 1.611456036567688, | |
| "learning_rate": 4.864062978202469e-05, | |
| "loss": 2.635, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.0916613621896881, | |
| "grad_norm": 1.8698660135269165, | |
| "learning_rate": 4.855561600979359e-05, | |
| "loss": 2.6683, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.0967536600891152, | |
| "grad_norm": 1.6007249355316162, | |
| "learning_rate": 4.847060223756249e-05, | |
| "loss": 2.633, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.10184595798854233, | |
| "grad_norm": 1.5520641803741455, | |
| "learning_rate": 4.838558846533139e-05, | |
| "loss": 2.5912, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.10693825588796944, | |
| "grad_norm": 1.522303819656372, | |
| "learning_rate": 4.8300574693100286e-05, | |
| "loss": 2.614, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.11203055378739657, | |
| "grad_norm": 1.752119541168213, | |
| "learning_rate": 4.8215560920869186e-05, | |
| "loss": 2.5984, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.11712285168682368, | |
| "grad_norm": 1.5005803108215332, | |
| "learning_rate": 4.8130547148638085e-05, | |
| "loss": 2.6231, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.12221514958625079, | |
| "grad_norm": 1.3557181358337402, | |
| "learning_rate": 4.8045533376406984e-05, | |
| "loss": 2.6189, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.1273074474856779, | |
| "grad_norm": 1.5560193061828613, | |
| "learning_rate": 4.796051960417588e-05, | |
| "loss": 2.5609, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.13239974538510502, | |
| "grad_norm": 1.4254344701766968, | |
| "learning_rate": 4.7875505831944776e-05, | |
| "loss": 2.5632, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.13749204328453216, | |
| "grad_norm": 1.366593599319458, | |
| "learning_rate": 4.7790492059713675e-05, | |
| "loss": 2.54, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.14258434118395927, | |
| "grad_norm": 1.2629475593566895, | |
| "learning_rate": 4.7705478287482575e-05, | |
| "loss": 2.5679, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.14767663908338638, | |
| "grad_norm": 1.234580159187317, | |
| "learning_rate": 4.7620464515251474e-05, | |
| "loss": 2.5671, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.1527689369828135, | |
| "grad_norm": 1.4017528295516968, | |
| "learning_rate": 4.7535450743020373e-05, | |
| "loss": 2.5859, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.1578612348822406, | |
| "grad_norm": 1.3464558124542236, | |
| "learning_rate": 4.7450436970789266e-05, | |
| "loss": 2.5289, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.16295353278166771, | |
| "grad_norm": 1.3121877908706665, | |
| "learning_rate": 4.7365423198558165e-05, | |
| "loss": 2.548, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.16804583068109485, | |
| "grad_norm": 1.2319351434707642, | |
| "learning_rate": 4.7280409426327065e-05, | |
| "loss": 2.5425, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.17313812858052197, | |
| "grad_norm": 1.243325114250183, | |
| "learning_rate": 4.7195395654095964e-05, | |
| "loss": 2.5798, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.17823042647994908, | |
| "grad_norm": 1.2152389287948608, | |
| "learning_rate": 4.711038188186486e-05, | |
| "loss": 2.5235, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.1833227243793762, | |
| "grad_norm": 1.2546372413635254, | |
| "learning_rate": 4.702536810963376e-05, | |
| "loss": 2.5451, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.1884150222788033, | |
| "grad_norm": 1.2566453218460083, | |
| "learning_rate": 4.694035433740266e-05, | |
| "loss": 2.5031, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.1935073201782304, | |
| "grad_norm": 1.4164502620697021, | |
| "learning_rate": 4.685534056517156e-05, | |
| "loss": 2.5002, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.19859961807765755, | |
| "grad_norm": 1.2575647830963135, | |
| "learning_rate": 4.677032679294046e-05, | |
| "loss": 2.5175, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.20369191597708466, | |
| "grad_norm": 1.2546263933181763, | |
| "learning_rate": 4.668531302070936e-05, | |
| "loss": 2.5374, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.20878421387651178, | |
| "grad_norm": 1.4746454954147339, | |
| "learning_rate": 4.660029924847826e-05, | |
| "loss": 2.5077, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.2138765117759389, | |
| "grad_norm": 1.3161815404891968, | |
| "learning_rate": 4.651528547624716e-05, | |
| "loss": 2.4939, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.218968809675366, | |
| "grad_norm": 1.2247682809829712, | |
| "learning_rate": 4.643027170401605e-05, | |
| "loss": 2.5047, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.22406110757479314, | |
| "grad_norm": 1.024702787399292, | |
| "learning_rate": 4.634525793178495e-05, | |
| "loss": 2.4986, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.22915340547422025, | |
| "grad_norm": 1.2271933555603027, | |
| "learning_rate": 4.626024415955385e-05, | |
| "loss": 2.4815, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.23424570337364736, | |
| "grad_norm": 1.1049838066101074, | |
| "learning_rate": 4.617523038732275e-05, | |
| "loss": 2.5055, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.23933800127307447, | |
| "grad_norm": 1.1865185499191284, | |
| "learning_rate": 4.609021661509165e-05, | |
| "loss": 2.4932, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.24443029917250159, | |
| "grad_norm": 1.2031099796295166, | |
| "learning_rate": 4.600520284286055e-05, | |
| "loss": 2.4857, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.2495225970719287, | |
| "grad_norm": 1.2100847959518433, | |
| "learning_rate": 4.592018907062944e-05, | |
| "loss": 2.4704, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.2546148949713558, | |
| "grad_norm": 1.306518793106079, | |
| "learning_rate": 4.583517529839834e-05, | |
| "loss": 2.4679, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2597071928707829, | |
| "grad_norm": 1.3596395254135132, | |
| "learning_rate": 4.575016152616724e-05, | |
| "loss": 2.5029, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.26479949077021003, | |
| "grad_norm": 1.1463990211486816, | |
| "learning_rate": 4.566514775393614e-05, | |
| "loss": 2.4678, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.2698917886696372, | |
| "grad_norm": 1.4843939542770386, | |
| "learning_rate": 4.558013398170504e-05, | |
| "loss": 2.4549, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.2749840865690643, | |
| "grad_norm": 1.4119912385940552, | |
| "learning_rate": 4.549512020947394e-05, | |
| "loss": 2.4449, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.2800763844684914, | |
| "grad_norm": 1.1640745401382446, | |
| "learning_rate": 4.5410106437242836e-05, | |
| "loss": 2.4133, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.28516868236791854, | |
| "grad_norm": 1.2901395559310913, | |
| "learning_rate": 4.532509266501173e-05, | |
| "loss": 2.4493, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.29026098026734565, | |
| "grad_norm": 1.3150924444198608, | |
| "learning_rate": 4.5240078892780635e-05, | |
| "loss": 2.4616, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.29535327816677276, | |
| "grad_norm": 1.1391271352767944, | |
| "learning_rate": 4.5155065120549534e-05, | |
| "loss": 2.4491, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.30044557606619987, | |
| "grad_norm": 1.047142505645752, | |
| "learning_rate": 4.5070051348318434e-05, | |
| "loss": 2.4664, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.305537873965627, | |
| "grad_norm": 1.2513772249221802, | |
| "learning_rate": 4.498503757608733e-05, | |
| "loss": 2.4356, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.3106301718650541, | |
| "grad_norm": 1.2248339653015137, | |
| "learning_rate": 4.4900023803856225e-05, | |
| "loss": 2.458, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.3157224697644812, | |
| "grad_norm": 0.9861664772033691, | |
| "learning_rate": 4.4815010031625125e-05, | |
| "loss": 2.4494, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.3208147676639083, | |
| "grad_norm": 1.087272047996521, | |
| "learning_rate": 4.4729996259394024e-05, | |
| "loss": 2.4459, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.32590706556333543, | |
| "grad_norm": 1.0361382961273193, | |
| "learning_rate": 4.464498248716292e-05, | |
| "loss": 2.451, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.3309993634627626, | |
| "grad_norm": 1.0861406326293945, | |
| "learning_rate": 4.455996871493182e-05, | |
| "loss": 2.4426, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.3360916613621897, | |
| "grad_norm": 0.9402614235877991, | |
| "learning_rate": 4.447495494270072e-05, | |
| "loss": 2.4189, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.3411839592616168, | |
| "grad_norm": 0.9866734743118286, | |
| "learning_rate": 4.4389941170469615e-05, | |
| "loss": 2.4521, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.34627625716104393, | |
| "grad_norm": 1.0977962017059326, | |
| "learning_rate": 4.4304927398238514e-05, | |
| "loss": 2.4505, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.35136855506047104, | |
| "grad_norm": 1.1266326904296875, | |
| "learning_rate": 4.421991362600741e-05, | |
| "loss": 2.3999, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.35646085295989816, | |
| "grad_norm": 1.1100637912750244, | |
| "learning_rate": 4.413489985377631e-05, | |
| "loss": 2.4226, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.36155315085932527, | |
| "grad_norm": 1.1532678604125977, | |
| "learning_rate": 4.404988608154521e-05, | |
| "loss": 2.4048, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.3666454487587524, | |
| "grad_norm": 1.02146315574646, | |
| "learning_rate": 4.396487230931411e-05, | |
| "loss": 2.4177, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.3717377466581795, | |
| "grad_norm": 1.1943087577819824, | |
| "learning_rate": 4.387985853708301e-05, | |
| "loss": 2.4276, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.3768300445576066, | |
| "grad_norm": 1.118034839630127, | |
| "learning_rate": 4.37948447648519e-05, | |
| "loss": 2.3933, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.3819223424570337, | |
| "grad_norm": 1.0506726503372192, | |
| "learning_rate": 4.370983099262081e-05, | |
| "loss": 2.4162, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.3870146403564608, | |
| "grad_norm": 1.1072652339935303, | |
| "learning_rate": 4.362481722038971e-05, | |
| "loss": 2.4166, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.392106938255888, | |
| "grad_norm": 0.9805678129196167, | |
| "learning_rate": 4.353980344815861e-05, | |
| "loss": 2.3771, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.3971992361553151, | |
| "grad_norm": 1.0781447887420654, | |
| "learning_rate": 4.345478967592751e-05, | |
| "loss": 2.3971, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.4022915340547422, | |
| "grad_norm": 1.1752007007598877, | |
| "learning_rate": 4.33697759036964e-05, | |
| "loss": 2.3837, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.40738383195416933, | |
| "grad_norm": 1.0886644124984741, | |
| "learning_rate": 4.32847621314653e-05, | |
| "loss": 2.4372, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.41247612985359644, | |
| "grad_norm": 1.01775062084198, | |
| "learning_rate": 4.31997483592342e-05, | |
| "loss": 2.4051, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.41756842775302355, | |
| "grad_norm": 1.0455646514892578, | |
| "learning_rate": 4.31147345870031e-05, | |
| "loss": 2.366, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.42266072565245066, | |
| "grad_norm": 0.9850195646286011, | |
| "learning_rate": 4.3029720814772e-05, | |
| "loss": 2.3816, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.4277530235518778, | |
| "grad_norm": 1.092155933380127, | |
| "learning_rate": 4.2944707042540896e-05, | |
| "loss": 2.396, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.4328453214513049, | |
| "grad_norm": 1.008317232131958, | |
| "learning_rate": 4.285969327030979e-05, | |
| "loss": 2.3976, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.437937619350732, | |
| "grad_norm": 1.1001275777816772, | |
| "learning_rate": 4.277467949807869e-05, | |
| "loss": 2.4009, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.4430299172501591, | |
| "grad_norm": 0.9589524865150452, | |
| "learning_rate": 4.268966572584759e-05, | |
| "loss": 2.3755, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.4481222151495863, | |
| "grad_norm": 0.9529566168785095, | |
| "learning_rate": 4.260465195361649e-05, | |
| "loss": 2.3961, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.4532145130490134, | |
| "grad_norm": 1.0157649517059326, | |
| "learning_rate": 4.2519638181385386e-05, | |
| "loss": 2.3743, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.4583068109484405, | |
| "grad_norm": 1.0096311569213867, | |
| "learning_rate": 4.2434624409154286e-05, | |
| "loss": 2.3702, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.4633991088478676, | |
| "grad_norm": 1.0700254440307617, | |
| "learning_rate": 4.2349610636923185e-05, | |
| "loss": 2.3486, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.4684914067472947, | |
| "grad_norm": 0.9580355286598206, | |
| "learning_rate": 4.226459686469208e-05, | |
| "loss": 2.3686, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.47358370464672184, | |
| "grad_norm": 1.0027587413787842, | |
| "learning_rate": 4.217958309246098e-05, | |
| "loss": 2.4074, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.47867600254614895, | |
| "grad_norm": 0.9647036194801331, | |
| "learning_rate": 4.209456932022988e-05, | |
| "loss": 2.3631, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.48376830044557606, | |
| "grad_norm": 1.0718977451324463, | |
| "learning_rate": 4.200955554799878e-05, | |
| "loss": 2.3613, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.48886059834500317, | |
| "grad_norm": 1.1674007177352905, | |
| "learning_rate": 4.192454177576768e-05, | |
| "loss": 2.3604, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.4939528962444303, | |
| "grad_norm": 0.8964582681655884, | |
| "learning_rate": 4.1839528003536574e-05, | |
| "loss": 2.3517, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.4990451941438574, | |
| "grad_norm": 0.9950689673423767, | |
| "learning_rate": 4.175451423130547e-05, | |
| "loss": 2.3609, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.5041374920432845, | |
| "grad_norm": 1.0391299724578857, | |
| "learning_rate": 4.166950045907437e-05, | |
| "loss": 2.3764, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.5092297899427116, | |
| "grad_norm": 0.9937861561775208, | |
| "learning_rate": 4.158448668684327e-05, | |
| "loss": 2.3439, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.5143220878421387, | |
| "grad_norm": 0.9637438654899597, | |
| "learning_rate": 4.149947291461217e-05, | |
| "loss": 2.3599, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.5194143857415658, | |
| "grad_norm": 0.991791844367981, | |
| "learning_rate": 4.141445914238107e-05, | |
| "loss": 2.3688, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.524506683640993, | |
| "grad_norm": 1.1475801467895508, | |
| "learning_rate": 4.132944537014996e-05, | |
| "loss": 2.351, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.5295989815404201, | |
| "grad_norm": 1.018678069114685, | |
| "learning_rate": 4.124443159791886e-05, | |
| "loss": 2.3381, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.5346912794398472, | |
| "grad_norm": 1.0166884660720825, | |
| "learning_rate": 4.115941782568776e-05, | |
| "loss": 2.3393, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.5397835773392744, | |
| "grad_norm": 0.9590491652488708, | |
| "learning_rate": 4.107440405345666e-05, | |
| "loss": 2.3428, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.5448758752387015, | |
| "grad_norm": 1.0007227659225464, | |
| "learning_rate": 4.098939028122556e-05, | |
| "loss": 2.3388, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.5499681731381286, | |
| "grad_norm": 0.8273807764053345, | |
| "learning_rate": 4.090437650899446e-05, | |
| "loss": 2.3238, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.5550604710375557, | |
| "grad_norm": 0.9188222885131836, | |
| "learning_rate": 4.081936273676335e-05, | |
| "loss": 2.3171, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.5601527689369828, | |
| "grad_norm": 1.2066142559051514, | |
| "learning_rate": 4.073434896453225e-05, | |
| "loss": 2.385, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.56524506683641, | |
| "grad_norm": 1.0904101133346558, | |
| "learning_rate": 4.064933519230115e-05, | |
| "loss": 2.341, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.5703373647358371, | |
| "grad_norm": 1.0374412536621094, | |
| "learning_rate": 4.056432142007005e-05, | |
| "loss": 2.3398, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.5754296626352642, | |
| "grad_norm": 0.9854114055633545, | |
| "learning_rate": 4.0479307647838956e-05, | |
| "loss": 2.3512, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.5805219605346913, | |
| "grad_norm": 1.071382999420166, | |
| "learning_rate": 4.0394293875607856e-05, | |
| "loss": 2.3145, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.5856142584341184, | |
| "grad_norm": 0.9923407435417175, | |
| "learning_rate": 4.030928010337675e-05, | |
| "loss": 2.3475, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.5907065563335455, | |
| "grad_norm": 1.034600019454956, | |
| "learning_rate": 4.022426633114565e-05, | |
| "loss": 2.3196, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.5957988542329726, | |
| "grad_norm": 1.4072537422180176, | |
| "learning_rate": 4.013925255891455e-05, | |
| "loss": 2.3435, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.6008911521323997, | |
| "grad_norm": 1.0498465299606323, | |
| "learning_rate": 4.0054238786683446e-05, | |
| "loss": 2.3488, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.6059834500318269, | |
| "grad_norm": 0.9911717176437378, | |
| "learning_rate": 3.9969225014452346e-05, | |
| "loss": 2.3286, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.611075747931254, | |
| "grad_norm": 0.9431672692298889, | |
| "learning_rate": 3.9884211242221245e-05, | |
| "loss": 2.3502, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.6161680458306811, | |
| "grad_norm": 1.0439810752868652, | |
| "learning_rate": 3.979919746999014e-05, | |
| "loss": 2.3516, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.6212603437301082, | |
| "grad_norm": 0.8762308955192566, | |
| "learning_rate": 3.971418369775904e-05, | |
| "loss": 2.2836, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.6263526416295353, | |
| "grad_norm": 0.8706735372543335, | |
| "learning_rate": 3.9629169925527936e-05, | |
| "loss": 2.349, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.6314449395289624, | |
| "grad_norm": 0.9823511838912964, | |
| "learning_rate": 3.9544156153296836e-05, | |
| "loss": 2.3356, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.6365372374283895, | |
| "grad_norm": 0.939285933971405, | |
| "learning_rate": 3.9459142381065735e-05, | |
| "loss": 2.3435, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.6416295353278166, | |
| "grad_norm": 1.033011555671692, | |
| "learning_rate": 3.9374128608834634e-05, | |
| "loss": 2.3208, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.6467218332272437, | |
| "grad_norm": 0.9835578799247742, | |
| "learning_rate": 3.928911483660353e-05, | |
| "loss": 2.3332, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.6518141311266709, | |
| "grad_norm": 0.9082310795783997, | |
| "learning_rate": 3.9204101064372426e-05, | |
| "loss": 2.3216, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.6569064290260981, | |
| "grad_norm": 0.8588578701019287, | |
| "learning_rate": 3.9119087292141325e-05, | |
| "loss": 2.3114, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.6619987269255252, | |
| "grad_norm": 1.040531873703003, | |
| "learning_rate": 3.9034073519910225e-05, | |
| "loss": 2.3328, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.6670910248249523, | |
| "grad_norm": 1.0225043296813965, | |
| "learning_rate": 3.894905974767913e-05, | |
| "loss": 2.3245, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.6721833227243794, | |
| "grad_norm": 1.0172550678253174, | |
| "learning_rate": 3.886404597544803e-05, | |
| "loss": 2.3056, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.6772756206238065, | |
| "grad_norm": 0.9119499921798706, | |
| "learning_rate": 3.877903220321692e-05, | |
| "loss": 2.317, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.6823679185232336, | |
| "grad_norm": 0.8971495032310486, | |
| "learning_rate": 3.869401843098582e-05, | |
| "loss": 2.3292, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.6874602164226608, | |
| "grad_norm": 0.9643430709838867, | |
| "learning_rate": 3.860900465875472e-05, | |
| "loss": 2.3779, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.6925525143220879, | |
| "grad_norm": 0.919440507888794, | |
| "learning_rate": 3.852399088652362e-05, | |
| "loss": 2.2993, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.697644812221515, | |
| "grad_norm": 0.9949972033500671, | |
| "learning_rate": 3.843897711429252e-05, | |
| "loss": 2.3255, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.7027371101209421, | |
| "grad_norm": 0.9251271486282349, | |
| "learning_rate": 3.835396334206142e-05, | |
| "loss": 2.2997, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.7078294080203692, | |
| "grad_norm": 0.9567040205001831, | |
| "learning_rate": 3.826894956983031e-05, | |
| "loss": 2.3198, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.7129217059197963, | |
| "grad_norm": 1.1165566444396973, | |
| "learning_rate": 3.818393579759921e-05, | |
| "loss": 2.3074, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.7180140038192234, | |
| "grad_norm": 0.9649367928504944, | |
| "learning_rate": 3.809892202536811e-05, | |
| "loss": 2.2916, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.7231063017186505, | |
| "grad_norm": 0.8595756888389587, | |
| "learning_rate": 3.801390825313701e-05, | |
| "loss": 2.3386, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.7281985996180776, | |
| "grad_norm": 0.7877846360206604, | |
| "learning_rate": 3.792889448090591e-05, | |
| "loss": 2.2741, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.7332908975175048, | |
| "grad_norm": 0.9086227416992188, | |
| "learning_rate": 3.784388070867481e-05, | |
| "loss": 2.3186, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.7383831954169319, | |
| "grad_norm": 0.9466003179550171, | |
| "learning_rate": 3.77588669364437e-05, | |
| "loss": 2.2916, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.743475493316359, | |
| "grad_norm": 0.8069922924041748, | |
| "learning_rate": 3.76738531642126e-05, | |
| "loss": 2.3108, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.7485677912157861, | |
| "grad_norm": 1.0324113368988037, | |
| "learning_rate": 3.75888393919815e-05, | |
| "loss": 2.3066, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.7536600891152132, | |
| "grad_norm": 0.892573893070221, | |
| "learning_rate": 3.75038256197504e-05, | |
| "loss": 2.2738, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.7587523870146403, | |
| "grad_norm": 0.7999922037124634, | |
| "learning_rate": 3.74188118475193e-05, | |
| "loss": 2.3195, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.7638446849140674, | |
| "grad_norm": 1.004957914352417, | |
| "learning_rate": 3.73337980752882e-05, | |
| "loss": 2.2935, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7689369828134945, | |
| "grad_norm": 1.046640157699585, | |
| "learning_rate": 3.72487843030571e-05, | |
| "loss": 2.3109, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.7740292807129217, | |
| "grad_norm": 0.9236047863960266, | |
| "learning_rate": 3.7163770530825996e-05, | |
| "loss": 2.3128, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.7791215786123489, | |
| "grad_norm": 1.0190492868423462, | |
| "learning_rate": 3.7078756758594896e-05, | |
| "loss": 2.3018, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.784213876511776, | |
| "grad_norm": 0.8099306225776672, | |
| "learning_rate": 3.6993742986363795e-05, | |
| "loss": 2.313, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.7893061744112031, | |
| "grad_norm": 0.9618342518806458, | |
| "learning_rate": 3.6908729214132694e-05, | |
| "loss": 2.2864, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.7943984723106302, | |
| "grad_norm": 1.046680212020874, | |
| "learning_rate": 3.6823715441901594e-05, | |
| "loss": 2.2853, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.7994907702100573, | |
| "grad_norm": 0.8486195206642151, | |
| "learning_rate": 3.6738701669670486e-05, | |
| "loss": 2.2854, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.8045830681094844, | |
| "grad_norm": 0.9708773493766785, | |
| "learning_rate": 3.6653687897439386e-05, | |
| "loss": 2.2928, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.8096753660089115, | |
| "grad_norm": 0.8969681262969971, | |
| "learning_rate": 3.6568674125208285e-05, | |
| "loss": 2.2976, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.8147676639083387, | |
| "grad_norm": 0.9385348558425903, | |
| "learning_rate": 3.6483660352977184e-05, | |
| "loss": 2.2847, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.8198599618077658, | |
| "grad_norm": 0.8899937272071838, | |
| "learning_rate": 3.6398646580746083e-05, | |
| "loss": 2.2972, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.8249522597071929, | |
| "grad_norm": 0.8900747299194336, | |
| "learning_rate": 3.631363280851498e-05, | |
| "loss": 2.2952, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.83004455760662, | |
| "grad_norm": 1.026571273803711, | |
| "learning_rate": 3.6228619036283875e-05, | |
| "loss": 2.2842, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.8351368555060471, | |
| "grad_norm": 0.9016963839530945, | |
| "learning_rate": 3.6143605264052775e-05, | |
| "loss": 2.288, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.8402291534054742, | |
| "grad_norm": 0.8101049065589905, | |
| "learning_rate": 3.6058591491821674e-05, | |
| "loss": 2.2486, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.8453214513049013, | |
| "grad_norm": 0.860748827457428, | |
| "learning_rate": 3.597357771959057e-05, | |
| "loss": 2.2911, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.8504137492043284, | |
| "grad_norm": 0.9295821189880371, | |
| "learning_rate": 3.588856394735947e-05, | |
| "loss": 2.2477, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.8555060471037556, | |
| "grad_norm": 0.9582170844078064, | |
| "learning_rate": 3.580355017512837e-05, | |
| "loss": 2.307, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.8605983450031827, | |
| "grad_norm": 0.9199303984642029, | |
| "learning_rate": 3.571853640289727e-05, | |
| "loss": 2.2692, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.8656906429026098, | |
| "grad_norm": 0.8835098743438721, | |
| "learning_rate": 3.563352263066617e-05, | |
| "loss": 2.2681, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.8707829408020369, | |
| "grad_norm": 0.9898850917816162, | |
| "learning_rate": 3.554850885843507e-05, | |
| "loss": 2.2718, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.875875238701464, | |
| "grad_norm": 1.0997586250305176, | |
| "learning_rate": 3.546349508620397e-05, | |
| "loss": 2.2577, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.8809675366008911, | |
| "grad_norm": 0.8374606370925903, | |
| "learning_rate": 3.537848131397287e-05, | |
| "loss": 2.2731, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.8860598345003182, | |
| "grad_norm": 0.9752559065818787, | |
| "learning_rate": 3.529346754174177e-05, | |
| "loss": 2.2776, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.8911521323997453, | |
| "grad_norm": 0.8918510675430298, | |
| "learning_rate": 3.520845376951066e-05, | |
| "loss": 2.2838, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.8962444302991726, | |
| "grad_norm": 0.9751953482627869, | |
| "learning_rate": 3.512343999727956e-05, | |
| "loss": 2.268, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.9013367281985997, | |
| "grad_norm": 0.9787586331367493, | |
| "learning_rate": 3.503842622504846e-05, | |
| "loss": 2.2927, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.9064290260980268, | |
| "grad_norm": 0.9199690222740173, | |
| "learning_rate": 3.495341245281736e-05, | |
| "loss": 2.2785, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.9115213239974539, | |
| "grad_norm": 0.8526634573936462, | |
| "learning_rate": 3.486839868058626e-05, | |
| "loss": 2.2818, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.916613621896881, | |
| "grad_norm": 0.9445266127586365, | |
| "learning_rate": 3.478338490835516e-05, | |
| "loss": 2.3147, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.9217059197963081, | |
| "grad_norm": 0.9607738256454468, | |
| "learning_rate": 3.469837113612405e-05, | |
| "loss": 2.2663, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.9267982176957352, | |
| "grad_norm": 0.8561920523643494, | |
| "learning_rate": 3.461335736389295e-05, | |
| "loss": 2.2355, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.9318905155951623, | |
| "grad_norm": 0.8668131828308105, | |
| "learning_rate": 3.452834359166185e-05, | |
| "loss": 2.2801, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.9369828134945895, | |
| "grad_norm": 0.9161975979804993, | |
| "learning_rate": 3.444332981943075e-05, | |
| "loss": 2.2668, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.9420751113940166, | |
| "grad_norm": 0.9021576046943665, | |
| "learning_rate": 3.435831604719965e-05, | |
| "loss": 2.2887, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.9471674092934437, | |
| "grad_norm": 0.8754701018333435, | |
| "learning_rate": 3.4273302274968546e-05, | |
| "loss": 2.2567, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.9522597071928708, | |
| "grad_norm": 0.9762224555015564, | |
| "learning_rate": 3.4188288502737446e-05, | |
| "loss": 2.2574, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.9573520050922979, | |
| "grad_norm": 0.8961549401283264, | |
| "learning_rate": 3.4103274730506345e-05, | |
| "loss": 2.252, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.962444302991725, | |
| "grad_norm": 0.8942741751670837, | |
| "learning_rate": 3.4018260958275244e-05, | |
| "loss": 2.3098, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.9675366008911521, | |
| "grad_norm": 0.8678953051567078, | |
| "learning_rate": 3.3933247186044144e-05, | |
| "loss": 2.2751, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.9726288987905792, | |
| "grad_norm": 0.9803009629249573, | |
| "learning_rate": 3.384823341381304e-05, | |
| "loss": 2.2329, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.9777211966900063, | |
| "grad_norm": 0.8548142313957214, | |
| "learning_rate": 3.376321964158194e-05, | |
| "loss": 2.2577, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.9828134945894335, | |
| "grad_norm": 0.8247301578521729, | |
| "learning_rate": 3.3678205869350835e-05, | |
| "loss": 2.2776, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.9879057924888606, | |
| "grad_norm": 0.8970145583152771, | |
| "learning_rate": 3.3593192097119734e-05, | |
| "loss": 2.2436, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.9929980903882877, | |
| "grad_norm": 0.9450452923774719, | |
| "learning_rate": 3.3508178324888633e-05, | |
| "loss": 2.274, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.9980903882877148, | |
| "grad_norm": 0.9455347061157227, | |
| "learning_rate": 3.342316455265753e-05, | |
| "loss": 2.2618, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.0031572246976448, | |
| "grad_norm": 0.9727960228919983, | |
| "learning_rate": 3.333815078042643e-05, | |
| "loss": 2.2148, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.008249522597072, | |
| "grad_norm": 1.0244638919830322, | |
| "learning_rate": 3.325313700819533e-05, | |
| "loss": 2.2209, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.013341820496499, | |
| "grad_norm": 1.002837061882019, | |
| "learning_rate": 3.3168123235964224e-05, | |
| "loss": 2.2011, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.0184341183959262, | |
| "grad_norm": 0.8974801898002625, | |
| "learning_rate": 3.308310946373312e-05, | |
| "loss": 2.2186, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.0235264162953532, | |
| "grad_norm": 1.0660030841827393, | |
| "learning_rate": 3.299809569150202e-05, | |
| "loss": 2.2368, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.0286187141947805, | |
| "grad_norm": 0.8874944448471069, | |
| "learning_rate": 3.291308191927092e-05, | |
| "loss": 2.2552, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.0337110120942075, | |
| "grad_norm": 0.9332163333892822, | |
| "learning_rate": 3.282806814703982e-05, | |
| "loss": 2.231, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.0388033099936347, | |
| "grad_norm": 0.8272064328193665, | |
| "learning_rate": 3.274305437480872e-05, | |
| "loss": 2.2287, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.0438956078930617, | |
| "grad_norm": 0.8333924412727356, | |
| "learning_rate": 3.265804060257761e-05, | |
| "loss": 2.2217, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.048987905792489, | |
| "grad_norm": 0.9589939117431641, | |
| "learning_rate": 3.257302683034652e-05, | |
| "loss": 2.2328, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.054080203691916, | |
| "grad_norm": 0.8918903470039368, | |
| "learning_rate": 3.248801305811542e-05, | |
| "loss": 2.2169, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.0591725015913431, | |
| "grad_norm": 0.9166114926338196, | |
| "learning_rate": 3.240299928588432e-05, | |
| "loss": 2.2605, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.0642647994907701, | |
| "grad_norm": 0.8604680895805359, | |
| "learning_rate": 3.231798551365322e-05, | |
| "loss": 2.2591, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.0693570973901974, | |
| "grad_norm": 0.82822185754776, | |
| "learning_rate": 3.2232971741422117e-05, | |
| "loss": 2.2075, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.0744493952896244, | |
| "grad_norm": 0.8195912837982178, | |
| "learning_rate": 3.214795796919101e-05, | |
| "loss": 2.2054, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.0795416931890516, | |
| "grad_norm": 0.9587050080299377, | |
| "learning_rate": 3.206294419695991e-05, | |
| "loss": 2.2558, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.0846339910884786, | |
| "grad_norm": 0.9604052901268005, | |
| "learning_rate": 3.197793042472881e-05, | |
| "loss": 2.2023, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.0897262889879058, | |
| "grad_norm": 0.9480250477790833, | |
| "learning_rate": 3.189291665249771e-05, | |
| "loss": 2.2168, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.094818586887333, | |
| "grad_norm": 0.8999929428100586, | |
| "learning_rate": 3.1807902880266606e-05, | |
| "loss": 2.2089, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.09991088478676, | |
| "grad_norm": 0.9180619716644287, | |
| "learning_rate": 3.1722889108035506e-05, | |
| "loss": 2.2092, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.105003182686187, | |
| "grad_norm": 0.8434627056121826, | |
| "learning_rate": 3.16378753358044e-05, | |
| "loss": 2.2179, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.1100954805856142, | |
| "grad_norm": 0.8810749053955078, | |
| "learning_rate": 3.15528615635733e-05, | |
| "loss": 2.1857, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.1151877784850415, | |
| "grad_norm": 0.9257334470748901, | |
| "learning_rate": 3.14678477913422e-05, | |
| "loss": 2.2205, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.1202800763844685, | |
| "grad_norm": 0.8661274313926697, | |
| "learning_rate": 3.1382834019111096e-05, | |
| "loss": 2.1995, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.1253723742838957, | |
| "grad_norm": 0.8728938698768616, | |
| "learning_rate": 3.1297820246879996e-05, | |
| "loss": 2.2125, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.1304646721833227, | |
| "grad_norm": 0.9176629185676575, | |
| "learning_rate": 3.1212806474648895e-05, | |
| "loss": 2.1908, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.13555697008275, | |
| "grad_norm": 0.9520237445831299, | |
| "learning_rate": 3.112779270241779e-05, | |
| "loss": 2.2345, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.140649267982177, | |
| "grad_norm": 0.8356249928474426, | |
| "learning_rate": 3.1042778930186694e-05, | |
| "loss": 2.2452, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.1457415658816041, | |
| "grad_norm": 1.0978131294250488, | |
| "learning_rate": 3.095776515795559e-05, | |
| "loss": 2.1776, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.1508338637810311, | |
| "grad_norm": 1.1184298992156982, | |
| "learning_rate": 3.087275138572449e-05, | |
| "loss": 2.2174, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.1559261616804584, | |
| "grad_norm": 0.9109058380126953, | |
| "learning_rate": 3.078773761349339e-05, | |
| "loss": 2.2168, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.1610184595798854, | |
| "grad_norm": 0.8274030089378357, | |
| "learning_rate": 3.0702723841262284e-05, | |
| "loss": 2.224, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.1661107574793126, | |
| "grad_norm": 0.8593317270278931, | |
| "learning_rate": 3.0617710069031183e-05, | |
| "loss": 2.2653, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.1712030553787396, | |
| "grad_norm": 1.1305369138717651, | |
| "learning_rate": 3.053269629680008e-05, | |
| "loss": 2.241, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.1762953532781668, | |
| "grad_norm": 1.0249735116958618, | |
| "learning_rate": 3.0447682524568982e-05, | |
| "loss": 2.2044, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 1.1813876511775938, | |
| "grad_norm": 0.762690007686615, | |
| "learning_rate": 3.036266875233788e-05, | |
| "loss": 2.2057, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.186479949077021, | |
| "grad_norm": 0.7995686531066895, | |
| "learning_rate": 3.0277654980106777e-05, | |
| "loss": 2.2435, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 1.191572246976448, | |
| "grad_norm": 1.0537996292114258, | |
| "learning_rate": 3.0192641207875677e-05, | |
| "loss": 2.2155, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.1966645448758753, | |
| "grad_norm": 0.8992569446563721, | |
| "learning_rate": 3.0107627435644576e-05, | |
| "loss": 2.217, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.2017568427753023, | |
| "grad_norm": 0.9041591286659241, | |
| "learning_rate": 3.0022613663413472e-05, | |
| "loss": 2.2277, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.2068491406747295, | |
| "grad_norm": 0.9437869787216187, | |
| "learning_rate": 2.993759989118237e-05, | |
| "loss": 2.2151, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 1.2119414385741565, | |
| "grad_norm": 0.7999377846717834, | |
| "learning_rate": 2.985258611895127e-05, | |
| "loss": 2.2103, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.2170337364735837, | |
| "grad_norm": 0.932995080947876, | |
| "learning_rate": 2.976757234672017e-05, | |
| "loss": 2.1964, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 1.222126034373011, | |
| "grad_norm": 0.846868097782135, | |
| "learning_rate": 2.9682558574489066e-05, | |
| "loss": 2.1821, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.227218332272438, | |
| "grad_norm": 0.889284610748291, | |
| "learning_rate": 2.9597544802257965e-05, | |
| "loss": 2.2227, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 1.2323106301718652, | |
| "grad_norm": 0.9376260042190552, | |
| "learning_rate": 2.9512531030026865e-05, | |
| "loss": 2.226, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.2374029280712922, | |
| "grad_norm": 0.8779696226119995, | |
| "learning_rate": 2.9427517257795767e-05, | |
| "loss": 2.2086, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.2424952259707194, | |
| "grad_norm": 0.9524549841880798, | |
| "learning_rate": 2.9342503485564667e-05, | |
| "loss": 2.2026, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.2475875238701464, | |
| "grad_norm": 0.919808030128479, | |
| "learning_rate": 2.9257489713333563e-05, | |
| "loss": 2.192, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.2526798217695736, | |
| "grad_norm": 1.0228092670440674, | |
| "learning_rate": 2.9172475941102462e-05, | |
| "loss": 2.2241, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.2577721196690006, | |
| "grad_norm": 0.8363624811172485, | |
| "learning_rate": 2.908746216887136e-05, | |
| "loss": 2.1808, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.2628644175684278, | |
| "grad_norm": 0.8711551427841187, | |
| "learning_rate": 2.9002448396640257e-05, | |
| "loss": 2.2093, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.2679567154678548, | |
| "grad_norm": 0.9497014284133911, | |
| "learning_rate": 2.8917434624409156e-05, | |
| "loss": 2.1856, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.273049013367282, | |
| "grad_norm": 0.9282352924346924, | |
| "learning_rate": 2.8832420852178056e-05, | |
| "loss": 2.1787, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.278141311266709, | |
| "grad_norm": 0.9017792344093323, | |
| "learning_rate": 2.8747407079946952e-05, | |
| "loss": 2.2054, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.2832336091661363, | |
| "grad_norm": 0.9470519423484802, | |
| "learning_rate": 2.866239330771585e-05, | |
| "loss": 2.1885, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.2883259070655633, | |
| "grad_norm": 0.991397500038147, | |
| "learning_rate": 2.857737953548475e-05, | |
| "loss": 2.1875, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.2934182049649905, | |
| "grad_norm": 0.920644223690033, | |
| "learning_rate": 2.8492365763253646e-05, | |
| "loss": 2.2418, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.2985105028644175, | |
| "grad_norm": 0.8312422037124634, | |
| "learning_rate": 2.8407351991022546e-05, | |
| "loss": 2.1635, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.3036028007638447, | |
| "grad_norm": 0.9457144737243652, | |
| "learning_rate": 2.8322338218791445e-05, | |
| "loss": 2.1945, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.308695098663272, | |
| "grad_norm": 0.8914629220962524, | |
| "learning_rate": 2.8237324446560344e-05, | |
| "loss": 2.2092, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.313787396562699, | |
| "grad_norm": 0.9140703082084656, | |
| "learning_rate": 2.815231067432924e-05, | |
| "loss": 2.2162, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.318879694462126, | |
| "grad_norm": 0.926543116569519, | |
| "learning_rate": 2.806729690209814e-05, | |
| "loss": 2.1906, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.3239719923615532, | |
| "grad_norm": 0.888692319393158, | |
| "learning_rate": 2.798228312986704e-05, | |
| "loss": 2.1866, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.3290642902609804, | |
| "grad_norm": 0.7925876379013062, | |
| "learning_rate": 2.7897269357635935e-05, | |
| "loss": 2.1988, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.3341565881604074, | |
| "grad_norm": 0.8814985752105713, | |
| "learning_rate": 2.781225558540484e-05, | |
| "loss": 2.2072, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.3392488860598344, | |
| "grad_norm": 0.8415858745574951, | |
| "learning_rate": 2.7727241813173737e-05, | |
| "loss": 2.2227, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.3443411839592616, | |
| "grad_norm": 0.9423860907554626, | |
| "learning_rate": 2.7642228040942636e-05, | |
| "loss": 2.2426, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.3494334818586888, | |
| "grad_norm": 0.8816553950309753, | |
| "learning_rate": 2.7557214268711535e-05, | |
| "loss": 2.206, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.3545257797581158, | |
| "grad_norm": 0.8283177018165588, | |
| "learning_rate": 2.747220049648043e-05, | |
| "loss": 2.1859, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.3596180776575428, | |
| "grad_norm": 0.8860555291175842, | |
| "learning_rate": 2.738718672424933e-05, | |
| "loss": 2.178, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.36471037555697, | |
| "grad_norm": 0.8853309154510498, | |
| "learning_rate": 2.730217295201823e-05, | |
| "loss": 2.1844, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.3698026734563973, | |
| "grad_norm": 0.9043028950691223, | |
| "learning_rate": 2.7217159179787126e-05, | |
| "loss": 2.2105, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.3748949713558243, | |
| "grad_norm": 0.8943936824798584, | |
| "learning_rate": 2.7132145407556025e-05, | |
| "loss": 2.1814, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.3799872692552515, | |
| "grad_norm": 0.7901210188865662, | |
| "learning_rate": 2.7047131635324925e-05, | |
| "loss": 2.1819, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.3850795671546785, | |
| "grad_norm": 0.9602735638618469, | |
| "learning_rate": 2.696211786309382e-05, | |
| "loss": 2.2121, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.3901718650541057, | |
| "grad_norm": 0.8327048420906067, | |
| "learning_rate": 2.687710409086272e-05, | |
| "loss": 2.2128, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.3952641629535327, | |
| "grad_norm": 0.8546739220619202, | |
| "learning_rate": 2.679209031863162e-05, | |
| "loss": 2.2035, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.40035646085296, | |
| "grad_norm": 1.585236668586731, | |
| "learning_rate": 2.6707076546400515e-05, | |
| "loss": 2.1845, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.405448758752387, | |
| "grad_norm": 0.9497547745704651, | |
| "learning_rate": 2.6622062774169415e-05, | |
| "loss": 2.1886, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.4105410566518142, | |
| "grad_norm": 0.8747720718383789, | |
| "learning_rate": 2.6537049001938314e-05, | |
| "loss": 2.1735, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.4156333545512412, | |
| "grad_norm": 0.9204273223876953, | |
| "learning_rate": 2.6452035229707213e-05, | |
| "loss": 2.2153, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.4207256524506684, | |
| "grad_norm": 0.868325412273407, | |
| "learning_rate": 2.636702145747611e-05, | |
| "loss": 2.209, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.4258179503500954, | |
| "grad_norm": 0.9367715716362, | |
| "learning_rate": 2.6282007685245015e-05, | |
| "loss": 2.1868, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.4309102482495226, | |
| "grad_norm": 0.9658358693122864, | |
| "learning_rate": 2.619699391301391e-05, | |
| "loss": 2.1757, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.4360025461489498, | |
| "grad_norm": 0.8091734051704407, | |
| "learning_rate": 2.611198014078281e-05, | |
| "loss": 2.1878, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.4410948440483768, | |
| "grad_norm": 0.8200072050094604, | |
| "learning_rate": 2.602696636855171e-05, | |
| "loss": 2.192, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.4461871419478038, | |
| "grad_norm": 0.9280868768692017, | |
| "learning_rate": 2.5941952596320606e-05, | |
| "loss": 2.1829, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.451279439847231, | |
| "grad_norm": 0.9731032252311707, | |
| "learning_rate": 2.5856938824089505e-05, | |
| "loss": 2.156, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.4563717377466583, | |
| "grad_norm": 0.8023040294647217, | |
| "learning_rate": 2.5771925051858404e-05, | |
| "loss": 2.1913, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.4614640356460853, | |
| "grad_norm": 1.003476619720459, | |
| "learning_rate": 2.56869112796273e-05, | |
| "loss": 2.1537, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.4665563335455123, | |
| "grad_norm": 1.0280425548553467, | |
| "learning_rate": 2.56018975073962e-05, | |
| "loss": 2.2106, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.4716486314449395, | |
| "grad_norm": 0.9685016870498657, | |
| "learning_rate": 2.55168837351651e-05, | |
| "loss": 2.1758, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.4767409293443667, | |
| "grad_norm": 0.8572561144828796, | |
| "learning_rate": 2.5431869962933995e-05, | |
| "loss": 2.1647, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.4818332272437937, | |
| "grad_norm": 0.8688543438911438, | |
| "learning_rate": 2.5346856190702894e-05, | |
| "loss": 2.1973, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.486925525143221, | |
| "grad_norm": 1.0197324752807617, | |
| "learning_rate": 2.5261842418471794e-05, | |
| "loss": 2.1649, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.492017823042648, | |
| "grad_norm": 0.8760496377944946, | |
| "learning_rate": 2.517682864624069e-05, | |
| "loss": 2.2024, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.4971101209420752, | |
| "grad_norm": 0.9327671527862549, | |
| "learning_rate": 2.509181487400959e-05, | |
| "loss": 2.2006, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.5022024188415022, | |
| "grad_norm": 0.9184695482254028, | |
| "learning_rate": 2.5006801101778488e-05, | |
| "loss": 2.1616, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.5072947167409292, | |
| "grad_norm": 0.8531858325004578, | |
| "learning_rate": 2.4921787329547387e-05, | |
| "loss": 2.1688, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.5123870146403564, | |
| "grad_norm": 0.8902334570884705, | |
| "learning_rate": 2.4836773557316287e-05, | |
| "loss": 2.1692, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.5174793125397836, | |
| "grad_norm": 0.8231461644172668, | |
| "learning_rate": 2.4751759785085186e-05, | |
| "loss": 2.1855, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.5225716104392109, | |
| "grad_norm": 0.9362125396728516, | |
| "learning_rate": 2.4666746012854082e-05, | |
| "loss": 2.1798, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.5276639083386379, | |
| "grad_norm": 0.8145864009857178, | |
| "learning_rate": 2.458173224062298e-05, | |
| "loss": 2.1655, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.5327562062380649, | |
| "grad_norm": 0.9912553429603577, | |
| "learning_rate": 2.449671846839188e-05, | |
| "loss": 2.2025, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.537848504137492, | |
| "grad_norm": 0.818953275680542, | |
| "learning_rate": 2.4411704696160777e-05, | |
| "loss": 2.1845, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.5429408020369193, | |
| "grad_norm": 0.845649778842926, | |
| "learning_rate": 2.4326690923929676e-05, | |
| "loss": 2.199, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.5480330999363463, | |
| "grad_norm": 1.0135074853897095, | |
| "learning_rate": 2.424167715169858e-05, | |
| "loss": 2.1912, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.5531253978357733, | |
| "grad_norm": 0.9612752199172974, | |
| "learning_rate": 2.4156663379467475e-05, | |
| "loss": 2.159, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.5582176957352005, | |
| "grad_norm": 0.8450791239738464, | |
| "learning_rate": 2.4071649607236374e-05, | |
| "loss": 2.1615, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.5633099936346277, | |
| "grad_norm": 0.9979317784309387, | |
| "learning_rate": 2.3986635835005273e-05, | |
| "loss": 2.1713, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.5684022915340547, | |
| "grad_norm": 0.904403567314148, | |
| "learning_rate": 2.390162206277417e-05, | |
| "loss": 2.2114, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.5734945894334817, | |
| "grad_norm": 0.8977887630462646, | |
| "learning_rate": 2.381660829054307e-05, | |
| "loss": 2.1867, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.578586887332909, | |
| "grad_norm": 0.9076321125030518, | |
| "learning_rate": 2.3731594518311968e-05, | |
| "loss": 2.167, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.5836791852323362, | |
| "grad_norm": 0.9048725962638855, | |
| "learning_rate": 2.3646580746080864e-05, | |
| "loss": 2.1645, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.5887714831317632, | |
| "grad_norm": 0.9547775387763977, | |
| "learning_rate": 2.3561566973849763e-05, | |
| "loss": 2.1849, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.5938637810311902, | |
| "grad_norm": 0.7886509299278259, | |
| "learning_rate": 2.3476553201618666e-05, | |
| "loss": 2.187, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.5989560789306174, | |
| "grad_norm": 0.8473970293998718, | |
| "learning_rate": 2.3391539429387562e-05, | |
| "loss": 2.1722, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.6040483768300446, | |
| "grad_norm": 0.8617937564849854, | |
| "learning_rate": 2.330652565715646e-05, | |
| "loss": 2.2002, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.6091406747294716, | |
| "grad_norm": 0.9672524333000183, | |
| "learning_rate": 2.322151188492536e-05, | |
| "loss": 2.1623, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.6142329726288986, | |
| "grad_norm": 0.8769922852516174, | |
| "learning_rate": 2.3136498112694256e-05, | |
| "loss": 2.1695, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.6193252705283259, | |
| "grad_norm": 0.8249488472938538, | |
| "learning_rate": 2.3051484340463156e-05, | |
| "loss": 2.1647, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.624417568427753, | |
| "grad_norm": 0.9503587484359741, | |
| "learning_rate": 2.2966470568232055e-05, | |
| "loss": 2.2024, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.62950986632718, | |
| "grad_norm": 0.9500870108604431, | |
| "learning_rate": 2.288145679600095e-05, | |
| "loss": 2.1467, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.634602164226607, | |
| "grad_norm": 0.888297975063324, | |
| "learning_rate": 2.279644302376985e-05, | |
| "loss": 2.1586, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.6396944621260343, | |
| "grad_norm": 0.8958535194396973, | |
| "learning_rate": 2.2711429251538753e-05, | |
| "loss": 2.1923, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.6447867600254615, | |
| "grad_norm": 0.7949930429458618, | |
| "learning_rate": 2.262641547930765e-05, | |
| "loss": 2.1925, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.6498790579248888, | |
| "grad_norm": 0.8516358733177185, | |
| "learning_rate": 2.2541401707076548e-05, | |
| "loss": 2.1818, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.6549713558243158, | |
| "grad_norm": 0.9597014784812927, | |
| "learning_rate": 2.2456387934845448e-05, | |
| "loss": 2.1412, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.6600636537237428, | |
| "grad_norm": 0.8643897771835327, | |
| "learning_rate": 2.2371374162614344e-05, | |
| "loss": 2.1645, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.66515595162317, | |
| "grad_norm": 1.069393515586853, | |
| "learning_rate": 2.2286360390383243e-05, | |
| "loss": 2.1468, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.6702482495225972, | |
| "grad_norm": 0.8896872401237488, | |
| "learning_rate": 2.2201346618152142e-05, | |
| "loss": 2.1732, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.6753405474220242, | |
| "grad_norm": 0.8662711381912231, | |
| "learning_rate": 2.2116332845921038e-05, | |
| "loss": 2.1901, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.6804328453214512, | |
| "grad_norm": 0.7606475353240967, | |
| "learning_rate": 2.2031319073689937e-05, | |
| "loss": 2.2045, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.6855251432208784, | |
| "grad_norm": 0.9675360918045044, | |
| "learning_rate": 2.1946305301458837e-05, | |
| "loss": 2.1782, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.6906174411203057, | |
| "grad_norm": 0.8184406757354736, | |
| "learning_rate": 2.1861291529227736e-05, | |
| "loss": 2.1827, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.6957097390197327, | |
| "grad_norm": 0.8774561882019043, | |
| "learning_rate": 2.1776277756996635e-05, | |
| "loss": 2.1592, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.7008020369191597, | |
| "grad_norm": 0.8667624592781067, | |
| "learning_rate": 2.1691263984765535e-05, | |
| "loss": 2.1779, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.7058943348185869, | |
| "grad_norm": 0.9804625511169434, | |
| "learning_rate": 2.160625021253443e-05, | |
| "loss": 2.1985, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.710986632718014, | |
| "grad_norm": 0.9614706039428711, | |
| "learning_rate": 2.152123644030333e-05, | |
| "loss": 2.1687, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.716078930617441, | |
| "grad_norm": 0.8270972967147827, | |
| "learning_rate": 2.143622266807223e-05, | |
| "loss": 2.1539, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.721171228516868, | |
| "grad_norm": 0.9252774119377136, | |
| "learning_rate": 2.1351208895841125e-05, | |
| "loss": 2.16, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.7262635264162953, | |
| "grad_norm": 0.855818510055542, | |
| "learning_rate": 2.1266195123610025e-05, | |
| "loss": 2.1928, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.7313558243157225, | |
| "grad_norm": 0.8505380153656006, | |
| "learning_rate": 2.1181181351378924e-05, | |
| "loss": 2.1748, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.7364481222151495, | |
| "grad_norm": 0.8876926898956299, | |
| "learning_rate": 2.1096167579147823e-05, | |
| "loss": 2.2102, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.7415404201145765, | |
| "grad_norm": 0.8772891163825989, | |
| "learning_rate": 2.1011153806916723e-05, | |
| "loss": 2.1691, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.7466327180140038, | |
| "grad_norm": 0.9799501299858093, | |
| "learning_rate": 2.0926140034685622e-05, | |
| "loss": 2.1858, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.751725015913431, | |
| "grad_norm": 0.8863718509674072, | |
| "learning_rate": 2.0841126262454518e-05, | |
| "loss": 2.1745, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.7568173138128582, | |
| "grad_norm": 0.8394114375114441, | |
| "learning_rate": 2.0756112490223417e-05, | |
| "loss": 2.1629, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.7619096117122852, | |
| "grad_norm": 0.8472095727920532, | |
| "learning_rate": 2.0671098717992317e-05, | |
| "loss": 2.1665, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.7670019096117122, | |
| "grad_norm": 0.9460027813911438, | |
| "learning_rate": 2.0586084945761212e-05, | |
| "loss": 2.1458, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.7720942075111394, | |
| "grad_norm": 0.9211781620979309, | |
| "learning_rate": 2.0501071173530112e-05, | |
| "loss": 2.1922, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.7771865054105667, | |
| "grad_norm": 0.9996361136436462, | |
| "learning_rate": 2.041605740129901e-05, | |
| "loss": 2.1447, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.7822788033099937, | |
| "grad_norm": 0.8266726136207581, | |
| "learning_rate": 2.033104362906791e-05, | |
| "loss": 2.1881, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.7873711012094207, | |
| "grad_norm": 0.8855674862861633, | |
| "learning_rate": 2.024602985683681e-05, | |
| "loss": 2.198, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.7924633991088479, | |
| "grad_norm": 0.9789201021194458, | |
| "learning_rate": 2.016101608460571e-05, | |
| "loss": 2.1685, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.797555697008275, | |
| "grad_norm": 0.8354413509368896, | |
| "learning_rate": 2.0076002312374605e-05, | |
| "loss": 2.1535, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.8026479949077021, | |
| "grad_norm": 0.9418453574180603, | |
| "learning_rate": 1.9990988540143504e-05, | |
| "loss": 2.1671, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.8077402928071291, | |
| "grad_norm": 0.9462503790855408, | |
| "learning_rate": 1.9905974767912404e-05, | |
| "loss": 2.1339, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.8128325907065563, | |
| "grad_norm": 0.8490837216377258, | |
| "learning_rate": 1.98209609956813e-05, | |
| "loss": 2.1528, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.8179248886059836, | |
| "grad_norm": 0.9105218052864075, | |
| "learning_rate": 1.97359472234502e-05, | |
| "loss": 2.1717, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.8230171865054106, | |
| "grad_norm": 0.9058020710945129, | |
| "learning_rate": 1.9650933451219098e-05, | |
| "loss": 2.1535, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.8281094844048376, | |
| "grad_norm": 0.9724037647247314, | |
| "learning_rate": 1.9565919678987994e-05, | |
| "loss": 2.166, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.8332017823042648, | |
| "grad_norm": 0.9018999338150024, | |
| "learning_rate": 1.9480905906756897e-05, | |
| "loss": 2.1528, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.838294080203692, | |
| "grad_norm": 0.9223784804344177, | |
| "learning_rate": 1.9395892134525796e-05, | |
| "loss": 2.1982, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.843386378103119, | |
| "grad_norm": 0.8883550763130188, | |
| "learning_rate": 1.9310878362294692e-05, | |
| "loss": 2.1701, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.848478676002546, | |
| "grad_norm": 0.8294488787651062, | |
| "learning_rate": 1.922586459006359e-05, | |
| "loss": 2.2064, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.8535709739019732, | |
| "grad_norm": 0.8737560510635376, | |
| "learning_rate": 1.914085081783249e-05, | |
| "loss": 2.1529, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.8586632718014005, | |
| "grad_norm": 0.8156319260597229, | |
| "learning_rate": 1.9055837045601387e-05, | |
| "loss": 2.1628, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.8637555697008275, | |
| "grad_norm": 0.8669657111167908, | |
| "learning_rate": 1.8970823273370286e-05, | |
| "loss": 2.2155, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.8688478676002545, | |
| "grad_norm": 0.8657876253128052, | |
| "learning_rate": 1.8885809501139185e-05, | |
| "loss": 2.1506, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.8739401654996817, | |
| "grad_norm": 0.8771129250526428, | |
| "learning_rate": 1.880079572890808e-05, | |
| "loss": 2.1797, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.879032463399109, | |
| "grad_norm": 0.8845404982566833, | |
| "learning_rate": 1.8715781956676984e-05, | |
| "loss": 2.1434, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.8841247612985361, | |
| "grad_norm": 0.9354609251022339, | |
| "learning_rate": 1.8630768184445883e-05, | |
| "loss": 2.1701, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.8892170591979631, | |
| "grad_norm": 0.7781304717063904, | |
| "learning_rate": 1.854575441221478e-05, | |
| "loss": 2.2095, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.8943093570973901, | |
| "grad_norm": 0.9069561958312988, | |
| "learning_rate": 1.846074063998368e-05, | |
| "loss": 2.169, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.8994016549968173, | |
| "grad_norm": 0.9173194766044617, | |
| "learning_rate": 1.8375726867752578e-05, | |
| "loss": 2.121, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.9044939528962446, | |
| "grad_norm": 0.864583432674408, | |
| "learning_rate": 1.8290713095521474e-05, | |
| "loss": 2.1711, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.9095862507956716, | |
| "grad_norm": 0.7620731592178345, | |
| "learning_rate": 1.8205699323290373e-05, | |
| "loss": 2.1967, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.9146785486950986, | |
| "grad_norm": 0.7830232977867126, | |
| "learning_rate": 1.8120685551059273e-05, | |
| "loss": 2.1574, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.9197708465945258, | |
| "grad_norm": 0.8825329542160034, | |
| "learning_rate": 1.803567177882817e-05, | |
| "loss": 2.1432, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.924863144493953, | |
| "grad_norm": 0.9680500030517578, | |
| "learning_rate": 1.795065800659707e-05, | |
| "loss": 2.1808, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.92995544239338, | |
| "grad_norm": 0.9914782047271729, | |
| "learning_rate": 1.786564423436597e-05, | |
| "loss": 2.1945, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.935047740292807, | |
| "grad_norm": 0.882604718208313, | |
| "learning_rate": 1.7780630462134867e-05, | |
| "loss": 2.15, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.9401400381922342, | |
| "grad_norm": 0.8211714625358582, | |
| "learning_rate": 1.7695616689903766e-05, | |
| "loss": 2.178, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.9452323360916615, | |
| "grad_norm": 0.9662156701087952, | |
| "learning_rate": 1.7610602917672665e-05, | |
| "loss": 2.164, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.9503246339910885, | |
| "grad_norm": 0.8627343773841858, | |
| "learning_rate": 1.752558914544156e-05, | |
| "loss": 2.1977, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.9554169318905155, | |
| "grad_norm": 0.8883799910545349, | |
| "learning_rate": 1.744057537321046e-05, | |
| "loss": 2.1813, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.9605092297899427, | |
| "grad_norm": 0.9309747219085693, | |
| "learning_rate": 1.735556160097936e-05, | |
| "loss": 2.1686, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.96560152768937, | |
| "grad_norm": 0.9126595854759216, | |
| "learning_rate": 1.7270547828748256e-05, | |
| "loss": 2.1571, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.970693825588797, | |
| "grad_norm": 0.9490466117858887, | |
| "learning_rate": 1.718553405651716e-05, | |
| "loss": 2.1589, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.975786123488224, | |
| "grad_norm": 0.8641236424446106, | |
| "learning_rate": 1.7100520284286058e-05, | |
| "loss": 2.1728, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.9808784213876511, | |
| "grad_norm": 1.040710210800171, | |
| "learning_rate": 1.7015506512054954e-05, | |
| "loss": 2.1888, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.9859707192870784, | |
| "grad_norm": 0.8207067251205444, | |
| "learning_rate": 1.6930492739823853e-05, | |
| "loss": 2.1504, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.9910630171865054, | |
| "grad_norm": 0.8451477289199829, | |
| "learning_rate": 1.6845478967592752e-05, | |
| "loss": 2.1791, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.9961553150859326, | |
| "grad_norm": 0.9080301523208618, | |
| "learning_rate": 1.6760465195361648e-05, | |
| "loss": 2.181, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 2.0012221514958624, | |
| "grad_norm": 0.9207277297973633, | |
| "learning_rate": 1.6675451423130548e-05, | |
| "loss": 2.1537, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 2.0063144493952896, | |
| "grad_norm": 0.8472919464111328, | |
| "learning_rate": 1.6590437650899447e-05, | |
| "loss": 2.1691, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 2.011406747294717, | |
| "grad_norm": 0.9604014754295349, | |
| "learning_rate": 1.6505423878668343e-05, | |
| "loss": 2.1409, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.016499045194144, | |
| "grad_norm": 0.957785964012146, | |
| "learning_rate": 1.6420410106437242e-05, | |
| "loss": 2.126, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 2.021591343093571, | |
| "grad_norm": 0.8542806506156921, | |
| "learning_rate": 1.6335396334206145e-05, | |
| "loss": 2.1532, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 2.026683640992998, | |
| "grad_norm": 0.9949219822883606, | |
| "learning_rate": 1.625038256197504e-05, | |
| "loss": 2.1437, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 2.0317759388924252, | |
| "grad_norm": 0.8735845685005188, | |
| "learning_rate": 1.616536878974394e-05, | |
| "loss": 2.133, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 2.0368682367918525, | |
| "grad_norm": 0.9472355842590332, | |
| "learning_rate": 1.608035501751284e-05, | |
| "loss": 2.146, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.0419605346912792, | |
| "grad_norm": 0.9042348861694336, | |
| "learning_rate": 1.5995341245281735e-05, | |
| "loss": 2.1288, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 2.0470528325907065, | |
| "grad_norm": 0.8667154908180237, | |
| "learning_rate": 1.5910327473050635e-05, | |
| "loss": 2.1182, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 2.0521451304901337, | |
| "grad_norm": 0.9168582558631897, | |
| "learning_rate": 1.5825313700819534e-05, | |
| "loss": 2.1227, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 2.057237428389561, | |
| "grad_norm": 0.8843423128128052, | |
| "learning_rate": 1.574029992858843e-05, | |
| "loss": 2.1564, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 2.0623297262889877, | |
| "grad_norm": 0.8709278106689453, | |
| "learning_rate": 1.565528615635733e-05, | |
| "loss": 2.129, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.067422024188415, | |
| "grad_norm": 1.0448068380355835, | |
| "learning_rate": 1.5570272384126232e-05, | |
| "loss": 2.1259, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 2.072514322087842, | |
| "grad_norm": 1.014841914176941, | |
| "learning_rate": 1.5485258611895128e-05, | |
| "loss": 2.1526, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 2.0776066199872694, | |
| "grad_norm": 0.9346544146537781, | |
| "learning_rate": 1.5400244839664027e-05, | |
| "loss": 2.1349, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 2.082698917886696, | |
| "grad_norm": 1.029351830482483, | |
| "learning_rate": 1.5315231067432927e-05, | |
| "loss": 2.1224, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 2.0877912157861234, | |
| "grad_norm": 0.8560373783111572, | |
| "learning_rate": 1.5230217295201824e-05, | |
| "loss": 2.0945, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.0928835136855506, | |
| "grad_norm": 0.8771845698356628, | |
| "learning_rate": 1.5145203522970722e-05, | |
| "loss": 2.1215, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 2.097975811584978, | |
| "grad_norm": 0.7786750197410583, | |
| "learning_rate": 1.506018975073962e-05, | |
| "loss": 2.1119, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 2.103068109484405, | |
| "grad_norm": 0.8961013555526733, | |
| "learning_rate": 1.4975175978508519e-05, | |
| "loss": 2.1284, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 2.108160407383832, | |
| "grad_norm": 0.7917054295539856, | |
| "learning_rate": 1.4890162206277417e-05, | |
| "loss": 2.1663, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 2.113252705283259, | |
| "grad_norm": 0.9229695200920105, | |
| "learning_rate": 1.4805148434046318e-05, | |
| "loss": 2.1255, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.1183450031826863, | |
| "grad_norm": 0.8761498332023621, | |
| "learning_rate": 1.4720134661815215e-05, | |
| "loss": 2.1271, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 2.1234373010821135, | |
| "grad_norm": 0.8369442820549011, | |
| "learning_rate": 1.4635120889584114e-05, | |
| "loss": 2.1381, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 2.1285295989815403, | |
| "grad_norm": 1.058815836906433, | |
| "learning_rate": 1.4550107117353012e-05, | |
| "loss": 2.1253, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 2.1336218968809675, | |
| "grad_norm": 0.8793694972991943, | |
| "learning_rate": 1.4465093345121911e-05, | |
| "loss": 2.1327, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 2.1387141947803947, | |
| "grad_norm": 0.9903535842895508, | |
| "learning_rate": 1.4380079572890809e-05, | |
| "loss": 2.1585, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.143806492679822, | |
| "grad_norm": 0.8910212516784668, | |
| "learning_rate": 1.4295065800659707e-05, | |
| "loss": 2.1482, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 2.1488987905792487, | |
| "grad_norm": 0.9088174700737, | |
| "learning_rate": 1.4210052028428606e-05, | |
| "loss": 2.1391, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 2.153991088478676, | |
| "grad_norm": 0.9213513731956482, | |
| "learning_rate": 1.4125038256197504e-05, | |
| "loss": 2.1447, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 2.159083386378103, | |
| "grad_norm": 0.9317104816436768, | |
| "learning_rate": 1.4040024483966401e-05, | |
| "loss": 2.1115, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 2.1641756842775304, | |
| "grad_norm": 0.7989690899848938, | |
| "learning_rate": 1.3955010711735302e-05, | |
| "loss": 2.1385, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.169267982176957, | |
| "grad_norm": 0.8436581492424011, | |
| "learning_rate": 1.3869996939504202e-05, | |
| "loss": 2.1487, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 2.1743602800763844, | |
| "grad_norm": 0.9113427400588989, | |
| "learning_rate": 1.37849831672731e-05, | |
| "loss": 2.0851, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 2.1794525779758116, | |
| "grad_norm": 0.8313522338867188, | |
| "learning_rate": 1.3699969395041997e-05, | |
| "loss": 2.1502, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 2.184544875875239, | |
| "grad_norm": 0.9525701999664307, | |
| "learning_rate": 1.3614955622810896e-05, | |
| "loss": 2.1206, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 2.189637173774666, | |
| "grad_norm": 0.9474479556083679, | |
| "learning_rate": 1.3529941850579794e-05, | |
| "loss": 2.1117, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.194729471674093, | |
| "grad_norm": 0.8311910629272461, | |
| "learning_rate": 1.3444928078348693e-05, | |
| "loss": 2.1268, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 2.19982176957352, | |
| "grad_norm": 0.879364013671875, | |
| "learning_rate": 1.335991430611759e-05, | |
| "loss": 2.1426, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 2.2049140674729473, | |
| "grad_norm": 0.8633144497871399, | |
| "learning_rate": 1.3274900533886488e-05, | |
| "loss": 2.1324, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 2.210006365372374, | |
| "grad_norm": 0.8333730697631836, | |
| "learning_rate": 1.318988676165539e-05, | |
| "loss": 2.1246, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 2.2150986632718013, | |
| "grad_norm": 0.8649702072143555, | |
| "learning_rate": 1.3104872989424289e-05, | |
| "loss": 2.122, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.2201909611712285, | |
| "grad_norm": 0.8680943846702576, | |
| "learning_rate": 1.3019859217193186e-05, | |
| "loss": 2.1295, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 2.2252832590706557, | |
| "grad_norm": 0.9396230578422546, | |
| "learning_rate": 1.2934845444962084e-05, | |
| "loss": 2.1458, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 2.230375556970083, | |
| "grad_norm": 0.9014144539833069, | |
| "learning_rate": 1.2849831672730983e-05, | |
| "loss": 2.1573, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 2.2354678548695097, | |
| "grad_norm": 0.9344182014465332, | |
| "learning_rate": 1.2764817900499881e-05, | |
| "loss": 2.1516, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 2.240560152768937, | |
| "grad_norm": 0.979686439037323, | |
| "learning_rate": 1.267980412826878e-05, | |
| "loss": 2.1307, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.245652450668364, | |
| "grad_norm": 0.8325761556625366, | |
| "learning_rate": 1.2594790356037678e-05, | |
| "loss": 2.1498, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 2.2507447485677914, | |
| "grad_norm": 0.8997836709022522, | |
| "learning_rate": 1.2509776583806576e-05, | |
| "loss": 2.1494, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 2.255837046467218, | |
| "grad_norm": 0.8690670132637024, | |
| "learning_rate": 1.2424762811575475e-05, | |
| "loss": 2.1393, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 2.2609293443666454, | |
| "grad_norm": 0.7817577719688416, | |
| "learning_rate": 1.2339749039344374e-05, | |
| "loss": 2.1341, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 2.2660216422660726, | |
| "grad_norm": 0.8697742223739624, | |
| "learning_rate": 1.2254735267113272e-05, | |
| "loss": 2.1469, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.2711139401655, | |
| "grad_norm": 0.8965489268302917, | |
| "learning_rate": 1.2169721494882171e-05, | |
| "loss": 2.1257, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 2.2762062380649266, | |
| "grad_norm": 1.0732325315475464, | |
| "learning_rate": 1.208470772265107e-05, | |
| "loss": 2.1131, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 2.281298535964354, | |
| "grad_norm": 0.7745924592018127, | |
| "learning_rate": 1.1999693950419968e-05, | |
| "loss": 2.1153, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 2.286390833863781, | |
| "grad_norm": 0.8988758325576782, | |
| "learning_rate": 1.1914680178188868e-05, | |
| "loss": 2.1545, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 2.2914831317632083, | |
| "grad_norm": 0.9772248268127441, | |
| "learning_rate": 1.1829666405957767e-05, | |
| "loss": 2.1333, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.296575429662635, | |
| "grad_norm": 0.8579228520393372, | |
| "learning_rate": 1.1744652633726664e-05, | |
| "loss": 2.1122, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 2.3016677275620623, | |
| "grad_norm": 0.8738901019096375, | |
| "learning_rate": 1.1659638861495562e-05, | |
| "loss": 2.0938, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 2.3067600254614895, | |
| "grad_norm": 0.8962051868438721, | |
| "learning_rate": 1.1574625089264461e-05, | |
| "loss": 2.1216, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 2.3118523233609167, | |
| "grad_norm": 0.8730968236923218, | |
| "learning_rate": 1.1489611317033359e-05, | |
| "loss": 2.1067, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 2.316944621260344, | |
| "grad_norm": 0.9516613483428955, | |
| "learning_rate": 1.1404597544802258e-05, | |
| "loss": 2.1092, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.3220369191597707, | |
| "grad_norm": 1.0411871671676636, | |
| "learning_rate": 1.1319583772571158e-05, | |
| "loss": 2.1199, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 2.327129217059198, | |
| "grad_norm": 0.9724430441856384, | |
| "learning_rate": 1.1234570000340055e-05, | |
| "loss": 2.1335, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 2.332221514958625, | |
| "grad_norm": 0.8349046111106873, | |
| "learning_rate": 1.1149556228108955e-05, | |
| "loss": 2.1249, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 2.337313812858052, | |
| "grad_norm": 0.8713769316673279, | |
| "learning_rate": 1.1064542455877852e-05, | |
| "loss": 2.1054, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 2.342406110757479, | |
| "grad_norm": 0.8659300208091736, | |
| "learning_rate": 1.0979528683646752e-05, | |
| "loss": 2.1095, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.3474984086569064, | |
| "grad_norm": 1.0436406135559082, | |
| "learning_rate": 1.089451491141565e-05, | |
| "loss": 2.1337, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 2.3525907065563336, | |
| "grad_norm": 0.8275535106658936, | |
| "learning_rate": 1.0809501139184549e-05, | |
| "loss": 2.1209, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 2.357683004455761, | |
| "grad_norm": 0.9503908157348633, | |
| "learning_rate": 1.0724487366953446e-05, | |
| "loss": 2.1262, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 2.3627753023551876, | |
| "grad_norm": 0.8849694728851318, | |
| "learning_rate": 1.0639473594722346e-05, | |
| "loss": 2.121, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 2.367867600254615, | |
| "grad_norm": 0.8742644786834717, | |
| "learning_rate": 1.0554459822491245e-05, | |
| "loss": 2.1421, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.372959898154042, | |
| "grad_norm": 0.8519076704978943, | |
| "learning_rate": 1.0469446050260143e-05, | |
| "loss": 2.1046, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 2.3780521960534693, | |
| "grad_norm": 0.8561546206474304, | |
| "learning_rate": 1.038443227802904e-05, | |
| "loss": 2.1262, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 2.383144493952896, | |
| "grad_norm": 0.8309553265571594, | |
| "learning_rate": 1.029941850579794e-05, | |
| "loss": 2.138, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 2.3882367918523233, | |
| "grad_norm": 1.0880669355392456, | |
| "learning_rate": 1.0214404733566839e-05, | |
| "loss": 2.0881, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 2.3933290897517505, | |
| "grad_norm": 0.9982330799102783, | |
| "learning_rate": 1.0129390961335736e-05, | |
| "loss": 2.1086, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.3984213876511777, | |
| "grad_norm": 0.9612807035446167, | |
| "learning_rate": 1.0044377189104636e-05, | |
| "loss": 2.1207, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 2.4035136855506045, | |
| "grad_norm": 0.848710298538208, | |
| "learning_rate": 9.959363416873533e-06, | |
| "loss": 2.1301, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 2.4086059834500317, | |
| "grad_norm": 0.8840051889419556, | |
| "learning_rate": 9.874349644642433e-06, | |
| "loss": 2.1118, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 2.413698281349459, | |
| "grad_norm": 0.916346549987793, | |
| "learning_rate": 9.789335872411332e-06, | |
| "loss": 2.128, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 2.418790579248886, | |
| "grad_norm": 0.8974706530570984, | |
| "learning_rate": 9.70432210018023e-06, | |
| "loss": 2.1452, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.423882877148313, | |
| "grad_norm": 1.0237131118774414, | |
| "learning_rate": 9.619308327949127e-06, | |
| "loss": 2.121, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 2.42897517504774, | |
| "grad_norm": 0.9156752228736877, | |
| "learning_rate": 9.534294555718027e-06, | |
| "loss": 2.0985, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 2.4340674729471674, | |
| "grad_norm": 0.9210427403450012, | |
| "learning_rate": 9.449280783486926e-06, | |
| "loss": 2.0653, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 2.4391597708465946, | |
| "grad_norm": 0.8185928463935852, | |
| "learning_rate": 9.364267011255824e-06, | |
| "loss": 2.0994, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 2.444252068746022, | |
| "grad_norm": 0.923605740070343, | |
| "learning_rate": 9.279253239024723e-06, | |
| "loss": 2.1402, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.4493443666454486, | |
| "grad_norm": 0.8515633344650269, | |
| "learning_rate": 9.19423946679362e-06, | |
| "loss": 2.1273, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 2.454436664544876, | |
| "grad_norm": 0.8325629830360413, | |
| "learning_rate": 9.109225694562518e-06, | |
| "loss": 2.0974, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 2.459528962444303, | |
| "grad_norm": 0.8125095963478088, | |
| "learning_rate": 9.02421192233142e-06, | |
| "loss": 2.1157, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 2.4646212603437303, | |
| "grad_norm": 0.8951058387756348, | |
| "learning_rate": 8.939198150100317e-06, | |
| "loss": 2.1111, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 2.469713558243157, | |
| "grad_norm": 0.8785336017608643, | |
| "learning_rate": 8.854184377869214e-06, | |
| "loss": 2.1412, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.4748058561425843, | |
| "grad_norm": 0.9884998202323914, | |
| "learning_rate": 8.769170605638114e-06, | |
| "loss": 2.1403, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 2.4798981540420115, | |
| "grad_norm": 0.9092361330986023, | |
| "learning_rate": 8.684156833407011e-06, | |
| "loss": 2.1341, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 2.4849904519414387, | |
| "grad_norm": 0.9467695951461792, | |
| "learning_rate": 8.59914306117591e-06, | |
| "loss": 2.1098, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 2.4900827498408655, | |
| "grad_norm": 0.8339031338691711, | |
| "learning_rate": 8.51412928894481e-06, | |
| "loss": 2.1146, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 2.4951750477402928, | |
| "grad_norm": 0.8132495284080505, | |
| "learning_rate": 8.429115516713708e-06, | |
| "loss": 2.1721, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.50026734563972, | |
| "grad_norm": 0.9209297895431519, | |
| "learning_rate": 8.344101744482605e-06, | |
| "loss": 2.0942, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 2.505359643539147, | |
| "grad_norm": 0.9470928311347961, | |
| "learning_rate": 8.259087972251506e-06, | |
| "loss": 2.0926, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 2.510451941438574, | |
| "grad_norm": 0.9337894320487976, | |
| "learning_rate": 8.174074200020404e-06, | |
| "loss": 2.1189, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 2.515544239338001, | |
| "grad_norm": 0.9764918088912964, | |
| "learning_rate": 8.089060427789302e-06, | |
| "loss": 2.1185, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 2.5206365372374284, | |
| "grad_norm": 0.894453763961792, | |
| "learning_rate": 8.004046655558201e-06, | |
| "loss": 2.1289, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.5257288351368556, | |
| "grad_norm": 0.8645434379577637, | |
| "learning_rate": 7.919032883327099e-06, | |
| "loss": 2.1025, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 2.530821133036283, | |
| "grad_norm": 0.8322845101356506, | |
| "learning_rate": 7.834019111095998e-06, | |
| "loss": 2.1128, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 2.5359134309357096, | |
| "grad_norm": 1.0294426679611206, | |
| "learning_rate": 7.749005338864897e-06, | |
| "loss": 2.1348, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 2.541005728835137, | |
| "grad_norm": 0.9489388465881348, | |
| "learning_rate": 7.663991566633795e-06, | |
| "loss": 2.1089, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 2.546098026734564, | |
| "grad_norm": 0.9332979917526245, | |
| "learning_rate": 7.578977794402693e-06, | |
| "loss": 2.1677, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.551190324633991, | |
| "grad_norm": 0.8114882111549377, | |
| "learning_rate": 7.493964022171592e-06, | |
| "loss": 2.1265, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 2.556282622533418, | |
| "grad_norm": 0.8496439456939697, | |
| "learning_rate": 7.408950249940491e-06, | |
| "loss": 2.1713, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 2.5613749204328453, | |
| "grad_norm": 1.149905800819397, | |
| "learning_rate": 7.32393647770939e-06, | |
| "loss": 2.1234, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 2.5664672183322725, | |
| "grad_norm": 1.0552695989608765, | |
| "learning_rate": 7.238922705478287e-06, | |
| "loss": 2.1398, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 2.5715595162316998, | |
| "grad_norm": 0.9433385133743286, | |
| "learning_rate": 7.153908933247186e-06, | |
| "loss": 2.0986, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.5766518141311265, | |
| "grad_norm": 0.889086127281189, | |
| "learning_rate": 7.068895161016086e-06, | |
| "loss": 2.1338, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 2.5817441120305538, | |
| "grad_norm": 0.8793154358863831, | |
| "learning_rate": 6.9838813887849835e-06, | |
| "loss": 2.1095, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 2.586836409929981, | |
| "grad_norm": 0.7565730214118958, | |
| "learning_rate": 6.898867616553882e-06, | |
| "loss": 2.1219, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 2.5919287078294078, | |
| "grad_norm": 0.8305276036262512, | |
| "learning_rate": 6.8138538443227805e-06, | |
| "loss": 2.099, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 2.597021005728835, | |
| "grad_norm": 0.9467841386795044, | |
| "learning_rate": 6.728840072091679e-06, | |
| "loss": 2.123, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.602113303628262, | |
| "grad_norm": 0.9913722276687622, | |
| "learning_rate": 6.643826299860578e-06, | |
| "loss": 2.1189, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 2.6072056015276894, | |
| "grad_norm": 0.9008012413978577, | |
| "learning_rate": 6.558812527629477e-06, | |
| "loss": 2.155, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 2.6122978994271167, | |
| "grad_norm": 0.9230712056159973, | |
| "learning_rate": 6.473798755398374e-06, | |
| "loss": 2.1333, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 2.617390197326544, | |
| "grad_norm": 1.0198971033096313, | |
| "learning_rate": 6.388784983167273e-06, | |
| "loss": 2.1374, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 2.6224824952259707, | |
| "grad_norm": 0.9199273586273193, | |
| "learning_rate": 6.303771210936171e-06, | |
| "loss": 2.1332, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.627574793125398, | |
| "grad_norm": 0.8723760843276978, | |
| "learning_rate": 6.21875743870507e-06, | |
| "loss": 2.1547, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 2.632667091024825, | |
| "grad_norm": 0.9192347526550293, | |
| "learning_rate": 6.133743666473969e-06, | |
| "loss": 2.1192, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 2.637759388924252, | |
| "grad_norm": 0.9517456889152527, | |
| "learning_rate": 6.048729894242868e-06, | |
| "loss": 2.1143, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 2.642851686823679, | |
| "grad_norm": 0.9906876683235168, | |
| "learning_rate": 5.963716122011766e-06, | |
| "loss": 2.1171, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 2.6479439847231063, | |
| "grad_norm": 0.9755644202232361, | |
| "learning_rate": 5.878702349780665e-06, | |
| "loss": 2.163, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.6530362826225335, | |
| "grad_norm": 0.9300287961959839, | |
| "learning_rate": 5.793688577549564e-06, | |
| "loss": 2.1218, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 2.6581285805219608, | |
| "grad_norm": 0.8865501284599304, | |
| "learning_rate": 5.7086748053184616e-06, | |
| "loss": 2.1356, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 2.6632208784213875, | |
| "grad_norm": 0.8156447410583496, | |
| "learning_rate": 5.62366103308736e-06, | |
| "loss": 2.1171, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 2.6683131763208148, | |
| "grad_norm": 0.8186530470848083, | |
| "learning_rate": 5.538647260856259e-06, | |
| "loss": 2.1052, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 2.673405474220242, | |
| "grad_norm": 0.790550708770752, | |
| "learning_rate": 5.453633488625157e-06, | |
| "loss": 2.1071, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.6784977721196688, | |
| "grad_norm": 0.8866438865661621, | |
| "learning_rate": 5.368619716394056e-06, | |
| "loss": 2.1354, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 2.683590070019096, | |
| "grad_norm": 0.9953215718269348, | |
| "learning_rate": 5.283605944162955e-06, | |
| "loss": 2.1383, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 2.688682367918523, | |
| "grad_norm": 0.9829987287521362, | |
| "learning_rate": 5.198592171931853e-06, | |
| "loss": 2.0919, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 2.6937746658179504, | |
| "grad_norm": 0.9085790514945984, | |
| "learning_rate": 5.113578399700752e-06, | |
| "loss": 2.1178, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 2.6988669637173777, | |
| "grad_norm": 0.8004271388053894, | |
| "learning_rate": 5.02856462746965e-06, | |
| "loss": 2.1239, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.7039592616168044, | |
| "grad_norm": 0.9412344098091125, | |
| "learning_rate": 4.943550855238549e-06, | |
| "loss": 2.108, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 2.7090515595162317, | |
| "grad_norm": 0.9245398640632629, | |
| "learning_rate": 4.858537083007447e-06, | |
| "loss": 2.1241, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 2.714143857415659, | |
| "grad_norm": 0.9695274233818054, | |
| "learning_rate": 4.7735233107763465e-06, | |
| "loss": 2.1106, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 2.7192361553150857, | |
| "grad_norm": 0.9269813895225525, | |
| "learning_rate": 4.688509538545244e-06, | |
| "loss": 2.1075, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 2.724328453214513, | |
| "grad_norm": 0.9783353805541992, | |
| "learning_rate": 4.6034957663141435e-06, | |
| "loss": 2.1127, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.72942075111394, | |
| "grad_norm": 0.9476038813591003, | |
| "learning_rate": 4.518481994083042e-06, | |
| "loss": 2.1284, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 2.7345130490133673, | |
| "grad_norm": 0.93116295337677, | |
| "learning_rate": 4.43346822185194e-06, | |
| "loss": 2.1004, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 2.7396053469127946, | |
| "grad_norm": 0.9898892641067505, | |
| "learning_rate": 4.348454449620839e-06, | |
| "loss": 2.1129, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 2.744697644812222, | |
| "grad_norm": 0.9059526920318604, | |
| "learning_rate": 4.263440677389737e-06, | |
| "loss": 2.1189, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 2.7497899427116486, | |
| "grad_norm": 0.8806390762329102, | |
| "learning_rate": 4.178426905158636e-06, | |
| "loss": 2.1416, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.754882240611076, | |
| "grad_norm": 0.9231753945350647, | |
| "learning_rate": 4.093413132927534e-06, | |
| "loss": 2.1373, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 2.759974538510503, | |
| "grad_norm": 0.7574446201324463, | |
| "learning_rate": 4.008399360696434e-06, | |
| "loss": 2.1355, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 2.76506683640993, | |
| "grad_norm": 0.8553287982940674, | |
| "learning_rate": 3.923385588465331e-06, | |
| "loss": 2.0786, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 2.770159134309357, | |
| "grad_norm": 0.7898595333099365, | |
| "learning_rate": 3.83837181623423e-06, | |
| "loss": 2.0941, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 2.7752514322087842, | |
| "grad_norm": 0.8895372748374939, | |
| "learning_rate": 3.7533580440031287e-06, | |
| "loss": 2.1311, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 2.7803437301082115, | |
| "grad_norm": 0.9352322816848755, | |
| "learning_rate": 3.668344271772027e-06, | |
| "loss": 2.102, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 2.7854360280076387, | |
| "grad_norm": 1.003927230834961, | |
| "learning_rate": 3.583330499540926e-06, | |
| "loss": 2.1119, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 2.7905283259070655, | |
| "grad_norm": 0.9228959083557129, | |
| "learning_rate": 3.498316727309824e-06, | |
| "loss": 2.142, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 2.7956206238064927, | |
| "grad_norm": 0.9431111812591553, | |
| "learning_rate": 3.413302955078723e-06, | |
| "loss": 2.12, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 2.80071292170592, | |
| "grad_norm": 0.9116231799125671, | |
| "learning_rate": 3.3282891828476215e-06, | |
| "loss": 2.1261, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.8058052196053467, | |
| "grad_norm": 0.9542424082756042, | |
| "learning_rate": 3.2432754106165196e-06, | |
| "loss": 2.1151, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 2.810897517504774, | |
| "grad_norm": 0.8199505805969238, | |
| "learning_rate": 3.1582616383854185e-06, | |
| "loss": 2.0883, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 2.815989815404201, | |
| "grad_norm": 0.8526725769042969, | |
| "learning_rate": 3.0732478661543174e-06, | |
| "loss": 2.1094, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 2.8210821133036283, | |
| "grad_norm": 0.9284189343452454, | |
| "learning_rate": 2.9882340939232155e-06, | |
| "loss": 2.1072, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 2.8261744112030556, | |
| "grad_norm": 0.9289183616638184, | |
| "learning_rate": 2.9032203216921144e-06, | |
| "loss": 2.1227, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 2.8312667091024823, | |
| "grad_norm": 1.0548968315124512, | |
| "learning_rate": 2.818206549461013e-06, | |
| "loss": 2.138, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 2.8363590070019096, | |
| "grad_norm": 0.8402355313301086, | |
| "learning_rate": 2.7331927772299113e-06, | |
| "loss": 2.1394, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 2.841451304901337, | |
| "grad_norm": 0.9172413349151611, | |
| "learning_rate": 2.64817900499881e-06, | |
| "loss": 2.114, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 2.8465436028007636, | |
| "grad_norm": 0.8457333445549011, | |
| "learning_rate": 2.5631652327677087e-06, | |
| "loss": 2.1268, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 2.851635900700191, | |
| "grad_norm": 0.8858858942985535, | |
| "learning_rate": 2.478151460536607e-06, | |
| "loss": 2.0901, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.856728198599618, | |
| "grad_norm": 0.8789589405059814, | |
| "learning_rate": 2.3931376883055057e-06, | |
| "loss": 2.1154, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 2.8618204964990452, | |
| "grad_norm": 0.9234612584114075, | |
| "learning_rate": 2.308123916074404e-06, | |
| "loss": 2.1106, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 2.8669127943984725, | |
| "grad_norm": 0.8070857524871826, | |
| "learning_rate": 2.2231101438433026e-06, | |
| "loss": 2.1181, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 2.8720050922978997, | |
| "grad_norm": 0.9172016978263855, | |
| "learning_rate": 2.138096371612201e-06, | |
| "loss": 2.0832, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 2.8770973901973265, | |
| "grad_norm": 0.9449873566627502, | |
| "learning_rate": 2.0530825993811e-06, | |
| "loss": 2.126, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 2.8821896880967537, | |
| "grad_norm": 1.0262093544006348, | |
| "learning_rate": 1.9680688271499985e-06, | |
| "loss": 2.1117, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 2.887281985996181, | |
| "grad_norm": 0.7934767007827759, | |
| "learning_rate": 1.8830550549188972e-06, | |
| "loss": 2.1256, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 2.8923742838956077, | |
| "grad_norm": 0.9590465426445007, | |
| "learning_rate": 1.7980412826877954e-06, | |
| "loss": 2.1335, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 2.897466581795035, | |
| "grad_norm": 1.006219744682312, | |
| "learning_rate": 1.713027510456694e-06, | |
| "loss": 2.0888, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 2.902558879694462, | |
| "grad_norm": 0.9063106179237366, | |
| "learning_rate": 1.6280137382255926e-06, | |
| "loss": 2.1506, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.9076511775938894, | |
| "grad_norm": 0.8653075695037842, | |
| "learning_rate": 1.542999965994491e-06, | |
| "loss": 2.0845, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 2.9127434754933166, | |
| "grad_norm": 0.9707706570625305, | |
| "learning_rate": 1.4579861937633898e-06, | |
| "loss": 2.0865, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 2.9178357733927434, | |
| "grad_norm": 0.9578688740730286, | |
| "learning_rate": 1.3729724215322882e-06, | |
| "loss": 2.1098, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 2.9229280712921706, | |
| "grad_norm": 0.8037517070770264, | |
| "learning_rate": 1.2879586493011867e-06, | |
| "loss": 2.085, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 2.928020369191598, | |
| "grad_norm": 0.9694920182228088, | |
| "learning_rate": 1.2029448770700854e-06, | |
| "loss": 2.0926, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 2.9331126670910246, | |
| "grad_norm": 0.8718476891517639, | |
| "learning_rate": 1.1179311048389841e-06, | |
| "loss": 2.1112, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 2.938204964990452, | |
| "grad_norm": 0.8940988779067993, | |
| "learning_rate": 1.0329173326078824e-06, | |
| "loss": 2.12, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 2.943297262889879, | |
| "grad_norm": 0.9514064192771912, | |
| "learning_rate": 9.479035603767811e-07, | |
| "loss": 2.1416, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 2.9483895607893063, | |
| "grad_norm": 0.9789698719978333, | |
| "learning_rate": 8.628897881456797e-07, | |
| "loss": 2.0913, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 2.9534818586887335, | |
| "grad_norm": 1.028600811958313, | |
| "learning_rate": 7.778760159145782e-07, | |
| "loss": 2.1142, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.9585741565881603, | |
| "grad_norm": 0.850046694278717, | |
| "learning_rate": 6.928622436834767e-07, | |
| "loss": 2.0929, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 2.9636664544875875, | |
| "grad_norm": 0.8758450150489807, | |
| "learning_rate": 6.078484714523753e-07, | |
| "loss": 2.0991, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 2.9687587523870147, | |
| "grad_norm": 0.9652713537216187, | |
| "learning_rate": 5.228346992212739e-07, | |
| "loss": 2.1095, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 2.973851050286442, | |
| "grad_norm": 1.0260512828826904, | |
| "learning_rate": 4.3782092699017247e-07, | |
| "loss": 2.1069, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 2.9789433481858687, | |
| "grad_norm": 0.7857241034507751, | |
| "learning_rate": 3.5280715475907095e-07, | |
| "loss": 2.1014, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 2.984035646085296, | |
| "grad_norm": 0.964096188545227, | |
| "learning_rate": 2.6779338252796954e-07, | |
| "loss": 2.0981, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 2.989127943984723, | |
| "grad_norm": 0.8568851351737976, | |
| "learning_rate": 1.827796102968681e-07, | |
| "loss": 2.1283, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 2.9942202418841504, | |
| "grad_norm": 0.9048463702201843, | |
| "learning_rate": 9.776583806576667e-08, | |
| "loss": 2.1011, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 2.9993125397835776, | |
| "grad_norm": 0.8119781613349915, | |
| "learning_rate": 1.2752065834665216e-08, | |
| "loss": 2.0672, | |
| "step": 58900 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 58914, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.462938693632e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |