| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.9997600095996155, | |
| "eval_steps": 500, | |
| "global_step": 150000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 3.999840006399744e-05, | |
| "grad_norm": 1.4777933359146118, | |
| "learning_rate": 0.001, | |
| "loss": 10.8399, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.003999840006399744, | |
| "grad_norm": 0.22688856720924377, | |
| "learning_rate": 0.001, | |
| "loss": 7.558, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.007999680012799487, | |
| "grad_norm": 0.24372541904449463, | |
| "learning_rate": 0.001, | |
| "loss": 6.3052, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.011999520019199232, | |
| "grad_norm": 0.5153403282165527, | |
| "learning_rate": 0.001, | |
| "loss": 5.9045, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.015999360025598975, | |
| "grad_norm": 0.5177266597747803, | |
| "learning_rate": 0.001, | |
| "loss": 5.6063, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01999920003199872, | |
| "grad_norm": 0.6018334627151489, | |
| "learning_rate": 0.001, | |
| "loss": 5.4024, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.023999040038398464, | |
| "grad_norm": 0.4220522940158844, | |
| "learning_rate": 0.001, | |
| "loss": 5.2604, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.027998880044798207, | |
| "grad_norm": 0.4826813042163849, | |
| "learning_rate": 0.001, | |
| "loss": 5.149, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03199872005119795, | |
| "grad_norm": 0.5880510807037354, | |
| "learning_rate": 0.001, | |
| "loss": 5.0663, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.035998560057597696, | |
| "grad_norm": 0.4875524640083313, | |
| "learning_rate": 0.001, | |
| "loss": 5.0008, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.03999840006399744, | |
| "grad_norm": 0.526023805141449, | |
| "learning_rate": 0.001, | |
| "loss": 4.9403, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04399824007039718, | |
| "grad_norm": 0.4925878942012787, | |
| "learning_rate": 0.001, | |
| "loss": 4.8988, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.04799808007679693, | |
| "grad_norm": 0.5051125884056091, | |
| "learning_rate": 0.001, | |
| "loss": 4.8552, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.051997920083196675, | |
| "grad_norm": 0.6583888530731201, | |
| "learning_rate": 0.001, | |
| "loss": 4.8247, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.055997760089596414, | |
| "grad_norm": 0.4777211546897888, | |
| "learning_rate": 0.001, | |
| "loss": 4.7899, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.05999760009599616, | |
| "grad_norm": 0.5130186080932617, | |
| "learning_rate": 0.001, | |
| "loss": 4.7612, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0639974401023959, | |
| "grad_norm": 0.5315548777580261, | |
| "learning_rate": 0.001, | |
| "loss": 4.7361, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.06799728010879565, | |
| "grad_norm": 0.47726863622665405, | |
| "learning_rate": 0.001, | |
| "loss": 4.7013, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.07199712011519539, | |
| "grad_norm": 0.5642004013061523, | |
| "learning_rate": 0.001, | |
| "loss": 4.6645, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.07599696012159514, | |
| "grad_norm": 0.545949399471283, | |
| "learning_rate": 0.001, | |
| "loss": 4.6219, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.07999680012799489, | |
| "grad_norm": 0.5206509232521057, | |
| "learning_rate": 0.001, | |
| "loss": 4.5969, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.08399664013439462, | |
| "grad_norm": 0.44842126965522766, | |
| "learning_rate": 0.001, | |
| "loss": 4.5708, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.08799648014079436, | |
| "grad_norm": 0.6365007162094116, | |
| "learning_rate": 0.001, | |
| "loss": 4.5523, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.09199632014719411, | |
| "grad_norm": 0.5729643702507019, | |
| "learning_rate": 0.001, | |
| "loss": 4.5305, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.09599616015359386, | |
| "grad_norm": 0.5888395309448242, | |
| "learning_rate": 0.001, | |
| "loss": 4.5189, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.0999960001599936, | |
| "grad_norm": 0.6099081039428711, | |
| "learning_rate": 0.001, | |
| "loss": 4.5038, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.10399584016639335, | |
| "grad_norm": 0.5354722738265991, | |
| "learning_rate": 0.001, | |
| "loss": 4.487, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.10799568017279308, | |
| "grad_norm": 0.6929482221603394, | |
| "learning_rate": 0.001, | |
| "loss": 4.4784, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.11199552017919283, | |
| "grad_norm": 0.5511060357093811, | |
| "learning_rate": 0.001, | |
| "loss": 4.4682, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.11599536018559257, | |
| "grad_norm": 0.5773873329162598, | |
| "learning_rate": 0.001, | |
| "loss": 4.4589, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.11999520019199232, | |
| "grad_norm": 0.6061297655105591, | |
| "learning_rate": 0.001, | |
| "loss": 4.452, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.12399504019839207, | |
| "grad_norm": 0.6082037687301636, | |
| "learning_rate": 0.001, | |
| "loss": 4.443, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.1279948802047918, | |
| "grad_norm": 0.5562213063240051, | |
| "learning_rate": 0.001, | |
| "loss": 4.4342, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.13199472021119155, | |
| "grad_norm": 0.5706282258033752, | |
| "learning_rate": 0.001, | |
| "loss": 4.4281, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.1359945602175913, | |
| "grad_norm": 0.6546366810798645, | |
| "learning_rate": 0.001, | |
| "loss": 4.4192, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.13999440022399104, | |
| "grad_norm": 0.5441614389419556, | |
| "learning_rate": 0.001, | |
| "loss": 4.4157, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.14399424023039079, | |
| "grad_norm": 0.547122061252594, | |
| "learning_rate": 0.001, | |
| "loss": 4.4079, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.14799408023679053, | |
| "grad_norm": 0.520165741443634, | |
| "learning_rate": 0.001, | |
| "loss": 4.4047, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.15199392024319028, | |
| "grad_norm": 0.529140293598175, | |
| "learning_rate": 0.001, | |
| "loss": 4.3997, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.15599376024959002, | |
| "grad_norm": 0.5902653336524963, | |
| "learning_rate": 0.001, | |
| "loss": 4.3921, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.15999360025598977, | |
| "grad_norm": 0.6136724948883057, | |
| "learning_rate": 0.001, | |
| "loss": 4.3877, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.16399344026238952, | |
| "grad_norm": 0.5538173913955688, | |
| "learning_rate": 0.001, | |
| "loss": 4.383, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.16799328026878924, | |
| "grad_norm": 0.5516422986984253, | |
| "learning_rate": 0.001, | |
| "loss": 4.3815, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.17199312027518898, | |
| "grad_norm": 0.535236656665802, | |
| "learning_rate": 0.001, | |
| "loss": 4.375, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.17599296028158873, | |
| "grad_norm": 0.5214977860450745, | |
| "learning_rate": 0.001, | |
| "loss": 4.3716, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.17999280028798847, | |
| "grad_norm": 0.473036527633667, | |
| "learning_rate": 0.001, | |
| "loss": 4.3679, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.18399264029438822, | |
| "grad_norm": 0.5115819573402405, | |
| "learning_rate": 0.001, | |
| "loss": 4.3661, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.18799248030078797, | |
| "grad_norm": 0.5074037313461304, | |
| "learning_rate": 0.001, | |
| "loss": 4.3606, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.1919923203071877, | |
| "grad_norm": 0.4992203116416931, | |
| "learning_rate": 0.001, | |
| "loss": 4.3579, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.19599216031358746, | |
| "grad_norm": 0.5755491256713867, | |
| "learning_rate": 0.001, | |
| "loss": 4.3501, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.1999920003199872, | |
| "grad_norm": 0.5061823725700378, | |
| "learning_rate": 0.001, | |
| "loss": 4.3486, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.20399184032638695, | |
| "grad_norm": 0.5323928594589233, | |
| "learning_rate": 0.001, | |
| "loss": 4.3477, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.2079916803327867, | |
| "grad_norm": 0.5301046967506409, | |
| "learning_rate": 0.001, | |
| "loss": 4.3411, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.21199152033918645, | |
| "grad_norm": 0.563123345375061, | |
| "learning_rate": 0.001, | |
| "loss": 4.3405, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.21599136034558616, | |
| "grad_norm": 0.4736695885658264, | |
| "learning_rate": 0.001, | |
| "loss": 4.3391, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.2199912003519859, | |
| "grad_norm": 0.46568775177001953, | |
| "learning_rate": 0.001, | |
| "loss": 4.3372, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.22399104035838566, | |
| "grad_norm": 0.591935396194458, | |
| "learning_rate": 0.001, | |
| "loss": 4.3318, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.2279908803647854, | |
| "grad_norm": 0.4887066185474396, | |
| "learning_rate": 0.001, | |
| "loss": 4.3318, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.23199072037118515, | |
| "grad_norm": 0.4820877015590668, | |
| "learning_rate": 0.001, | |
| "loss": 4.3253, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.2359905603775849, | |
| "grad_norm": 0.48746803402900696, | |
| "learning_rate": 0.001, | |
| "loss": 4.3242, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.23999040038398464, | |
| "grad_norm": 0.5339901447296143, | |
| "learning_rate": 0.001, | |
| "loss": 4.3226, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.2439902403903844, | |
| "grad_norm": 0.5406463146209717, | |
| "learning_rate": 0.001, | |
| "loss": 4.3197, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.24799008039678413, | |
| "grad_norm": 0.5854783654212952, | |
| "learning_rate": 0.001, | |
| "loss": 4.318, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.25198992040318385, | |
| "grad_norm": 0.47678840160369873, | |
| "learning_rate": 0.001, | |
| "loss": 4.3187, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.2559897604095836, | |
| "grad_norm": 0.5523233413696289, | |
| "learning_rate": 0.001, | |
| "loss": 4.3117, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.25998960041598335, | |
| "grad_norm": 0.46079719066619873, | |
| "learning_rate": 0.001, | |
| "loss": 4.3165, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.2639894404223831, | |
| "grad_norm": 0.4956238269805908, | |
| "learning_rate": 0.001, | |
| "loss": 4.3116, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.26798928042878284, | |
| "grad_norm": 0.532508373260498, | |
| "learning_rate": 0.001, | |
| "loss": 4.307, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.2719891204351826, | |
| "grad_norm": 0.5244960188865662, | |
| "learning_rate": 0.001, | |
| "loss": 4.309, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.27598896044158233, | |
| "grad_norm": 0.48627936840057373, | |
| "learning_rate": 0.001, | |
| "loss": 4.3025, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.2799888004479821, | |
| "grad_norm": 0.5393197536468506, | |
| "learning_rate": 0.001, | |
| "loss": 4.3062, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.2839886404543818, | |
| "grad_norm": 0.5384635925292969, | |
| "learning_rate": 0.001, | |
| "loss": 4.2993, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.28798848046078157, | |
| "grad_norm": 0.5455852150917053, | |
| "learning_rate": 0.001, | |
| "loss": 4.299, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.2919883204671813, | |
| "grad_norm": 0.45626768469810486, | |
| "learning_rate": 0.001, | |
| "loss": 4.2983, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.29598816047358106, | |
| "grad_norm": 0.5111705660820007, | |
| "learning_rate": 0.001, | |
| "loss": 4.2992, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.2999880004799808, | |
| "grad_norm": 0.5607153177261353, | |
| "learning_rate": 0.001, | |
| "loss": 4.291, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.30398784048638056, | |
| "grad_norm": 0.5863308906555176, | |
| "learning_rate": 0.001, | |
| "loss": 4.2928, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.3079876804927803, | |
| "grad_norm": 0.48837366700172424, | |
| "learning_rate": 0.001, | |
| "loss": 4.2951, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.31198752049918005, | |
| "grad_norm": 0.5300312042236328, | |
| "learning_rate": 0.001, | |
| "loss": 4.2891, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.3159873605055798, | |
| "grad_norm": 0.4869995415210724, | |
| "learning_rate": 0.001, | |
| "loss": 4.2868, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.31998720051197954, | |
| "grad_norm": 0.5378390550613403, | |
| "learning_rate": 0.001, | |
| "loss": 4.2863, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.3239870405183793, | |
| "grad_norm": 0.49154022336006165, | |
| "learning_rate": 0.001, | |
| "loss": 4.2902, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.32798688052477903, | |
| "grad_norm": 0.5984882712364197, | |
| "learning_rate": 0.001, | |
| "loss": 4.2851, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.3319867205311788, | |
| "grad_norm": 0.5132819414138794, | |
| "learning_rate": 0.001, | |
| "loss": 4.2854, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.33598656053757847, | |
| "grad_norm": 0.5187487006187439, | |
| "learning_rate": 0.001, | |
| "loss": 4.2853, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.3399864005439782, | |
| "grad_norm": 0.5310469269752502, | |
| "learning_rate": 0.001, | |
| "loss": 4.2785, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.34398624055037796, | |
| "grad_norm": 0.5427576899528503, | |
| "learning_rate": 0.001, | |
| "loss": 4.2799, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.3479860805567777, | |
| "grad_norm": 0.566967248916626, | |
| "learning_rate": 0.001, | |
| "loss": 4.2822, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.35198592056317746, | |
| "grad_norm": 0.48460662364959717, | |
| "learning_rate": 0.001, | |
| "loss": 4.2772, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.3559857605695772, | |
| "grad_norm": 0.5119643807411194, | |
| "learning_rate": 0.001, | |
| "loss": 4.2782, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.35998560057597695, | |
| "grad_norm": 0.5386670827865601, | |
| "learning_rate": 0.001, | |
| "loss": 4.2749, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.3639854405823767, | |
| "grad_norm": 0.5337742567062378, | |
| "learning_rate": 0.001, | |
| "loss": 4.2745, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.36798528058877644, | |
| "grad_norm": 0.522514283657074, | |
| "learning_rate": 0.001, | |
| "loss": 4.2717, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.3719851205951762, | |
| "grad_norm": 0.549670934677124, | |
| "learning_rate": 0.001, | |
| "loss": 4.272, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.37598496060157593, | |
| "grad_norm": 0.6063629388809204, | |
| "learning_rate": 0.001, | |
| "loss": 4.2684, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.3799848006079757, | |
| "grad_norm": 0.5420516133308411, | |
| "learning_rate": 0.001, | |
| "loss": 4.2674, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.3839846406143754, | |
| "grad_norm": 0.48435646295547485, | |
| "learning_rate": 0.001, | |
| "loss": 4.2675, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.3879844806207752, | |
| "grad_norm": 0.5914377570152283, | |
| "learning_rate": 0.001, | |
| "loss": 4.2737, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.3919843206271749, | |
| "grad_norm": 0.4351874589920044, | |
| "learning_rate": 0.001, | |
| "loss": 4.2684, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.39598416063357467, | |
| "grad_norm": 0.5514108538627625, | |
| "learning_rate": 0.001, | |
| "loss": 4.2671, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.3999840006399744, | |
| "grad_norm": 0.5687771439552307, | |
| "learning_rate": 0.001, | |
| "loss": 4.2626, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.40398384064637416, | |
| "grad_norm": 0.5736687779426575, | |
| "learning_rate": 0.001, | |
| "loss": 4.2673, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.4079836806527739, | |
| "grad_norm": 0.5822706818580627, | |
| "learning_rate": 0.001, | |
| "loss": 4.264, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.41198352065917365, | |
| "grad_norm": 0.5612554550170898, | |
| "learning_rate": 0.001, | |
| "loss": 4.2609, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.4159833606655734, | |
| "grad_norm": 0.5104981660842896, | |
| "learning_rate": 0.001, | |
| "loss": 4.2625, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.41998320067197314, | |
| "grad_norm": 0.5948505997657776, | |
| "learning_rate": 0.001, | |
| "loss": 4.2566, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.4239830406783729, | |
| "grad_norm": 0.5640226006507874, | |
| "learning_rate": 0.001, | |
| "loss": 4.2602, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.4279828806847726, | |
| "grad_norm": 0.6263200640678406, | |
| "learning_rate": 0.001, | |
| "loss": 4.2603, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.43198272069117233, | |
| "grad_norm": 0.48344025015830994, | |
| "learning_rate": 0.001, | |
| "loss": 4.2596, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.4359825606975721, | |
| "grad_norm": 0.49527639150619507, | |
| "learning_rate": 0.001, | |
| "loss": 4.2579, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.4399824007039718, | |
| "grad_norm": 0.4601668417453766, | |
| "learning_rate": 0.001, | |
| "loss": 4.2532, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.44398224071037157, | |
| "grad_norm": 0.4835492968559265, | |
| "learning_rate": 0.001, | |
| "loss": 4.2565, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.4479820807167713, | |
| "grad_norm": 0.4633197784423828, | |
| "learning_rate": 0.001, | |
| "loss": 4.2531, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.45198192072317106, | |
| "grad_norm": 0.5395948886871338, | |
| "learning_rate": 0.001, | |
| "loss": 4.2558, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.4559817607295708, | |
| "grad_norm": 0.5230295658111572, | |
| "learning_rate": 0.001, | |
| "loss": 4.2497, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.45998160073597055, | |
| "grad_norm": 0.47804856300354004, | |
| "learning_rate": 0.001, | |
| "loss": 4.2561, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.4639814407423703, | |
| "grad_norm": 0.4512189030647278, | |
| "learning_rate": 0.001, | |
| "loss": 4.2479, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.46798128074877005, | |
| "grad_norm": 0.7052320837974548, | |
| "learning_rate": 0.001, | |
| "loss": 4.2509, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.4719811207551698, | |
| "grad_norm": 0.4637924134731293, | |
| "learning_rate": 0.001, | |
| "loss": 4.2525, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.47598096076156954, | |
| "grad_norm": 0.442754864692688, | |
| "learning_rate": 0.001, | |
| "loss": 4.2499, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.4799808007679693, | |
| "grad_norm": 0.48194420337677, | |
| "learning_rate": 0.001, | |
| "loss": 4.2496, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.48398064077436903, | |
| "grad_norm": 0.5276590585708618, | |
| "learning_rate": 0.001, | |
| "loss": 4.2488, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.4879804807807688, | |
| "grad_norm": 0.4882962107658386, | |
| "learning_rate": 0.001, | |
| "loss": 4.2438, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.4919803207871685, | |
| "grad_norm": 0.47169601917266846, | |
| "learning_rate": 0.001, | |
| "loss": 4.2453, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.49598016079356827, | |
| "grad_norm": 0.48581433296203613, | |
| "learning_rate": 0.001, | |
| "loss": 4.2462, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.499980000799968, | |
| "grad_norm": 0.4135693609714508, | |
| "learning_rate": 0.001, | |
| "loss": 4.2468, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.5039798408063677, | |
| "grad_norm": 0.5517194271087646, | |
| "learning_rate": 0.001, | |
| "loss": 4.2432, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.5079796808127675, | |
| "grad_norm": 0.4932815134525299, | |
| "learning_rate": 0.001, | |
| "loss": 4.247, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.5119795208191672, | |
| "grad_norm": 0.4821571111679077, | |
| "learning_rate": 0.001, | |
| "loss": 4.2412, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.515979360825567, | |
| "grad_norm": 0.49198025465011597, | |
| "learning_rate": 0.001, | |
| "loss": 4.2415, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.5199792008319667, | |
| "grad_norm": 0.47645890712738037, | |
| "learning_rate": 0.001, | |
| "loss": 4.238, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.5239790408383664, | |
| "grad_norm": 0.4690765142440796, | |
| "learning_rate": 0.001, | |
| "loss": 4.2408, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.5279788808447662, | |
| "grad_norm": 0.4654984474182129, | |
| "learning_rate": 0.001, | |
| "loss": 4.2396, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.5319787208511659, | |
| "grad_norm": 0.542238712310791, | |
| "learning_rate": 0.001, | |
| "loss": 4.2381, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.5359785608575657, | |
| "grad_norm": 0.4381965100765228, | |
| "learning_rate": 0.001, | |
| "loss": 4.2369, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.5399784008639654, | |
| "grad_norm": 0.429868221282959, | |
| "learning_rate": 0.001, | |
| "loss": 4.2429, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.5439782408703652, | |
| "grad_norm": 0.4983363151550293, | |
| "learning_rate": 0.001, | |
| "loss": 4.2428, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.5479780808767649, | |
| "grad_norm": 0.4784950017929077, | |
| "learning_rate": 0.001, | |
| "loss": 4.2388, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.5519779208831647, | |
| "grad_norm": 0.5002242922782898, | |
| "learning_rate": 0.001, | |
| "loss": 4.2359, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.5559777608895644, | |
| "grad_norm": 0.51786869764328, | |
| "learning_rate": 0.001, | |
| "loss": 4.2385, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.5599776008959642, | |
| "grad_norm": 0.4682203233242035, | |
| "learning_rate": 0.001, | |
| "loss": 4.2385, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.5639774409023639, | |
| "grad_norm": 0.4645497500896454, | |
| "learning_rate": 0.001, | |
| "loss": 4.2353, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.5679772809087636, | |
| "grad_norm": 0.5080273151397705, | |
| "learning_rate": 0.001, | |
| "loss": 4.2344, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.5719771209151634, | |
| "grad_norm": 0.4649428129196167, | |
| "learning_rate": 0.001, | |
| "loss": 4.2378, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.5759769609215631, | |
| "grad_norm": 0.47332581877708435, | |
| "learning_rate": 0.001, | |
| "loss": 4.2347, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.5799768009279629, | |
| "grad_norm": 0.48201680183410645, | |
| "learning_rate": 0.001, | |
| "loss": 4.2354, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.5839766409343626, | |
| "grad_norm": 0.6143534779548645, | |
| "learning_rate": 0.001, | |
| "loss": 4.2376, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.5879764809407624, | |
| "grad_norm": 0.5474959015846252, | |
| "learning_rate": 0.001, | |
| "loss": 4.2318, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.5919763209471621, | |
| "grad_norm": 0.4949159026145935, | |
| "learning_rate": 0.001, | |
| "loss": 4.2316, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.5959761609535619, | |
| "grad_norm": 0.5090238451957703, | |
| "learning_rate": 0.001, | |
| "loss": 4.235, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.5999760009599616, | |
| "grad_norm": 0.5531513094902039, | |
| "learning_rate": 0.001, | |
| "loss": 4.2344, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.6039758409663614, | |
| "grad_norm": 0.48956552147865295, | |
| "learning_rate": 0.001, | |
| "loss": 4.2324, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.6079756809727611, | |
| "grad_norm": 0.4767073690891266, | |
| "learning_rate": 0.001, | |
| "loss": 4.2344, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.6119755209791609, | |
| "grad_norm": 0.4603271782398224, | |
| "learning_rate": 0.001, | |
| "loss": 4.2335, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.6159753609855606, | |
| "grad_norm": 0.46433115005493164, | |
| "learning_rate": 0.001, | |
| "loss": 4.2292, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.6199752009919604, | |
| "grad_norm": 0.493105411529541, | |
| "learning_rate": 0.001, | |
| "loss": 4.2317, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.6239750409983601, | |
| "grad_norm": 0.47192350029945374, | |
| "learning_rate": 0.001, | |
| "loss": 4.2328, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.6279748810047598, | |
| "grad_norm": 0.4708082675933838, | |
| "learning_rate": 0.001, | |
| "loss": 4.231, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.6319747210111596, | |
| "grad_norm": 0.45078226923942566, | |
| "learning_rate": 0.001, | |
| "loss": 4.2271, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.6359745610175593, | |
| "grad_norm": 0.4881497919559479, | |
| "learning_rate": 0.001, | |
| "loss": 4.2284, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.6399744010239591, | |
| "grad_norm": 0.5195273160934448, | |
| "learning_rate": 0.001, | |
| "loss": 4.234, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.6439742410303588, | |
| "grad_norm": 0.4363176226615906, | |
| "learning_rate": 0.001, | |
| "loss": 4.2278, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.6479740810367586, | |
| "grad_norm": 0.5371832251548767, | |
| "learning_rate": 0.001, | |
| "loss": 4.2242, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.6519739210431583, | |
| "grad_norm": 0.47699272632598877, | |
| "learning_rate": 0.001, | |
| "loss": 4.2268, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.6559737610495581, | |
| "grad_norm": 0.5306685566902161, | |
| "learning_rate": 0.001, | |
| "loss": 4.2284, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.6599736010559578, | |
| "grad_norm": 0.41332292556762695, | |
| "learning_rate": 0.001, | |
| "loss": 4.2287, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.6639734410623576, | |
| "grad_norm": 0.4745205044746399, | |
| "learning_rate": 0.001, | |
| "loss": 4.2259, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.6679732810687572, | |
| "grad_norm": 0.4349898397922516, | |
| "learning_rate": 0.001, | |
| "loss": 4.2239, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.6719731210751569, | |
| "grad_norm": 0.4942924380302429, | |
| "learning_rate": 0.001, | |
| "loss": 4.2223, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.6759729610815567, | |
| "grad_norm": 0.5019668340682983, | |
| "learning_rate": 0.001, | |
| "loss": 4.2233, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.6799728010879564, | |
| "grad_norm": 0.41412749886512756, | |
| "learning_rate": 0.001, | |
| "loss": 4.2237, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.6839726410943562, | |
| "grad_norm": 0.47330179810523987, | |
| "learning_rate": 0.001, | |
| "loss": 4.2241, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.6879724811007559, | |
| "grad_norm": 0.45668381452560425, | |
| "learning_rate": 0.001, | |
| "loss": 4.222, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.6919723211071557, | |
| "grad_norm": 0.437050461769104, | |
| "learning_rate": 0.001, | |
| "loss": 4.2226, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.6959721611135554, | |
| "grad_norm": 0.4829418659210205, | |
| "learning_rate": 0.001, | |
| "loss": 4.2252, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.6999720011199552, | |
| "grad_norm": 0.47243532538414, | |
| "learning_rate": 0.001, | |
| "loss": 4.2167, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.7039718411263549, | |
| "grad_norm": 0.4851880669593811, | |
| "learning_rate": 0.001, | |
| "loss": 4.2203, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.7079716811327547, | |
| "grad_norm": 0.5821521282196045, | |
| "learning_rate": 0.001, | |
| "loss": 4.2222, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.7119715211391544, | |
| "grad_norm": 0.4213780462741852, | |
| "learning_rate": 0.001, | |
| "loss": 4.2224, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.7159713611455542, | |
| "grad_norm": 0.4478498101234436, | |
| "learning_rate": 0.001, | |
| "loss": 4.2176, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.7199712011519539, | |
| "grad_norm": 0.41999441385269165, | |
| "learning_rate": 0.001, | |
| "loss": 4.2183, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.7239710411583536, | |
| "grad_norm": 0.4531497657299042, | |
| "learning_rate": 0.001, | |
| "loss": 4.2201, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.7279708811647534, | |
| "grad_norm": 0.5436483025550842, | |
| "learning_rate": 0.001, | |
| "loss": 4.2181, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.7319707211711531, | |
| "grad_norm": 0.5103757381439209, | |
| "learning_rate": 0.001, | |
| "loss": 4.2195, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.7359705611775529, | |
| "grad_norm": 0.5203491449356079, | |
| "learning_rate": 0.001, | |
| "loss": 4.2197, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.7399704011839526, | |
| "grad_norm": 0.5450323224067688, | |
| "learning_rate": 0.001, | |
| "loss": 4.2175, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.7439702411903524, | |
| "grad_norm": 0.4617632031440735, | |
| "learning_rate": 0.001, | |
| "loss": 4.2136, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.7479700811967521, | |
| "grad_norm": 0.477172315120697, | |
| "learning_rate": 0.001, | |
| "loss": 4.2166, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.7519699212031519, | |
| "grad_norm": 0.538207471370697, | |
| "learning_rate": 0.001, | |
| "loss": 4.2189, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.7559697612095516, | |
| "grad_norm": 0.39729467034339905, | |
| "learning_rate": 0.001, | |
| "loss": 4.2162, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.7599696012159514, | |
| "grad_norm": 0.4556116759777069, | |
| "learning_rate": 0.001, | |
| "loss": 4.2177, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.7639694412223511, | |
| "grad_norm": 0.48764076828956604, | |
| "learning_rate": 0.001, | |
| "loss": 4.2178, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.7679692812287509, | |
| "grad_norm": 0.5256556272506714, | |
| "learning_rate": 0.001, | |
| "loss": 4.2147, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.7719691212351506, | |
| "grad_norm": 0.48659247159957886, | |
| "learning_rate": 0.001, | |
| "loss": 4.2167, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.7759689612415503, | |
| "grad_norm": 0.4753814935684204, | |
| "learning_rate": 0.001, | |
| "loss": 4.2143, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.7799688012479501, | |
| "grad_norm": 0.47923025488853455, | |
| "learning_rate": 0.001, | |
| "loss": 4.2154, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.7839686412543498, | |
| "grad_norm": 0.5025440454483032, | |
| "learning_rate": 0.001, | |
| "loss": 4.2147, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.7879684812607496, | |
| "grad_norm": 0.5111387968063354, | |
| "learning_rate": 0.001, | |
| "loss": 4.2162, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.7919683212671493, | |
| "grad_norm": 0.5092292428016663, | |
| "learning_rate": 0.001, | |
| "loss": 4.2147, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.7959681612735491, | |
| "grad_norm": 0.4506489634513855, | |
| "learning_rate": 0.001, | |
| "loss": 4.2158, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.7999680012799488, | |
| "grad_norm": 0.43973225355148315, | |
| "learning_rate": 0.001, | |
| "loss": 4.2122, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.8039678412863486, | |
| "grad_norm": 0.46984151005744934, | |
| "learning_rate": 0.001, | |
| "loss": 4.2125, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.8079676812927483, | |
| "grad_norm": 0.4673251509666443, | |
| "learning_rate": 0.001, | |
| "loss": 4.2122, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.8119675212991481, | |
| "grad_norm": 0.5964290499687195, | |
| "learning_rate": 0.001, | |
| "loss": 4.2097, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.8159673613055478, | |
| "grad_norm": 0.4411192536354065, | |
| "learning_rate": 0.001, | |
| "loss": 4.208, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.8199672013119476, | |
| "grad_norm": 0.4942370355129242, | |
| "learning_rate": 0.001, | |
| "loss": 4.2119, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.8239670413183473, | |
| "grad_norm": 0.4818095862865448, | |
| "learning_rate": 0.001, | |
| "loss": 4.2103, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.827966881324747, | |
| "grad_norm": 0.43105003237724304, | |
| "learning_rate": 0.001, | |
| "loss": 4.2108, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.8319667213311468, | |
| "grad_norm": 0.447951078414917, | |
| "learning_rate": 0.001, | |
| "loss": 4.2137, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.8359665613375465, | |
| "grad_norm": 0.4796685576438904, | |
| "learning_rate": 0.001, | |
| "loss": 4.212, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.8399664013439463, | |
| "grad_norm": 0.443522572517395, | |
| "learning_rate": 0.001, | |
| "loss": 4.2076, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.843966241350346, | |
| "grad_norm": 0.4776618778705597, | |
| "learning_rate": 0.001, | |
| "loss": 4.2128, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.8479660813567458, | |
| "grad_norm": 0.45614078640937805, | |
| "learning_rate": 0.001, | |
| "loss": 4.2061, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.8519659213631455, | |
| "grad_norm": 0.574044942855835, | |
| "learning_rate": 0.001, | |
| "loss": 4.2088, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.8559657613695452, | |
| "grad_norm": 0.6252190470695496, | |
| "learning_rate": 0.001, | |
| "loss": 4.2092, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.8599656013759449, | |
| "grad_norm": 0.4226459860801697, | |
| "learning_rate": 0.001, | |
| "loss": 4.2075, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.8639654413823447, | |
| "grad_norm": 0.45728734135627747, | |
| "learning_rate": 0.001, | |
| "loss": 4.2086, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.8679652813887444, | |
| "grad_norm": 0.486545592546463, | |
| "learning_rate": 0.001, | |
| "loss": 4.2108, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.8719651213951441, | |
| "grad_norm": 0.45706841349601746, | |
| "learning_rate": 0.001, | |
| "loss": 4.2101, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.8759649614015439, | |
| "grad_norm": 0.48993363976478577, | |
| "learning_rate": 0.001, | |
| "loss": 4.2092, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.8799648014079436, | |
| "grad_norm": 0.42304420471191406, | |
| "learning_rate": 0.001, | |
| "loss": 4.213, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.8839646414143434, | |
| "grad_norm": 0.4448954463005066, | |
| "learning_rate": 0.001, | |
| "loss": 4.2066, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.8879644814207431, | |
| "grad_norm": 0.4916422367095947, | |
| "learning_rate": 0.001, | |
| "loss": 4.2028, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.8919643214271429, | |
| "grad_norm": 0.44371068477630615, | |
| "learning_rate": 0.001, | |
| "loss": 4.2084, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.8959641614335426, | |
| "grad_norm": 0.42282700538635254, | |
| "learning_rate": 0.001, | |
| "loss": 4.204, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.8999640014399424, | |
| "grad_norm": 0.40497201681137085, | |
| "learning_rate": 0.001, | |
| "loss": 4.2057, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.9039638414463421, | |
| "grad_norm": 0.4718570113182068, | |
| "learning_rate": 0.001, | |
| "loss": 4.2096, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.9079636814527419, | |
| "grad_norm": 0.4425433576107025, | |
| "learning_rate": 0.001, | |
| "loss": 4.2071, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.9119635214591416, | |
| "grad_norm": 0.5133687853813171, | |
| "learning_rate": 0.001, | |
| "loss": 4.2074, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.9159633614655414, | |
| "grad_norm": 0.5017001032829285, | |
| "learning_rate": 0.001, | |
| "loss": 4.2065, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.9199632014719411, | |
| "grad_norm": 0.47720518708229065, | |
| "learning_rate": 0.001, | |
| "loss": 4.2068, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.9239630414783409, | |
| "grad_norm": 0.5085333585739136, | |
| "learning_rate": 0.001, | |
| "loss": 4.2069, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.9279628814847406, | |
| "grad_norm": 0.451284259557724, | |
| "learning_rate": 0.001, | |
| "loss": 4.2052, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.9319627214911403, | |
| "grad_norm": 0.4576238691806793, | |
| "learning_rate": 0.001, | |
| "loss": 4.205, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.9359625614975401, | |
| "grad_norm": 0.45982882380485535, | |
| "learning_rate": 0.001, | |
| "loss": 4.2013, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.9399624015039398, | |
| "grad_norm": 0.5430781841278076, | |
| "learning_rate": 0.001, | |
| "loss": 4.2077, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.9439622415103396, | |
| "grad_norm": 0.49019157886505127, | |
| "learning_rate": 0.001, | |
| "loss": 4.2052, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.9479620815167393, | |
| "grad_norm": 0.4381950795650482, | |
| "learning_rate": 0.001, | |
| "loss": 4.2023, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.9519619215231391, | |
| "grad_norm": 0.5325062870979309, | |
| "learning_rate": 0.001, | |
| "loss": 4.2044, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.9559617615295388, | |
| "grad_norm": 0.4855589270591736, | |
| "learning_rate": 0.001, | |
| "loss": 4.205, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.9599616015359386, | |
| "grad_norm": 0.4132635295391083, | |
| "learning_rate": 0.001, | |
| "loss": 4.2018, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.9639614415423383, | |
| "grad_norm": 0.4958603084087372, | |
| "learning_rate": 0.001, | |
| "loss": 4.2027, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.9679612815487381, | |
| "grad_norm": 0.44566038250923157, | |
| "learning_rate": 0.001, | |
| "loss": 4.2043, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.9719611215551378, | |
| "grad_norm": 0.4078667163848877, | |
| "learning_rate": 0.001, | |
| "loss": 4.2044, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.9759609615615376, | |
| "grad_norm": 0.48166027665138245, | |
| "learning_rate": 0.001, | |
| "loss": 4.2022, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.9799608015679373, | |
| "grad_norm": 0.472896933555603, | |
| "learning_rate": 0.001, | |
| "loss": 4.2078, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.983960641574337, | |
| "grad_norm": 0.4770311117172241, | |
| "learning_rate": 0.001, | |
| "loss": 4.1982, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.9879604815807368, | |
| "grad_norm": 0.4926893413066864, | |
| "learning_rate": 0.001, | |
| "loss": 4.2014, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.9919603215871365, | |
| "grad_norm": 0.4080910086631775, | |
| "learning_rate": 0.001, | |
| "loss": 4.1947, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.9959601615935363, | |
| "grad_norm": 0.5428063273429871, | |
| "learning_rate": 0.001, | |
| "loss": 4.1999, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.999960001599936, | |
| "grad_norm": 0.4776434600353241, | |
| "learning_rate": 0.001, | |
| "loss": 4.2004, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.0039598416063358, | |
| "grad_norm": 0.5323604941368103, | |
| "learning_rate": 0.001, | |
| "loss": 4.1961, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.0079596816127354, | |
| "grad_norm": 0.5068919062614441, | |
| "learning_rate": 0.001, | |
| "loss": 4.1951, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.0119595216191353, | |
| "grad_norm": 0.439967542886734, | |
| "learning_rate": 0.001, | |
| "loss": 4.2012, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.015959361625535, | |
| "grad_norm": 0.5373870730400085, | |
| "learning_rate": 0.001, | |
| "loss": 4.1967, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.0199592016319348, | |
| "grad_norm": 0.45972001552581787, | |
| "learning_rate": 0.001, | |
| "loss": 4.198, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.0239590416383344, | |
| "grad_norm": 0.45675376057624817, | |
| "learning_rate": 0.001, | |
| "loss": 4.1974, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.0279588816447343, | |
| "grad_norm": 0.5330101847648621, | |
| "learning_rate": 0.001, | |
| "loss": 4.1964, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.031958721651134, | |
| "grad_norm": 0.557739794254303, | |
| "learning_rate": 0.001, | |
| "loss": 4.196, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.0359585616575337, | |
| "grad_norm": 0.4591217339038849, | |
| "learning_rate": 0.001, | |
| "loss": 4.1974, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.0399584016639334, | |
| "grad_norm": 0.43261614441871643, | |
| "learning_rate": 0.001, | |
| "loss": 4.1981, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.0439582416703332, | |
| "grad_norm": 0.4880464971065521, | |
| "learning_rate": 0.001, | |
| "loss": 4.1994, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.0479580816767329, | |
| "grad_norm": 0.48199212551116943, | |
| "learning_rate": 0.001, | |
| "loss": 4.1936, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.0519579216831327, | |
| "grad_norm": 0.5580593943595886, | |
| "learning_rate": 0.001, | |
| "loss": 4.1972, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.0559577616895324, | |
| "grad_norm": 0.44014519453048706, | |
| "learning_rate": 0.001, | |
| "loss": 4.1955, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.0599576016959322, | |
| "grad_norm": 0.5002579092979431, | |
| "learning_rate": 0.001, | |
| "loss": 4.2022, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.0639574417023319, | |
| "grad_norm": 0.4530857503414154, | |
| "learning_rate": 0.001, | |
| "loss": 4.1945, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.0679572817087317, | |
| "grad_norm": 0.4876604676246643, | |
| "learning_rate": 0.001, | |
| "loss": 4.1971, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.0719571217151314, | |
| "grad_norm": 0.4442366063594818, | |
| "learning_rate": 0.001, | |
| "loss": 4.1941, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.0759569617215312, | |
| "grad_norm": 0.42312711477279663, | |
| "learning_rate": 0.001, | |
| "loss": 4.1976, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.0799568017279308, | |
| "grad_norm": 0.49312129616737366, | |
| "learning_rate": 0.001, | |
| "loss": 4.1946, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.0839566417343307, | |
| "grad_norm": 0.4688827693462372, | |
| "learning_rate": 0.001, | |
| "loss": 4.1944, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.0879564817407303, | |
| "grad_norm": 0.48417580127716064, | |
| "learning_rate": 0.001, | |
| "loss": 4.1975, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.0919563217471302, | |
| "grad_norm": 0.4930320382118225, | |
| "learning_rate": 0.001, | |
| "loss": 4.1957, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.0959561617535298, | |
| "grad_norm": 0.5079306364059448, | |
| "learning_rate": 0.001, | |
| "loss": 4.1978, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.0999560017599297, | |
| "grad_norm": 0.5758777856826782, | |
| "learning_rate": 0.001, | |
| "loss": 4.1953, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.1039558417663293, | |
| "grad_norm": 0.49672508239746094, | |
| "learning_rate": 0.001, | |
| "loss": 4.1972, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.1079556817727292, | |
| "grad_norm": 0.4356079399585724, | |
| "learning_rate": 0.001, | |
| "loss": 4.1941, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.1119555217791288, | |
| "grad_norm": 0.44307178258895874, | |
| "learning_rate": 0.001, | |
| "loss": 4.1954, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.1159553617855287, | |
| "grad_norm": 0.5404129028320312, | |
| "learning_rate": 0.001, | |
| "loss": 4.1914, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.1199552017919283, | |
| "grad_norm": 0.47977906465530396, | |
| "learning_rate": 0.001, | |
| "loss": 4.1924, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.1239550417983282, | |
| "grad_norm": 0.4677433371543884, | |
| "learning_rate": 0.001, | |
| "loss": 4.1941, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.1279548818047278, | |
| "grad_norm": 0.6071330904960632, | |
| "learning_rate": 0.001, | |
| "loss": 4.1906, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.1319547218111277, | |
| "grad_norm": 0.48553600907325745, | |
| "learning_rate": 0.001, | |
| "loss": 4.1961, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.1359545618175273, | |
| "grad_norm": 0.4587904214859009, | |
| "learning_rate": 0.001, | |
| "loss": 4.1948, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.139954401823927, | |
| "grad_norm": 0.4619959890842438, | |
| "learning_rate": 0.001, | |
| "loss": 4.1905, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.1439542418303268, | |
| "grad_norm": 0.5305209755897522, | |
| "learning_rate": 0.001, | |
| "loss": 4.1918, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.1479540818367266, | |
| "grad_norm": 0.46056920289993286, | |
| "learning_rate": 0.001, | |
| "loss": 4.1918, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.1519539218431263, | |
| "grad_norm": 0.48591580986976624, | |
| "learning_rate": 0.001, | |
| "loss": 4.1965, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.155953761849526, | |
| "grad_norm": 0.5184019804000854, | |
| "learning_rate": 0.001, | |
| "loss": 4.1925, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.1599536018559258, | |
| "grad_norm": 0.44365832209587097, | |
| "learning_rate": 0.001, | |
| "loss": 4.1933, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.1639534418623254, | |
| "grad_norm": 0.5565987825393677, | |
| "learning_rate": 0.001, | |
| "loss": 4.187, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.1679532818687253, | |
| "grad_norm": 0.4826023280620575, | |
| "learning_rate": 0.001, | |
| "loss": 4.1955, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.171953121875125, | |
| "grad_norm": 0.5205375552177429, | |
| "learning_rate": 0.001, | |
| "loss": 4.1923, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.1759529618815248, | |
| "grad_norm": 0.5183901190757751, | |
| "learning_rate": 0.001, | |
| "loss": 4.1975, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.1799528018879244, | |
| "grad_norm": 0.49648305773735046, | |
| "learning_rate": 0.001, | |
| "loss": 4.193, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.1839526418943243, | |
| "grad_norm": 0.4555068612098694, | |
| "learning_rate": 0.001, | |
| "loss": 4.1908, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.1879524819007239, | |
| "grad_norm": 0.48755526542663574, | |
| "learning_rate": 0.001, | |
| "loss": 4.1925, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.1919523219071237, | |
| "grad_norm": 0.4887760281562805, | |
| "learning_rate": 0.001, | |
| "loss": 4.1881, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.1959521619135234, | |
| "grad_norm": 0.5118262767791748, | |
| "learning_rate": 0.001, | |
| "loss": 4.1877, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.1999520019199232, | |
| "grad_norm": 0.45270290970802307, | |
| "learning_rate": 0.001, | |
| "loss": 4.1933, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.2039518419263229, | |
| "grad_norm": 0.5188767910003662, | |
| "learning_rate": 0.001, | |
| "loss": 4.1932, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.2079516819327227, | |
| "grad_norm": 0.53879714012146, | |
| "learning_rate": 0.001, | |
| "loss": 4.1869, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.2119515219391224, | |
| "grad_norm": 0.5128753185272217, | |
| "learning_rate": 0.001, | |
| "loss": 4.1901, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.2159513619455222, | |
| "grad_norm": 0.3823694586753845, | |
| "learning_rate": 0.001, | |
| "loss": 4.1905, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.2199512019519219, | |
| "grad_norm": 0.4704561233520508, | |
| "learning_rate": 0.001, | |
| "loss": 4.1856, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.2239510419583217, | |
| "grad_norm": 0.4269457459449768, | |
| "learning_rate": 0.001, | |
| "loss": 4.1918, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.2279508819647214, | |
| "grad_norm": 0.44246116280555725, | |
| "learning_rate": 0.001, | |
| "loss": 4.1915, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.2319507219711212, | |
| "grad_norm": 0.45588257908821106, | |
| "learning_rate": 0.001, | |
| "loss": 4.1887, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.2359505619775208, | |
| "grad_norm": 0.5354055166244507, | |
| "learning_rate": 0.001, | |
| "loss": 4.1916, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.2399504019839207, | |
| "grad_norm": 0.48199784755706787, | |
| "learning_rate": 0.001, | |
| "loss": 4.1908, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.2439502419903203, | |
| "grad_norm": 0.48949673771858215, | |
| "learning_rate": 0.001, | |
| "loss": 4.1891, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.2479500819967202, | |
| "grad_norm": 0.49601200222969055, | |
| "learning_rate": 0.001, | |
| "loss": 4.1873, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.2519499220031198, | |
| "grad_norm": 0.4721723198890686, | |
| "learning_rate": 0.001, | |
| "loss": 4.1879, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.2559497620095197, | |
| "grad_norm": 0.44374367594718933, | |
| "learning_rate": 0.001, | |
| "loss": 4.1917, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.2599496020159193, | |
| "grad_norm": 0.48409733176231384, | |
| "learning_rate": 0.001, | |
| "loss": 4.1839, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.2639494420223192, | |
| "grad_norm": 0.4843854010105133, | |
| "learning_rate": 0.001, | |
| "loss": 4.1898, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.2679492820287188, | |
| "grad_norm": 0.45039990544319153, | |
| "learning_rate": 0.001, | |
| "loss": 4.1891, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.2719491220351187, | |
| "grad_norm": 0.3904966413974762, | |
| "learning_rate": 0.001, | |
| "loss": 4.1872, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.2759489620415183, | |
| "grad_norm": 0.4539620876312256, | |
| "learning_rate": 0.001, | |
| "loss": 4.1836, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.2799488020479182, | |
| "grad_norm": 0.46595314145088196, | |
| "learning_rate": 0.001, | |
| "loss": 4.19, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.2839486420543178, | |
| "grad_norm": 0.4878152012825012, | |
| "learning_rate": 0.001, | |
| "loss": 4.1848, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.2879484820607177, | |
| "grad_norm": 0.5768002271652222, | |
| "learning_rate": 0.001, | |
| "loss": 4.1912, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.2919483220671173, | |
| "grad_norm": 0.43661263585090637, | |
| "learning_rate": 0.001, | |
| "loss": 4.1894, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.295948162073517, | |
| "grad_norm": 0.4612700939178467, | |
| "learning_rate": 0.001, | |
| "loss": 4.1884, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.2999480020799168, | |
| "grad_norm": 0.4451783001422882, | |
| "learning_rate": 0.001, | |
| "loss": 4.1886, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.3039478420863166, | |
| "grad_norm": 0.5825871825218201, | |
| "learning_rate": 0.001, | |
| "loss": 4.191, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.3079476820927163, | |
| "grad_norm": 0.5060557126998901, | |
| "learning_rate": 0.001, | |
| "loss": 4.1858, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.311947522099116, | |
| "grad_norm": 0.49111461639404297, | |
| "learning_rate": 0.001, | |
| "loss": 4.1861, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.3159473621055158, | |
| "grad_norm": 0.511899471282959, | |
| "learning_rate": 0.001, | |
| "loss": 4.1901, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.3199472021119156, | |
| "grad_norm": 0.5053913593292236, | |
| "learning_rate": 0.001, | |
| "loss": 4.1885, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.3239470421183153, | |
| "grad_norm": 0.55963534116745, | |
| "learning_rate": 0.001, | |
| "loss": 4.1868, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.327946882124715, | |
| "grad_norm": 0.5135225653648376, | |
| "learning_rate": 0.001, | |
| "loss": 4.19, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.3319467221311148, | |
| "grad_norm": 0.5401255488395691, | |
| "learning_rate": 0.001, | |
| "loss": 4.1892, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.3359465621375146, | |
| "grad_norm": 0.5370189547538757, | |
| "learning_rate": 0.001, | |
| "loss": 4.1887, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.3399464021439143, | |
| "grad_norm": 0.452307790517807, | |
| "learning_rate": 0.001, | |
| "loss": 4.1837, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.3439462421503139, | |
| "grad_norm": 0.4923325777053833, | |
| "learning_rate": 0.001, | |
| "loss": 4.1876, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.3479460821567137, | |
| "grad_norm": 0.4178541600704193, | |
| "learning_rate": 0.001, | |
| "loss": 4.1859, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.3519459221631136, | |
| "grad_norm": 0.43804001808166504, | |
| "learning_rate": 0.001, | |
| "loss": 4.1859, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.3559457621695132, | |
| "grad_norm": 0.4893229901790619, | |
| "learning_rate": 0.001, | |
| "loss": 4.185, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.3599456021759129, | |
| "grad_norm": 0.43529701232910156, | |
| "learning_rate": 0.001, | |
| "loss": 4.1807, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.3639454421823127, | |
| "grad_norm": 0.4353291094303131, | |
| "learning_rate": 0.001, | |
| "loss": 4.182, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.3679452821887126, | |
| "grad_norm": 0.4755658507347107, | |
| "learning_rate": 0.001, | |
| "loss": 4.1849, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.3719451221951122, | |
| "grad_norm": 0.5512502193450928, | |
| "learning_rate": 0.001, | |
| "loss": 4.1857, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.3759449622015119, | |
| "grad_norm": 0.4462525546550751, | |
| "learning_rate": 0.001, | |
| "loss": 4.184, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.3799448022079117, | |
| "grad_norm": 0.5683126449584961, | |
| "learning_rate": 0.001, | |
| "loss": 4.1849, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.3839446422143113, | |
| "grad_norm": 0.4847952723503113, | |
| "learning_rate": 0.001, | |
| "loss": 4.1809, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.3879444822207112, | |
| "grad_norm": 0.5147800445556641, | |
| "learning_rate": 0.001, | |
| "loss": 4.182, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.3919443222271108, | |
| "grad_norm": 0.49664029479026794, | |
| "learning_rate": 0.001, | |
| "loss": 4.1826, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.3959441622335107, | |
| "grad_norm": 0.4566904902458191, | |
| "learning_rate": 0.001, | |
| "loss": 4.1855, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.3999440022399103, | |
| "grad_norm": 0.4743303954601288, | |
| "learning_rate": 0.001, | |
| "loss": 4.1854, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.4039438422463102, | |
| "grad_norm": 0.478000670671463, | |
| "learning_rate": 0.001, | |
| "loss": 4.187, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.4079436822527098, | |
| "grad_norm": 0.43782198429107666, | |
| "learning_rate": 0.001, | |
| "loss": 4.1808, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.4119435222591097, | |
| "grad_norm": 0.46672046184539795, | |
| "learning_rate": 0.001, | |
| "loss": 4.1814, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.4159433622655093, | |
| "grad_norm": 0.48321112990379333, | |
| "learning_rate": 0.001, | |
| "loss": 4.1814, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.4199432022719092, | |
| "grad_norm": 0.48716631531715393, | |
| "learning_rate": 0.001, | |
| "loss": 4.1831, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.4239430422783088, | |
| "grad_norm": 0.46520668268203735, | |
| "learning_rate": 0.001, | |
| "loss": 4.1823, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.4279428822847087, | |
| "grad_norm": 0.4660239815711975, | |
| "learning_rate": 0.001, | |
| "loss": 4.1845, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.4319427222911083, | |
| "grad_norm": 0.5418950319290161, | |
| "learning_rate": 0.001, | |
| "loss": 4.1828, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.4359425622975082, | |
| "grad_norm": 0.45179441571235657, | |
| "learning_rate": 0.001, | |
| "loss": 4.1821, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.4399424023039078, | |
| "grad_norm": 0.5119227766990662, | |
| "learning_rate": 0.001, | |
| "loss": 4.183, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.4439422423103077, | |
| "grad_norm": 0.4730793237686157, | |
| "learning_rate": 0.001, | |
| "loss": 4.1813, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.4479420823167073, | |
| "grad_norm": 0.4840889275074005, | |
| "learning_rate": 0.001, | |
| "loss": 4.182, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.4519419223231071, | |
| "grad_norm": 0.4688670039176941, | |
| "learning_rate": 0.001, | |
| "loss": 4.1828, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.4559417623295068, | |
| "grad_norm": 0.4670471251010895, | |
| "learning_rate": 0.001, | |
| "loss": 4.1784, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.4599416023359066, | |
| "grad_norm": 0.4444526731967926, | |
| "learning_rate": 0.001, | |
| "loss": 4.1793, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.4639414423423063, | |
| "grad_norm": 0.43045392632484436, | |
| "learning_rate": 0.001, | |
| "loss": 4.1814, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.4679412823487061, | |
| "grad_norm": 0.491200715303421, | |
| "learning_rate": 0.001, | |
| "loss": 4.1848, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.4719411223551058, | |
| "grad_norm": 0.4605400264263153, | |
| "learning_rate": 0.001, | |
| "loss": 4.1831, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.4759409623615056, | |
| "grad_norm": 0.47693344950675964, | |
| "learning_rate": 0.001, | |
| "loss": 4.1797, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.4799408023679053, | |
| "grad_norm": 0.5456331968307495, | |
| "learning_rate": 0.001, | |
| "loss": 4.1773, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.483940642374305, | |
| "grad_norm": 0.42828118801116943, | |
| "learning_rate": 0.001, | |
| "loss": 4.1794, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.4879404823807048, | |
| "grad_norm": 0.47602272033691406, | |
| "learning_rate": 0.001, | |
| "loss": 4.1821, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.4919403223871046, | |
| "grad_norm": 0.5995456576347351, | |
| "learning_rate": 0.001, | |
| "loss": 4.1799, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.4959401623935042, | |
| "grad_norm": 0.5107753276824951, | |
| "learning_rate": 0.001, | |
| "loss": 4.1838, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.4999400023999039, | |
| "grad_norm": 0.5625353455543518, | |
| "learning_rate": 0.001, | |
| "loss": 4.1823, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.5039398424063037, | |
| "grad_norm": 0.4833304286003113, | |
| "learning_rate": 0.001, | |
| "loss": 4.1825, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.5079396824127036, | |
| "grad_norm": 0.4333184063434601, | |
| "learning_rate": 0.001, | |
| "loss": 4.1797, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.5119395224191032, | |
| "grad_norm": 0.45237982273101807, | |
| "learning_rate": 0.001, | |
| "loss": 4.1804, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.5159393624255029, | |
| "grad_norm": 0.5843102335929871, | |
| "learning_rate": 0.001, | |
| "loss": 4.1771, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.5199392024319027, | |
| "grad_norm": 0.5091027021408081, | |
| "learning_rate": 0.001, | |
| "loss": 4.1825, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.5239390424383026, | |
| "grad_norm": 0.4457857310771942, | |
| "learning_rate": 0.001, | |
| "loss": 4.1784, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.5279388824447022, | |
| "grad_norm": 0.48936015367507935, | |
| "learning_rate": 0.001, | |
| "loss": 4.1798, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.5319387224511019, | |
| "grad_norm": 0.5162155032157898, | |
| "learning_rate": 0.001, | |
| "loss": 4.1824, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.5359385624575017, | |
| "grad_norm": 0.4464411735534668, | |
| "learning_rate": 0.001, | |
| "loss": 4.1808, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.5399384024639016, | |
| "grad_norm": 0.47520169615745544, | |
| "learning_rate": 0.001, | |
| "loss": 4.1844, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.5439382424703012, | |
| "grad_norm": 0.5208662152290344, | |
| "learning_rate": 0.001, | |
| "loss": 4.1771, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.5479380824767008, | |
| "grad_norm": 0.4846671223640442, | |
| "learning_rate": 0.001, | |
| "loss": 4.1782, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.5519379224831007, | |
| "grad_norm": 0.5209333300590515, | |
| "learning_rate": 0.001, | |
| "loss": 4.1786, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.5559377624895006, | |
| "grad_norm": 0.4502977430820465, | |
| "learning_rate": 0.001, | |
| "loss": 4.1803, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.5599376024959002, | |
| "grad_norm": 0.4156093895435333, | |
| "learning_rate": 0.001, | |
| "loss": 4.1785, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.5639374425022998, | |
| "grad_norm": 0.49340036511421204, | |
| "learning_rate": 0.001, | |
| "loss": 4.1802, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.5679372825086997, | |
| "grad_norm": 0.45686131715774536, | |
| "learning_rate": 0.001, | |
| "loss": 4.1772, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.5719371225150995, | |
| "grad_norm": 0.564764142036438, | |
| "learning_rate": 0.001, | |
| "loss": 4.1759, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.5759369625214992, | |
| "grad_norm": 0.5391719341278076, | |
| "learning_rate": 0.001, | |
| "loss": 4.1808, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.5799368025278988, | |
| "grad_norm": 0.5221198797225952, | |
| "learning_rate": 0.001, | |
| "loss": 4.1823, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.5839366425342987, | |
| "grad_norm": 0.4251661002635956, | |
| "learning_rate": 0.001, | |
| "loss": 4.1771, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.5879364825406985, | |
| "grad_norm": 0.382951021194458, | |
| "learning_rate": 0.001, | |
| "loss": 4.1776, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.5919363225470982, | |
| "grad_norm": 0.40156203508377075, | |
| "learning_rate": 0.001, | |
| "loss": 4.1793, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.5959361625534978, | |
| "grad_norm": 0.4980160593986511, | |
| "learning_rate": 0.001, | |
| "loss": 4.179, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.5999360025598977, | |
| "grad_norm": 0.5159147381782532, | |
| "learning_rate": 0.001, | |
| "loss": 4.179, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.6039358425662975, | |
| "grad_norm": 0.4275522828102112, | |
| "learning_rate": 0.001, | |
| "loss": 4.1768, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.6079356825726971, | |
| "grad_norm": 0.4483228027820587, | |
| "learning_rate": 0.001, | |
| "loss": 4.1762, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.6119355225790968, | |
| "grad_norm": 0.5833166241645813, | |
| "learning_rate": 0.001, | |
| "loss": 4.178, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.6159353625854966, | |
| "grad_norm": 0.4804055988788605, | |
| "learning_rate": 0.001, | |
| "loss": 4.1796, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.6199352025918963, | |
| "grad_norm": 0.46036186814308167, | |
| "learning_rate": 0.001, | |
| "loss": 4.1773, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.623935042598296, | |
| "grad_norm": 0.43077051639556885, | |
| "learning_rate": 0.001, | |
| "loss": 4.1768, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.6279348826046958, | |
| "grad_norm": 0.5465964674949646, | |
| "learning_rate": 0.001, | |
| "loss": 4.1765, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.6319347226110956, | |
| "grad_norm": 0.469560444355011, | |
| "learning_rate": 0.001, | |
| "loss": 4.178, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.6359345626174953, | |
| "grad_norm": 0.48708251118659973, | |
| "learning_rate": 0.001, | |
| "loss": 4.1791, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.639934402623895, | |
| "grad_norm": 0.43754613399505615, | |
| "learning_rate": 0.001, | |
| "loss": 4.1763, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.6439342426302948, | |
| "grad_norm": 0.392625629901886, | |
| "learning_rate": 0.001, | |
| "loss": 4.1813, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.6479340826366946, | |
| "grad_norm": 0.46056312322616577, | |
| "learning_rate": 0.001, | |
| "loss": 4.1798, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.6519339226430942, | |
| "grad_norm": 0.4411376118659973, | |
| "learning_rate": 0.001, | |
| "loss": 4.1727, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.6559337626494939, | |
| "grad_norm": 0.5168668031692505, | |
| "learning_rate": 0.001, | |
| "loss": 4.1765, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.6599336026558937, | |
| "grad_norm": 0.4493384063243866, | |
| "learning_rate": 0.001, | |
| "loss": 4.1726, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.6639334426622936, | |
| "grad_norm": 0.502347469329834, | |
| "learning_rate": 0.001, | |
| "loss": 4.1793, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.6679332826686932, | |
| "grad_norm": 0.4458249509334564, | |
| "learning_rate": 0.001, | |
| "loss": 4.1787, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.6719331226750929, | |
| "grad_norm": 0.4660811126232147, | |
| "learning_rate": 0.001, | |
| "loss": 4.1791, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.6759329626814927, | |
| "grad_norm": 0.5625722408294678, | |
| "learning_rate": 0.001, | |
| "loss": 4.1754, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.6799328026878926, | |
| "grad_norm": 0.47896459698677063, | |
| "learning_rate": 0.001, | |
| "loss": 4.1733, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.6839326426942922, | |
| "grad_norm": 0.42776668071746826, | |
| "learning_rate": 0.001, | |
| "loss": 4.1725, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.6879324827006918, | |
| "grad_norm": 0.47714999318122864, | |
| "learning_rate": 0.001, | |
| "loss": 4.1746, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.6919323227070917, | |
| "grad_norm": 0.5495074987411499, | |
| "learning_rate": 0.001, | |
| "loss": 4.1771, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.6959321627134916, | |
| "grad_norm": 0.48492980003356934, | |
| "learning_rate": 0.001, | |
| "loss": 4.1737, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.6999320027198912, | |
| "grad_norm": 0.45363664627075195, | |
| "learning_rate": 0.001, | |
| "loss": 4.1751, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.7039318427262908, | |
| "grad_norm": 0.4112115800380707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1751, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.7079316827326907, | |
| "grad_norm": 0.4674376845359802, | |
| "learning_rate": 0.001, | |
| "loss": 4.1755, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.7119315227390905, | |
| "grad_norm": 0.4602874219417572, | |
| "learning_rate": 0.001, | |
| "loss": 4.1748, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.7159313627454902, | |
| "grad_norm": 0.46376627683639526, | |
| "learning_rate": 0.001, | |
| "loss": 4.1768, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.7199312027518898, | |
| "grad_norm": 0.7872702479362488, | |
| "learning_rate": 0.001, | |
| "loss": 4.1786, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.7239310427582897, | |
| "grad_norm": 0.4959052801132202, | |
| "learning_rate": 0.001, | |
| "loss": 4.1758, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.7279308827646895, | |
| "grad_norm": 0.47499415278434753, | |
| "learning_rate": 0.001, | |
| "loss": 4.1766, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.7319307227710892, | |
| "grad_norm": 0.37570834159851074, | |
| "learning_rate": 0.001, | |
| "loss": 4.1761, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.7359305627774888, | |
| "grad_norm": 0.5071618556976318, | |
| "learning_rate": 0.001, | |
| "loss": 4.1759, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.7399304027838887, | |
| "grad_norm": 0.4444867670536041, | |
| "learning_rate": 0.001, | |
| "loss": 4.1724, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.7439302427902885, | |
| "grad_norm": 0.4530576467514038, | |
| "learning_rate": 0.001, | |
| "loss": 4.1734, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.7479300827966882, | |
| "grad_norm": 0.39011409878730774, | |
| "learning_rate": 0.001, | |
| "loss": 4.1773, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.7519299228030878, | |
| "grad_norm": 0.4495677351951599, | |
| "learning_rate": 0.001, | |
| "loss": 4.175, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.7559297628094876, | |
| "grad_norm": 0.5421786308288574, | |
| "learning_rate": 0.001, | |
| "loss": 4.1754, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.7599296028158875, | |
| "grad_norm": 0.4947051405906677, | |
| "learning_rate": 0.001, | |
| "loss": 4.1744, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.7639294428222871, | |
| "grad_norm": 0.42439621686935425, | |
| "learning_rate": 0.001, | |
| "loss": 4.175, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.7679292828286868, | |
| "grad_norm": 0.4526050090789795, | |
| "learning_rate": 0.001, | |
| "loss": 4.172, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.7719291228350866, | |
| "grad_norm": 0.4238271117210388, | |
| "learning_rate": 0.001, | |
| "loss": 4.1733, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.7759289628414865, | |
| "grad_norm": 0.4912482500076294, | |
| "learning_rate": 0.001, | |
| "loss": 4.1764, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.7799288028478861, | |
| "grad_norm": 0.4627314805984497, | |
| "learning_rate": 0.001, | |
| "loss": 4.1745, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.7839286428542858, | |
| "grad_norm": 0.4460492432117462, | |
| "learning_rate": 0.001, | |
| "loss": 4.1772, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.7879284828606856, | |
| "grad_norm": 0.46068111062049866, | |
| "learning_rate": 0.001, | |
| "loss": 4.175, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.7919283228670855, | |
| "grad_norm": 0.5168552994728088, | |
| "learning_rate": 0.001, | |
| "loss": 4.1764, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.7959281628734851, | |
| "grad_norm": 0.5711122155189514, | |
| "learning_rate": 0.001, | |
| "loss": 4.1745, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.7999280028798847, | |
| "grad_norm": 0.48340123891830444, | |
| "learning_rate": 0.001, | |
| "loss": 4.1734, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.8039278428862846, | |
| "grad_norm": 0.45124703645706177, | |
| "learning_rate": 0.001, | |
| "loss": 4.1692, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.8079276828926842, | |
| "grad_norm": 0.4612937271595001, | |
| "learning_rate": 0.001, | |
| "loss": 4.176, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.8119275228990839, | |
| "grad_norm": 0.45633766055107117, | |
| "learning_rate": 0.001, | |
| "loss": 4.1734, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.8159273629054837, | |
| "grad_norm": 0.44668149948120117, | |
| "learning_rate": 0.001, | |
| "loss": 4.1735, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.8199272029118836, | |
| "grad_norm": 0.47320279479026794, | |
| "learning_rate": 0.001, | |
| "loss": 4.1727, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.8239270429182832, | |
| "grad_norm": 0.4252322018146515, | |
| "learning_rate": 0.001, | |
| "loss": 4.1743, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.8279268829246829, | |
| "grad_norm": 0.4853968620300293, | |
| "learning_rate": 0.001, | |
| "loss": 4.1732, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.8319267229310827, | |
| "grad_norm": 0.5151093006134033, | |
| "learning_rate": 0.001, | |
| "loss": 4.1741, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.8359265629374826, | |
| "grad_norm": 0.473300039768219, | |
| "learning_rate": 0.001, | |
| "loss": 4.1745, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.8399264029438822, | |
| "grad_norm": 0.48538273572921753, | |
| "learning_rate": 0.001, | |
| "loss": 4.1747, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.8439262429502818, | |
| "grad_norm": 0.42796286940574646, | |
| "learning_rate": 0.001, | |
| "loss": 4.175, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.8479260829566817, | |
| "grad_norm": 0.44311732053756714, | |
| "learning_rate": 0.001, | |
| "loss": 4.1724, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.8519259229630816, | |
| "grad_norm": 0.45130372047424316, | |
| "learning_rate": 0.001, | |
| "loss": 4.1757, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.8559257629694812, | |
| "grad_norm": 0.4500294327735901, | |
| "learning_rate": 0.001, | |
| "loss": 4.1762, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.8599256029758808, | |
| "grad_norm": 0.47864317893981934, | |
| "learning_rate": 0.001, | |
| "loss": 4.1725, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.8639254429822807, | |
| "grad_norm": 0.5632477402687073, | |
| "learning_rate": 0.001, | |
| "loss": 4.1747, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.8679252829886805, | |
| "grad_norm": 0.48071813583374023, | |
| "learning_rate": 0.001, | |
| "loss": 4.1705, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.8719251229950802, | |
| "grad_norm": 0.453741192817688, | |
| "learning_rate": 0.001, | |
| "loss": 4.1727, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.8759249630014798, | |
| "grad_norm": 0.45912396907806396, | |
| "learning_rate": 0.001, | |
| "loss": 4.1748, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.8799248030078797, | |
| "grad_norm": 0.48008185625076294, | |
| "learning_rate": 0.001, | |
| "loss": 4.1734, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.8839246430142795, | |
| "grad_norm": 0.4684300422668457, | |
| "learning_rate": 0.001, | |
| "loss": 4.1688, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.8879244830206792, | |
| "grad_norm": 0.49745339155197144, | |
| "learning_rate": 0.001, | |
| "loss": 4.1712, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.8919243230270788, | |
| "grad_norm": 0.4778960049152374, | |
| "learning_rate": 0.001, | |
| "loss": 4.1693, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.8959241630334787, | |
| "grad_norm": 0.46429726481437683, | |
| "learning_rate": 0.001, | |
| "loss": 4.1676, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.8999240030398785, | |
| "grad_norm": 0.46908000111579895, | |
| "learning_rate": 0.001, | |
| "loss": 4.1711, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.9039238430462782, | |
| "grad_norm": 0.4794583320617676, | |
| "learning_rate": 0.001, | |
| "loss": 4.1689, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.9079236830526778, | |
| "grad_norm": 0.5767402648925781, | |
| "learning_rate": 0.001, | |
| "loss": 4.1719, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.9119235230590776, | |
| "grad_norm": 0.45899704098701477, | |
| "learning_rate": 0.001, | |
| "loss": 4.1729, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.9159233630654775, | |
| "grad_norm": 0.47999170422554016, | |
| "learning_rate": 0.001, | |
| "loss": 4.1722, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.9199232030718771, | |
| "grad_norm": 0.4326845109462738, | |
| "learning_rate": 0.001, | |
| "loss": 4.1701, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.9239230430782768, | |
| "grad_norm": 0.563529372215271, | |
| "learning_rate": 0.001, | |
| "loss": 4.1712, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.9279228830846766, | |
| "grad_norm": 0.4267251491546631, | |
| "learning_rate": 0.001, | |
| "loss": 4.1711, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.9319227230910765, | |
| "grad_norm": 0.4583933651447296, | |
| "learning_rate": 0.001, | |
| "loss": 4.1728, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.9359225630974761, | |
| "grad_norm": 0.41948413848876953, | |
| "learning_rate": 0.001, | |
| "loss": 4.1728, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.9399224031038758, | |
| "grad_norm": 0.4663727879524231, | |
| "learning_rate": 0.001, | |
| "loss": 4.1758, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.9439222431102756, | |
| "grad_norm": 0.49384939670562744, | |
| "learning_rate": 0.001, | |
| "loss": 4.1728, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.9479220831166755, | |
| "grad_norm": 0.4137873351573944, | |
| "learning_rate": 0.001, | |
| "loss": 4.1673, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.951921923123075, | |
| "grad_norm": 0.4351732134819031, | |
| "learning_rate": 0.001, | |
| "loss": 4.1698, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.9559217631294747, | |
| "grad_norm": 0.4443551301956177, | |
| "learning_rate": 0.001, | |
| "loss": 4.1694, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.9599216031358746, | |
| "grad_norm": 0.4084385931491852, | |
| "learning_rate": 0.001, | |
| "loss": 4.1711, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.9639214431422745, | |
| "grad_norm": 0.4777480661869049, | |
| "learning_rate": 0.001, | |
| "loss": 4.1707, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.967921283148674, | |
| "grad_norm": 0.5114396214485168, | |
| "learning_rate": 0.001, | |
| "loss": 4.1681, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.9719211231550737, | |
| "grad_norm": 0.4695410132408142, | |
| "learning_rate": 0.001, | |
| "loss": 4.1705, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.9759209631614736, | |
| "grad_norm": 0.4276166558265686, | |
| "learning_rate": 0.001, | |
| "loss": 4.1699, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.9799208031678734, | |
| "grad_norm": 0.4987983703613281, | |
| "learning_rate": 0.001, | |
| "loss": 4.1706, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.983920643174273, | |
| "grad_norm": 0.4121693968772888, | |
| "learning_rate": 0.001, | |
| "loss": 4.1679, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.9879204831806727, | |
| "grad_norm": 0.47886762022972107, | |
| "learning_rate": 0.001, | |
| "loss": 4.1718, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.9919203231870726, | |
| "grad_norm": 0.4255962073802948, | |
| "learning_rate": 0.001, | |
| "loss": 4.1671, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.9959201631934722, | |
| "grad_norm": 0.5012271404266357, | |
| "learning_rate": 0.001, | |
| "loss": 4.1682, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.9999200031998718, | |
| "grad_norm": 0.44093242287635803, | |
| "learning_rate": 0.001, | |
| "loss": 4.1726, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.0039198432062717, | |
| "grad_norm": 0.49300047755241394, | |
| "learning_rate": 0.001, | |
| "loss": 4.1668, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 2.0079196832126716, | |
| "grad_norm": 0.44728681445121765, | |
| "learning_rate": 0.001, | |
| "loss": 4.168, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 2.0119195232190714, | |
| "grad_norm": 0.5188434720039368, | |
| "learning_rate": 0.001, | |
| "loss": 4.1672, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 2.015919363225471, | |
| "grad_norm": 0.517851710319519, | |
| "learning_rate": 0.001, | |
| "loss": 4.1668, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 2.0199192032318707, | |
| "grad_norm": 0.47993385791778564, | |
| "learning_rate": 0.001, | |
| "loss": 4.1704, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.0239190432382705, | |
| "grad_norm": 0.4726385772228241, | |
| "learning_rate": 0.001, | |
| "loss": 4.1688, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 2.0279188832446704, | |
| "grad_norm": 0.5576769709587097, | |
| "learning_rate": 0.001, | |
| "loss": 4.1687, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 2.03191872325107, | |
| "grad_norm": 0.5270803570747375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1684, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 2.0359185632574697, | |
| "grad_norm": 0.45349547266960144, | |
| "learning_rate": 0.001, | |
| "loss": 4.1687, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 2.0399184032638695, | |
| "grad_norm": 0.5263473987579346, | |
| "learning_rate": 0.001, | |
| "loss": 4.1702, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.0439182432702694, | |
| "grad_norm": 0.494325190782547, | |
| "learning_rate": 0.001, | |
| "loss": 4.1665, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 2.047918083276669, | |
| "grad_norm": 0.5202022790908813, | |
| "learning_rate": 0.001, | |
| "loss": 4.165, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 2.0519179232830687, | |
| "grad_norm": 0.4353752136230469, | |
| "learning_rate": 0.001, | |
| "loss": 4.1684, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 2.0559177632894685, | |
| "grad_norm": 0.46369001269340515, | |
| "learning_rate": 0.001, | |
| "loss": 4.1652, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 2.0599176032958684, | |
| "grad_norm": 0.4611663222312927, | |
| "learning_rate": 0.001, | |
| "loss": 4.1686, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.063917443302268, | |
| "grad_norm": 0.44690844416618347, | |
| "learning_rate": 0.001, | |
| "loss": 4.1709, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 2.0679172833086676, | |
| "grad_norm": 0.4432712495326996, | |
| "learning_rate": 0.001, | |
| "loss": 4.1686, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 2.0719171233150675, | |
| "grad_norm": 0.46799278259277344, | |
| "learning_rate": 0.001, | |
| "loss": 4.169, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 2.0759169633214674, | |
| "grad_norm": 0.488779217004776, | |
| "learning_rate": 0.001, | |
| "loss": 4.1661, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 2.0799168033278668, | |
| "grad_norm": 0.44497257471084595, | |
| "learning_rate": 0.001, | |
| "loss": 4.1641, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.0839166433342666, | |
| "grad_norm": 0.42999890446662903, | |
| "learning_rate": 0.001, | |
| "loss": 4.169, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 2.0879164833406665, | |
| "grad_norm": 0.4540679454803467, | |
| "learning_rate": 0.001, | |
| "loss": 4.1607, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 2.091916323347066, | |
| "grad_norm": 0.43836355209350586, | |
| "learning_rate": 0.001, | |
| "loss": 4.1697, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 2.0959161633534658, | |
| "grad_norm": 0.46834954619407654, | |
| "learning_rate": 0.001, | |
| "loss": 4.1675, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 2.0999160033598656, | |
| "grad_norm": 0.39395639300346375, | |
| "learning_rate": 0.001, | |
| "loss": 4.167, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.1039158433662655, | |
| "grad_norm": 0.47284603118896484, | |
| "learning_rate": 0.001, | |
| "loss": 4.1701, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 2.107915683372665, | |
| "grad_norm": 0.5229921936988831, | |
| "learning_rate": 0.001, | |
| "loss": 4.1692, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 2.1119155233790647, | |
| "grad_norm": 0.4998793303966522, | |
| "learning_rate": 0.001, | |
| "loss": 4.1692, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 2.1159153633854646, | |
| "grad_norm": 0.5066671967506409, | |
| "learning_rate": 0.001, | |
| "loss": 4.1652, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 2.1199152033918645, | |
| "grad_norm": 0.4590517580509186, | |
| "learning_rate": 0.001, | |
| "loss": 4.1668, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.123915043398264, | |
| "grad_norm": 0.49296894669532776, | |
| "learning_rate": 0.001, | |
| "loss": 4.1678, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 2.1279148834046637, | |
| "grad_norm": 0.43287187814712524, | |
| "learning_rate": 0.001, | |
| "loss": 4.1635, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 2.1319147234110636, | |
| "grad_norm": 0.5368506908416748, | |
| "learning_rate": 0.001, | |
| "loss": 4.168, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 2.1359145634174634, | |
| "grad_norm": 0.47554171085357666, | |
| "learning_rate": 0.001, | |
| "loss": 4.1681, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 2.139914403423863, | |
| "grad_norm": 0.47026315331459045, | |
| "learning_rate": 0.001, | |
| "loss": 4.1687, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.1439142434302627, | |
| "grad_norm": 0.4864146411418915, | |
| "learning_rate": 0.001, | |
| "loss": 4.1646, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 2.1479140834366626, | |
| "grad_norm": 0.45245715975761414, | |
| "learning_rate": 0.001, | |
| "loss": 4.164, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 2.1519139234430624, | |
| "grad_norm": 0.5358317494392395, | |
| "learning_rate": 0.001, | |
| "loss": 4.1656, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 2.155913763449462, | |
| "grad_norm": 0.47510644793510437, | |
| "learning_rate": 0.001, | |
| "loss": 4.169, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 2.1599136034558617, | |
| "grad_norm": 0.518865168094635, | |
| "learning_rate": 0.001, | |
| "loss": 4.1712, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.1639134434622616, | |
| "grad_norm": 0.49107488989830017, | |
| "learning_rate": 0.001, | |
| "loss": 4.1664, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 2.1679132834686614, | |
| "grad_norm": 0.4293051064014435, | |
| "learning_rate": 0.001, | |
| "loss": 4.1623, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 2.171913123475061, | |
| "grad_norm": 0.48307546973228455, | |
| "learning_rate": 0.001, | |
| "loss": 4.1671, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 2.1759129634814607, | |
| "grad_norm": 0.49982860684394836, | |
| "learning_rate": 0.001, | |
| "loss": 4.1673, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 2.1799128034878605, | |
| "grad_norm": 0.4217018187046051, | |
| "learning_rate": 0.001, | |
| "loss": 4.1636, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 2.1839126434942604, | |
| "grad_norm": 0.4675614833831787, | |
| "learning_rate": 0.001, | |
| "loss": 4.1679, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 2.18791248350066, | |
| "grad_norm": 0.46770352125167847, | |
| "learning_rate": 0.001, | |
| "loss": 4.1633, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 2.1919123235070597, | |
| "grad_norm": 0.46287262439727783, | |
| "learning_rate": 0.001, | |
| "loss": 4.1665, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 2.1959121635134595, | |
| "grad_norm": 0.42776986956596375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1664, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 2.1999120035198594, | |
| "grad_norm": 0.4742175042629242, | |
| "learning_rate": 0.001, | |
| "loss": 4.1655, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.203911843526259, | |
| "grad_norm": 0.4570881128311157, | |
| "learning_rate": 0.001, | |
| "loss": 4.1659, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 2.2079116835326587, | |
| "grad_norm": 0.4609364867210388, | |
| "learning_rate": 0.001, | |
| "loss": 4.1669, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 2.2119115235390585, | |
| "grad_norm": 0.5724889039993286, | |
| "learning_rate": 0.001, | |
| "loss": 4.1644, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 2.2159113635454584, | |
| "grad_norm": 0.4681205451488495, | |
| "learning_rate": 0.001, | |
| "loss": 4.1614, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 2.219911203551858, | |
| "grad_norm": 0.5061549544334412, | |
| "learning_rate": 0.001, | |
| "loss": 4.168, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 2.2239110435582576, | |
| "grad_norm": 0.4458412826061249, | |
| "learning_rate": 0.001, | |
| "loss": 4.1629, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 2.2279108835646575, | |
| "grad_norm": 0.4831654131412506, | |
| "learning_rate": 0.001, | |
| "loss": 4.1668, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 2.2319107235710574, | |
| "grad_norm": 0.5010032653808594, | |
| "learning_rate": 0.001, | |
| "loss": 4.1656, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 2.2359105635774568, | |
| "grad_norm": 0.4242647886276245, | |
| "learning_rate": 0.001, | |
| "loss": 4.1653, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 2.2399104035838566, | |
| "grad_norm": 0.40968021750450134, | |
| "learning_rate": 0.001, | |
| "loss": 4.168, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.2439102435902565, | |
| "grad_norm": 0.4865590035915375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1675, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 2.2479100835966563, | |
| "grad_norm": 0.4834771156311035, | |
| "learning_rate": 0.001, | |
| "loss": 4.1637, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 2.2519099236030558, | |
| "grad_norm": 0.41941970586776733, | |
| "learning_rate": 0.001, | |
| "loss": 4.1631, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 2.2559097636094556, | |
| "grad_norm": 0.48071053624153137, | |
| "learning_rate": 0.001, | |
| "loss": 4.1675, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 2.2599096036158555, | |
| "grad_norm": 0.4841105043888092, | |
| "learning_rate": 0.001, | |
| "loss": 4.165, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 2.2639094436222553, | |
| "grad_norm": 0.44922900199890137, | |
| "learning_rate": 0.001, | |
| "loss": 4.1621, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 2.2679092836286547, | |
| "grad_norm": 0.4089633524417877, | |
| "learning_rate": 0.001, | |
| "loss": 4.1707, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 2.2719091236350546, | |
| "grad_norm": 0.5155735015869141, | |
| "learning_rate": 0.001, | |
| "loss": 4.1675, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 2.2759089636414545, | |
| "grad_norm": 0.44511187076568604, | |
| "learning_rate": 0.001, | |
| "loss": 4.1626, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 2.279908803647854, | |
| "grad_norm": 0.45319080352783203, | |
| "learning_rate": 0.001, | |
| "loss": 4.1639, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.2839086436542537, | |
| "grad_norm": 0.4893025755882263, | |
| "learning_rate": 0.001, | |
| "loss": 4.1625, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 2.2879084836606536, | |
| "grad_norm": 0.4628910720348358, | |
| "learning_rate": 0.001, | |
| "loss": 4.1635, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 2.2919083236670534, | |
| "grad_norm": 0.5497888326644897, | |
| "learning_rate": 0.001, | |
| "loss": 4.1627, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 2.2959081636734533, | |
| "grad_norm": 0.45991674065589905, | |
| "learning_rate": 0.001, | |
| "loss": 4.1666, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 2.2999080036798527, | |
| "grad_norm": 0.5311243534088135, | |
| "learning_rate": 0.001, | |
| "loss": 4.1602, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 2.3039078436862526, | |
| "grad_norm": 0.4266692101955414, | |
| "learning_rate": 0.001, | |
| "loss": 4.1605, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 2.3079076836926524, | |
| "grad_norm": 0.5123735070228577, | |
| "learning_rate": 0.001, | |
| "loss": 4.164, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 2.311907523699052, | |
| "grad_norm": 0.5435347557067871, | |
| "learning_rate": 0.001, | |
| "loss": 4.163, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 2.3159073637054517, | |
| "grad_norm": 0.5728914737701416, | |
| "learning_rate": 0.001, | |
| "loss": 4.1661, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 2.3199072037118516, | |
| "grad_norm": 0.5435721278190613, | |
| "learning_rate": 0.001, | |
| "loss": 4.1632, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.3239070437182514, | |
| "grad_norm": 0.5009971261024475, | |
| "learning_rate": 0.001, | |
| "loss": 4.163, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 2.327906883724651, | |
| "grad_norm": 0.47658857703208923, | |
| "learning_rate": 0.001, | |
| "loss": 4.1621, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 2.3319067237310507, | |
| "grad_norm": 0.5185097455978394, | |
| "learning_rate": 0.001, | |
| "loss": 4.1616, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 2.3359065637374505, | |
| "grad_norm": 0.43645840883255005, | |
| "learning_rate": 0.001, | |
| "loss": 4.1667, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 2.3399064037438504, | |
| "grad_norm": 0.4473995566368103, | |
| "learning_rate": 0.001, | |
| "loss": 4.1658, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 2.34390624375025, | |
| "grad_norm": 0.4278011918067932, | |
| "learning_rate": 0.001, | |
| "loss": 4.1678, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 2.3479060837566497, | |
| "grad_norm": 0.47076526284217834, | |
| "learning_rate": 0.001, | |
| "loss": 4.1665, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 2.3519059237630495, | |
| "grad_norm": 0.5503517985343933, | |
| "learning_rate": 0.001, | |
| "loss": 4.1628, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 2.3559057637694494, | |
| "grad_norm": 0.41893520951271057, | |
| "learning_rate": 0.001, | |
| "loss": 4.1602, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 2.359905603775849, | |
| "grad_norm": 0.4245523512363434, | |
| "learning_rate": 0.001, | |
| "loss": 4.1616, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.3639054437822486, | |
| "grad_norm": 0.4149760603904724, | |
| "learning_rate": 0.001, | |
| "loss": 4.1676, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 2.3679052837886485, | |
| "grad_norm": 0.5713924169540405, | |
| "learning_rate": 0.001, | |
| "loss": 4.1636, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 2.3719051237950484, | |
| "grad_norm": 0.4798339009284973, | |
| "learning_rate": 0.001, | |
| "loss": 4.162, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 2.3759049638014478, | |
| "grad_norm": 0.42810848355293274, | |
| "learning_rate": 0.001, | |
| "loss": 4.1651, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 2.3799048038078476, | |
| "grad_norm": 0.5690004229545593, | |
| "learning_rate": 0.001, | |
| "loss": 4.1633, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 2.3839046438142475, | |
| "grad_norm": 0.48632410168647766, | |
| "learning_rate": 0.001, | |
| "loss": 4.1668, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 2.3879044838206474, | |
| "grad_norm": 0.4375806152820587, | |
| "learning_rate": 0.001, | |
| "loss": 4.1638, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 2.3919043238270468, | |
| "grad_norm": 0.44997647404670715, | |
| "learning_rate": 0.001, | |
| "loss": 4.1614, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 2.3959041638334466, | |
| "grad_norm": 0.5309412479400635, | |
| "learning_rate": 0.001, | |
| "loss": 4.1612, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 2.3999040038398465, | |
| "grad_norm": 0.560085654258728, | |
| "learning_rate": 0.001, | |
| "loss": 4.1633, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.4039038438462463, | |
| "grad_norm": 0.4551568925380707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1618, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 2.4079036838526457, | |
| "grad_norm": 0.4853755533695221, | |
| "learning_rate": 0.001, | |
| "loss": 4.164, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 2.4119035238590456, | |
| "grad_norm": 0.47059595584869385, | |
| "learning_rate": 0.001, | |
| "loss": 4.1631, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 2.4159033638654455, | |
| "grad_norm": 0.5161297917366028, | |
| "learning_rate": 0.001, | |
| "loss": 4.1636, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 2.4199032038718453, | |
| "grad_norm": 0.4607383608818054, | |
| "learning_rate": 0.001, | |
| "loss": 4.1641, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 2.4239030438782447, | |
| "grad_norm": 0.4741229712963104, | |
| "learning_rate": 0.001, | |
| "loss": 4.1621, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 2.4279028838846446, | |
| "grad_norm": 0.4276678264141083, | |
| "learning_rate": 0.001, | |
| "loss": 4.1654, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 2.4319027238910444, | |
| "grad_norm": 0.45867425203323364, | |
| "learning_rate": 0.001, | |
| "loss": 4.1639, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 2.435902563897444, | |
| "grad_norm": 0.5171924233436584, | |
| "learning_rate": 0.001, | |
| "loss": 4.1617, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 2.4399024039038437, | |
| "grad_norm": 0.4670430123806, | |
| "learning_rate": 0.001, | |
| "loss": 4.1625, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.4439022439102436, | |
| "grad_norm": 0.4531850516796112, | |
| "learning_rate": 0.001, | |
| "loss": 4.1638, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 2.4479020839166434, | |
| "grad_norm": 0.5091714262962341, | |
| "learning_rate": 0.001, | |
| "loss": 4.1636, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 2.4519019239230433, | |
| "grad_norm": 0.48586076498031616, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 2.4559017639294427, | |
| "grad_norm": 0.44302985072135925, | |
| "learning_rate": 0.001, | |
| "loss": 4.1601, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 2.4599016039358426, | |
| "grad_norm": 0.4628585875034332, | |
| "learning_rate": 0.001, | |
| "loss": 4.163, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 2.4639014439422424, | |
| "grad_norm": 0.5455500483512878, | |
| "learning_rate": 0.001, | |
| "loss": 4.1634, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 2.467901283948642, | |
| "grad_norm": 0.5075648427009583, | |
| "learning_rate": 0.001, | |
| "loss": 4.1617, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 2.4719011239550417, | |
| "grad_norm": 0.44180813431739807, | |
| "learning_rate": 0.001, | |
| "loss": 4.1665, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 2.4759009639614415, | |
| "grad_norm": 0.5144279599189758, | |
| "learning_rate": 0.001, | |
| "loss": 4.1606, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 2.4799008039678414, | |
| "grad_norm": 0.5430071353912354, | |
| "learning_rate": 0.001, | |
| "loss": 4.1612, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.4839006439742413, | |
| "grad_norm": 0.4144330322742462, | |
| "learning_rate": 0.001, | |
| "loss": 4.1609, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 2.4879004839806407, | |
| "grad_norm": 0.5358167290687561, | |
| "learning_rate": 0.001, | |
| "loss": 4.1565, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 2.4919003239870405, | |
| "grad_norm": 0.5273513793945312, | |
| "learning_rate": 0.001, | |
| "loss": 4.1623, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 2.4959001639934404, | |
| "grad_norm": 0.4575463533401489, | |
| "learning_rate": 0.001, | |
| "loss": 4.1635, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 2.49990000399984, | |
| "grad_norm": 0.47879844903945923, | |
| "learning_rate": 0.001, | |
| "loss": 4.1629, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 2.5038998440062397, | |
| "grad_norm": 0.40959274768829346, | |
| "learning_rate": 0.001, | |
| "loss": 4.1623, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 2.5078996840126395, | |
| "grad_norm": 0.5272637009620667, | |
| "learning_rate": 0.001, | |
| "loss": 4.1621, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 2.5118995240190394, | |
| "grad_norm": 0.40802329778671265, | |
| "learning_rate": 0.001, | |
| "loss": 4.162, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 2.5158993640254392, | |
| "grad_norm": 0.4832558035850525, | |
| "learning_rate": 0.001, | |
| "loss": 4.1644, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 2.5198992040318386, | |
| "grad_norm": 0.4066709280014038, | |
| "learning_rate": 0.001, | |
| "loss": 4.1644, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 2.5238990440382385, | |
| "grad_norm": 0.42834344506263733, | |
| "learning_rate": 0.001, | |
| "loss": 4.1621, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 2.5278988840446384, | |
| "grad_norm": 0.5093958377838135, | |
| "learning_rate": 0.001, | |
| "loss": 4.161, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 2.5318987240510378, | |
| "grad_norm": 0.5456981658935547, | |
| "learning_rate": 0.001, | |
| "loss": 4.164, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 2.5358985640574376, | |
| "grad_norm": 0.47444722056388855, | |
| "learning_rate": 0.001, | |
| "loss": 4.1582, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 2.5398984040638375, | |
| "grad_norm": 0.49098628759384155, | |
| "learning_rate": 0.001, | |
| "loss": 4.1614, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 2.5438982440702373, | |
| "grad_norm": 0.5193818807601929, | |
| "learning_rate": 0.001, | |
| "loss": 4.1603, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 2.547898084076637, | |
| "grad_norm": 0.4925576150417328, | |
| "learning_rate": 0.001, | |
| "loss": 4.1603, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 2.5518979240830366, | |
| "grad_norm": 0.4549446403980255, | |
| "learning_rate": 0.001, | |
| "loss": 4.1554, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 2.5558977640894365, | |
| "grad_norm": 0.5603616237640381, | |
| "learning_rate": 0.001, | |
| "loss": 4.1607, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 2.5598976040958363, | |
| "grad_norm": 0.429360955953598, | |
| "learning_rate": 0.001, | |
| "loss": 4.1628, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 2.5638974441022357, | |
| "grad_norm": 0.4182247519493103, | |
| "learning_rate": 0.001, | |
| "loss": 4.164, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 2.5678972841086356, | |
| "grad_norm": 0.5610747337341309, | |
| "learning_rate": 0.001, | |
| "loss": 4.1641, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 2.5718971241150355, | |
| "grad_norm": 0.39815935492515564, | |
| "learning_rate": 0.001, | |
| "loss": 4.158, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 2.5758969641214353, | |
| "grad_norm": 0.4976713955402374, | |
| "learning_rate": 0.001, | |
| "loss": 4.1603, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 2.5798968041278347, | |
| "grad_norm": 0.46783262491226196, | |
| "learning_rate": 0.001, | |
| "loss": 4.1656, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 2.5838966441342346, | |
| "grad_norm": 0.41783684492111206, | |
| "learning_rate": 0.001, | |
| "loss": 4.161, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 2.5878964841406344, | |
| "grad_norm": 0.4668773412704468, | |
| "learning_rate": 0.001, | |
| "loss": 4.1637, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 2.591896324147034, | |
| "grad_norm": 0.4740975797176361, | |
| "learning_rate": 0.001, | |
| "loss": 4.1598, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 2.5958961641534337, | |
| "grad_norm": 0.4560339152812958, | |
| "learning_rate": 0.001, | |
| "loss": 4.1584, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 2.5998960041598336, | |
| "grad_norm": 0.4181240200996399, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 2.6038958441662334, | |
| "grad_norm": 0.5197315812110901, | |
| "learning_rate": 0.001, | |
| "loss": 4.1594, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 2.6078956841726333, | |
| "grad_norm": 0.45271509885787964, | |
| "learning_rate": 0.001, | |
| "loss": 4.1605, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 2.6118955241790327, | |
| "grad_norm": 0.521769642829895, | |
| "learning_rate": 0.001, | |
| "loss": 4.1631, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 2.6158953641854326, | |
| "grad_norm": 0.4969339370727539, | |
| "learning_rate": 0.001, | |
| "loss": 4.1601, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 2.6198952041918324, | |
| "grad_norm": 0.41261184215545654, | |
| "learning_rate": 0.001, | |
| "loss": 4.1627, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 2.623895044198232, | |
| "grad_norm": 0.4068695306777954, | |
| "learning_rate": 0.001, | |
| "loss": 4.1583, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 2.6278948842046317, | |
| "grad_norm": 0.5288635492324829, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 2.6318947242110315, | |
| "grad_norm": 0.5345166921615601, | |
| "learning_rate": 0.001, | |
| "loss": 4.1637, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 2.6358945642174314, | |
| "grad_norm": 0.5781984329223633, | |
| "learning_rate": 0.001, | |
| "loss": 4.1595, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 2.6398944042238313, | |
| "grad_norm": 0.4580060541629791, | |
| "learning_rate": 0.001, | |
| "loss": 4.1658, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 2.6438942442302307, | |
| "grad_norm": 0.4711572825908661, | |
| "learning_rate": 0.001, | |
| "loss": 4.1555, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 2.6478940842366305, | |
| "grad_norm": 0.4615152180194855, | |
| "learning_rate": 0.001, | |
| "loss": 4.1619, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 2.6518939242430304, | |
| "grad_norm": 0.45307356119155884, | |
| "learning_rate": 0.001, | |
| "loss": 4.1565, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 2.65589376424943, | |
| "grad_norm": 0.4266311526298523, | |
| "learning_rate": 0.001, | |
| "loss": 4.1625, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 2.6598936042558297, | |
| "grad_norm": 0.43120697140693665, | |
| "learning_rate": 0.001, | |
| "loss": 4.1637, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 2.6638934442622295, | |
| "grad_norm": 0.49627289175987244, | |
| "learning_rate": 0.001, | |
| "loss": 4.1599, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 2.6678932842686294, | |
| "grad_norm": 0.4489138126373291, | |
| "learning_rate": 0.001, | |
| "loss": 4.1557, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 2.6718931242750292, | |
| "grad_norm": 0.5802924633026123, | |
| "learning_rate": 0.001, | |
| "loss": 4.161, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 2.6758929642814286, | |
| "grad_norm": 0.4587540924549103, | |
| "learning_rate": 0.001, | |
| "loss": 4.1605, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 2.6798928042878285, | |
| "grad_norm": 0.5906481742858887, | |
| "learning_rate": 0.001, | |
| "loss": 4.1578, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 2.6838926442942284, | |
| "grad_norm": 0.4712335169315338, | |
| "learning_rate": 0.001, | |
| "loss": 4.1596, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 2.6878924843006278, | |
| "grad_norm": 0.4989967942237854, | |
| "learning_rate": 0.001, | |
| "loss": 4.1572, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 2.6918923243070276, | |
| "grad_norm": 0.49959269165992737, | |
| "learning_rate": 0.001, | |
| "loss": 4.1574, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 2.6958921643134275, | |
| "grad_norm": 0.4110835790634155, | |
| "learning_rate": 0.001, | |
| "loss": 4.1605, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 2.6998920043198273, | |
| "grad_norm": 0.45519450306892395, | |
| "learning_rate": 0.001, | |
| "loss": 4.1545, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 2.703891844326227, | |
| "grad_norm": 0.41701555252075195, | |
| "learning_rate": 0.001, | |
| "loss": 4.1591, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 2.7078916843326266, | |
| "grad_norm": 0.46233931183815, | |
| "learning_rate": 0.001, | |
| "loss": 4.1578, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 2.7118915243390265, | |
| "grad_norm": 0.4422828257083893, | |
| "learning_rate": 0.001, | |
| "loss": 4.1584, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 2.7158913643454263, | |
| "grad_norm": 0.4062967598438263, | |
| "learning_rate": 0.001, | |
| "loss": 4.163, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 2.7198912043518257, | |
| "grad_norm": 0.45407694578170776, | |
| "learning_rate": 0.001, | |
| "loss": 4.1592, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 2.7238910443582256, | |
| "grad_norm": 0.4106515347957611, | |
| "learning_rate": 0.001, | |
| "loss": 4.1577, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 2.7278908843646255, | |
| "grad_norm": 0.4356382191181183, | |
| "learning_rate": 0.001, | |
| "loss": 4.1603, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 2.7318907243710253, | |
| "grad_norm": 0.5237522721290588, | |
| "learning_rate": 0.001, | |
| "loss": 4.1598, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 2.735890564377425, | |
| "grad_norm": 0.48187971115112305, | |
| "learning_rate": 0.001, | |
| "loss": 4.159, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 2.7398904043838246, | |
| "grad_norm": 0.4024539887905121, | |
| "learning_rate": 0.001, | |
| "loss": 4.1535, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 2.7438902443902244, | |
| "grad_norm": 0.49014198780059814, | |
| "learning_rate": 0.001, | |
| "loss": 4.1555, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 2.7478900843966243, | |
| "grad_norm": 0.4648239016532898, | |
| "learning_rate": 0.001, | |
| "loss": 4.1573, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 2.7518899244030237, | |
| "grad_norm": 0.53783118724823, | |
| "learning_rate": 0.001, | |
| "loss": 4.1578, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 2.7558897644094236, | |
| "grad_norm": 0.4142454266548157, | |
| "learning_rate": 0.001, | |
| "loss": 4.1567, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 2.7598896044158234, | |
| "grad_norm": 0.5513470768928528, | |
| "learning_rate": 0.001, | |
| "loss": 4.1607, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 2.7638894444222233, | |
| "grad_norm": 0.45765164494514465, | |
| "learning_rate": 0.001, | |
| "loss": 4.1615, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 2.7678892844286227, | |
| "grad_norm": 0.5637156367301941, | |
| "learning_rate": 0.001, | |
| "loss": 4.1588, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 2.7718891244350226, | |
| "grad_norm": 0.41710424423217773, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 2.7758889644414224, | |
| "grad_norm": 0.5280339121818542, | |
| "learning_rate": 0.001, | |
| "loss": 4.1615, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 2.779888804447822, | |
| "grad_norm": 0.6178783178329468, | |
| "learning_rate": 0.001, | |
| "loss": 4.1582, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 2.7838886444542217, | |
| "grad_norm": 0.4407796859741211, | |
| "learning_rate": 0.001, | |
| "loss": 4.1616, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 2.7878884844606215, | |
| "grad_norm": 0.4791260361671448, | |
| "learning_rate": 0.001, | |
| "loss": 4.1584, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 2.7918883244670214, | |
| "grad_norm": 0.7081926465034485, | |
| "learning_rate": 0.001, | |
| "loss": 4.1551, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 2.7958881644734213, | |
| "grad_norm": 0.46901920437812805, | |
| "learning_rate": 0.001, | |
| "loss": 4.159, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 2.7998880044798207, | |
| "grad_norm": 0.5519617795944214, | |
| "learning_rate": 0.001, | |
| "loss": 4.1603, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 2.8038878444862205, | |
| "grad_norm": 0.5594943165779114, | |
| "learning_rate": 0.001, | |
| "loss": 4.1582, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 2.8078876844926204, | |
| "grad_norm": 0.4514610171318054, | |
| "learning_rate": 0.001, | |
| "loss": 4.1565, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 2.81188752449902, | |
| "grad_norm": 0.5486029982566833, | |
| "learning_rate": 0.001, | |
| "loss": 4.1568, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 2.8158873645054197, | |
| "grad_norm": 0.4768097698688507, | |
| "learning_rate": 0.001, | |
| "loss": 4.1618, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 2.8198872045118195, | |
| "grad_norm": 0.49742165207862854, | |
| "learning_rate": 0.001, | |
| "loss": 4.162, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 2.8238870445182194, | |
| "grad_norm": 0.49774202704429626, | |
| "learning_rate": 0.001, | |
| "loss": 4.1565, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 2.8278868845246192, | |
| "grad_norm": 0.5217127799987793, | |
| "learning_rate": 0.001, | |
| "loss": 4.1585, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 2.8318867245310186, | |
| "grad_norm": 0.44911012053489685, | |
| "learning_rate": 0.001, | |
| "loss": 4.1573, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 2.8358865645374185, | |
| "grad_norm": 0.47019949555397034, | |
| "learning_rate": 0.001, | |
| "loss": 4.1572, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 2.8398864045438184, | |
| "grad_norm": 0.47618111968040466, | |
| "learning_rate": 0.001, | |
| "loss": 4.1596, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 2.8438862445502178, | |
| "grad_norm": 0.5387282967567444, | |
| "learning_rate": 0.001, | |
| "loss": 4.1552, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 2.8478860845566176, | |
| "grad_norm": 0.5334316492080688, | |
| "learning_rate": 0.001, | |
| "loss": 4.154, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 2.8518859245630175, | |
| "grad_norm": 0.4238860607147217, | |
| "learning_rate": 0.001, | |
| "loss": 4.1582, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 2.8558857645694173, | |
| "grad_norm": 0.42178449034690857, | |
| "learning_rate": 0.001, | |
| "loss": 4.1582, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 2.859885604575817, | |
| "grad_norm": 0.5171424746513367, | |
| "learning_rate": 0.001, | |
| "loss": 4.1578, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 2.8638854445822166, | |
| "grad_norm": 0.47590920329093933, | |
| "learning_rate": 0.001, | |
| "loss": 4.1577, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 2.8678852845886165, | |
| "grad_norm": 0.4355865716934204, | |
| "learning_rate": 0.001, | |
| "loss": 4.1601, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 2.8718851245950163, | |
| "grad_norm": 0.45468640327453613, | |
| "learning_rate": 0.001, | |
| "loss": 4.1588, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 2.8758849646014157, | |
| "grad_norm": 0.4616718292236328, | |
| "learning_rate": 0.001, | |
| "loss": 4.1584, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 2.8798848046078156, | |
| "grad_norm": 0.4420863389968872, | |
| "learning_rate": 0.001, | |
| "loss": 4.1566, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 2.8838846446142155, | |
| "grad_norm": 0.6559464335441589, | |
| "learning_rate": 0.001, | |
| "loss": 4.1554, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 2.8878844846206153, | |
| "grad_norm": 0.41194289922714233, | |
| "learning_rate": 0.001, | |
| "loss": 4.1572, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 2.891884324627015, | |
| "grad_norm": 0.4040292501449585, | |
| "learning_rate": 0.001, | |
| "loss": 4.1588, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 2.8958841646334146, | |
| "grad_norm": 0.45008403062820435, | |
| "learning_rate": 0.001, | |
| "loss": 4.1596, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 2.8998840046398144, | |
| "grad_norm": 0.46512535214424133, | |
| "learning_rate": 0.001, | |
| "loss": 4.1518, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 2.9038838446462143, | |
| "grad_norm": 0.4376848638057709, | |
| "learning_rate": 0.001, | |
| "loss": 4.1563, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 2.9078836846526137, | |
| "grad_norm": 0.38827189803123474, | |
| "learning_rate": 0.001, | |
| "loss": 4.1556, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 2.9118835246590136, | |
| "grad_norm": 0.494261234998703, | |
| "learning_rate": 0.001, | |
| "loss": 4.1587, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 2.9158833646654134, | |
| "grad_norm": 0.4950977861881256, | |
| "learning_rate": 0.001, | |
| "loss": 4.1596, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 2.9198832046718133, | |
| "grad_norm": 0.5697283744812012, | |
| "learning_rate": 0.001, | |
| "loss": 4.1547, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 2.923883044678213, | |
| "grad_norm": 0.4631339907646179, | |
| "learning_rate": 0.001, | |
| "loss": 4.1594, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 2.9278828846846126, | |
| "grad_norm": 0.4409984052181244, | |
| "learning_rate": 0.001, | |
| "loss": 4.1562, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 2.9318827246910124, | |
| "grad_norm": 0.49438488483428955, | |
| "learning_rate": 0.001, | |
| "loss": 4.1568, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 2.9358825646974123, | |
| "grad_norm": 0.45631879568099976, | |
| "learning_rate": 0.001, | |
| "loss": 4.1576, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 2.9398824047038117, | |
| "grad_norm": 0.5139431357383728, | |
| "learning_rate": 0.001, | |
| "loss": 4.1583, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 2.9438822447102115, | |
| "grad_norm": 0.5125510096549988, | |
| "learning_rate": 0.001, | |
| "loss": 4.1563, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 2.9478820847166114, | |
| "grad_norm": 0.44619888067245483, | |
| "learning_rate": 0.001, | |
| "loss": 4.1548, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 2.9518819247230113, | |
| "grad_norm": 0.4973961114883423, | |
| "learning_rate": 0.001, | |
| "loss": 4.1563, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 2.9558817647294107, | |
| "grad_norm": 0.4607144892215729, | |
| "learning_rate": 0.001, | |
| "loss": 4.1569, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 2.9598816047358105, | |
| "grad_norm": 0.5176932215690613, | |
| "learning_rate": 0.001, | |
| "loss": 4.1556, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 2.9638814447422104, | |
| "grad_norm": 0.4734891653060913, | |
| "learning_rate": 0.001, | |
| "loss": 4.155, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 2.96788128474861, | |
| "grad_norm": 0.5034810900688171, | |
| "learning_rate": 0.001, | |
| "loss": 4.1572, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 2.9718811247550097, | |
| "grad_norm": 0.4262826144695282, | |
| "learning_rate": 0.001, | |
| "loss": 4.1591, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 2.9758809647614095, | |
| "grad_norm": 0.46682965755462646, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 2.9798808047678094, | |
| "grad_norm": 0.5210826992988586, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 2.9838806447742092, | |
| "grad_norm": 0.42274290323257446, | |
| "learning_rate": 0.001, | |
| "loss": 4.1551, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 2.9878804847806086, | |
| "grad_norm": 0.48334646224975586, | |
| "learning_rate": 0.001, | |
| "loss": 4.1543, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 2.9918803247870085, | |
| "grad_norm": 0.4629717469215393, | |
| "learning_rate": 0.001, | |
| "loss": 4.1555, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 2.9958801647934084, | |
| "grad_norm": 0.40987247228622437, | |
| "learning_rate": 0.001, | |
| "loss": 4.1594, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 2.9998800047998078, | |
| "grad_norm": 0.48736295104026794, | |
| "learning_rate": 0.001, | |
| "loss": 4.1586, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 3.0038798448062076, | |
| "grad_norm": 0.5455026626586914, | |
| "learning_rate": 0.001, | |
| "loss": 4.1533, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 3.0078796848126075, | |
| "grad_norm": 0.4381161630153656, | |
| "learning_rate": 0.001, | |
| "loss": 4.1543, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 3.0118795248190073, | |
| "grad_norm": 0.48587241768836975, | |
| "learning_rate": 0.001, | |
| "loss": 4.1544, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 3.015879364825407, | |
| "grad_norm": 0.47117552161216736, | |
| "learning_rate": 0.001, | |
| "loss": 4.1539, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 3.0198792048318066, | |
| "grad_norm": 0.4904659688472748, | |
| "learning_rate": 0.001, | |
| "loss": 4.1543, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 3.0238790448382065, | |
| "grad_norm": 0.5004945397377014, | |
| "learning_rate": 0.001, | |
| "loss": 4.1548, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 3.0278788848446063, | |
| "grad_norm": 0.42175960540771484, | |
| "learning_rate": 0.001, | |
| "loss": 4.1515, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 3.0318787248510057, | |
| "grad_norm": 0.5447273850440979, | |
| "learning_rate": 0.001, | |
| "loss": 4.1531, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 3.0358785648574056, | |
| "grad_norm": 0.4705660939216614, | |
| "learning_rate": 0.001, | |
| "loss": 4.1498, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 3.0398784048638055, | |
| "grad_norm": 0.5477752685546875, | |
| "learning_rate": 0.001, | |
| "loss": 4.1533, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 3.0438782448702053, | |
| "grad_norm": 0.47758665680885315, | |
| "learning_rate": 0.001, | |
| "loss": 4.1538, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 3.0478780848766047, | |
| "grad_norm": 0.39745718240737915, | |
| "learning_rate": 0.001, | |
| "loss": 4.1558, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 3.0518779248830046, | |
| "grad_norm": 0.4436202049255371, | |
| "learning_rate": 0.001, | |
| "loss": 4.1532, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 3.0558777648894044, | |
| "grad_norm": 0.5916080474853516, | |
| "learning_rate": 0.001, | |
| "loss": 4.154, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 3.0598776048958043, | |
| "grad_norm": 0.5111138224601746, | |
| "learning_rate": 0.001, | |
| "loss": 4.15, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 3.0638774449022037, | |
| "grad_norm": 0.48636212944984436, | |
| "learning_rate": 0.001, | |
| "loss": 4.1509, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 3.0678772849086036, | |
| "grad_norm": 0.4602707326412201, | |
| "learning_rate": 0.001, | |
| "loss": 4.1529, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 3.0718771249150034, | |
| "grad_norm": 0.4185924828052521, | |
| "learning_rate": 0.001, | |
| "loss": 4.1583, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 3.0758769649214033, | |
| "grad_norm": 0.4830791652202606, | |
| "learning_rate": 0.001, | |
| "loss": 4.1498, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 3.0798768049278027, | |
| "grad_norm": 0.4777405858039856, | |
| "learning_rate": 0.001, | |
| "loss": 4.1546, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 3.0838766449342025, | |
| "grad_norm": 0.4124826192855835, | |
| "learning_rate": 0.001, | |
| "loss": 4.1537, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 3.0878764849406024, | |
| "grad_norm": 0.387603759765625, | |
| "learning_rate": 0.001, | |
| "loss": 4.1574, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 3.0918763249470023, | |
| "grad_norm": 0.43888458609580994, | |
| "learning_rate": 0.001, | |
| "loss": 4.1528, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 3.0958761649534017, | |
| "grad_norm": 0.5398756265640259, | |
| "learning_rate": 0.001, | |
| "loss": 4.156, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 3.0998760049598015, | |
| "grad_norm": 0.4512723982334137, | |
| "learning_rate": 0.001, | |
| "loss": 4.15, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 3.1038758449662014, | |
| "grad_norm": 0.4444531500339508, | |
| "learning_rate": 0.001, | |
| "loss": 4.1544, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 3.1078756849726012, | |
| "grad_norm": 0.5301286578178406, | |
| "learning_rate": 0.001, | |
| "loss": 4.153, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 3.1118755249790007, | |
| "grad_norm": 0.45263248682022095, | |
| "learning_rate": 0.001, | |
| "loss": 4.157, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 3.1158753649854005, | |
| "grad_norm": 0.46157121658325195, | |
| "learning_rate": 0.001, | |
| "loss": 4.1527, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 3.1198752049918004, | |
| "grad_norm": 0.43224167823791504, | |
| "learning_rate": 0.001, | |
| "loss": 4.1547, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 3.1238750449982002, | |
| "grad_norm": 0.42079129815101624, | |
| "learning_rate": 0.001, | |
| "loss": 4.1553, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 3.1278748850045996, | |
| "grad_norm": 0.4730684161186218, | |
| "learning_rate": 0.001, | |
| "loss": 4.1531, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 3.1318747250109995, | |
| "grad_norm": 0.48231276869773865, | |
| "learning_rate": 0.001, | |
| "loss": 4.1579, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 3.1358745650173994, | |
| "grad_norm": 0.4426518380641937, | |
| "learning_rate": 0.001, | |
| "loss": 4.1538, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 3.139874405023799, | |
| "grad_norm": 0.5078949928283691, | |
| "learning_rate": 0.001, | |
| "loss": 4.1521, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 3.1438742450301986, | |
| "grad_norm": 0.4642763137817383, | |
| "learning_rate": 0.001, | |
| "loss": 4.1543, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 3.1478740850365985, | |
| "grad_norm": 0.43856772780418396, | |
| "learning_rate": 0.001, | |
| "loss": 4.1555, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 3.1518739250429983, | |
| "grad_norm": 0.49219101667404175, | |
| "learning_rate": 0.001, | |
| "loss": 4.1534, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 3.155873765049398, | |
| "grad_norm": 0.4498043954372406, | |
| "learning_rate": 0.001, | |
| "loss": 4.153, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 3.1598736050557976, | |
| "grad_norm": 0.48328474164009094, | |
| "learning_rate": 0.001, | |
| "loss": 4.1548, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 3.1638734450621975, | |
| "grad_norm": 0.510409951210022, | |
| "learning_rate": 0.001, | |
| "loss": 4.1513, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 3.1678732850685973, | |
| "grad_norm": 0.5685029625892639, | |
| "learning_rate": 0.001, | |
| "loss": 4.1528, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 3.171873125074997, | |
| "grad_norm": 0.5003494620323181, | |
| "learning_rate": 0.001, | |
| "loss": 4.153, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 3.1758729650813966, | |
| "grad_norm": 0.42618006467819214, | |
| "learning_rate": 0.001, | |
| "loss": 4.1522, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 3.1798728050877965, | |
| "grad_norm": 0.5276270508766174, | |
| "learning_rate": 0.001, | |
| "loss": 4.1562, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 3.1838726450941963, | |
| "grad_norm": 0.4651762545108795, | |
| "learning_rate": 0.001, | |
| "loss": 4.1545, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 3.187872485100596, | |
| "grad_norm": 0.5323458313941956, | |
| "learning_rate": 0.001, | |
| "loss": 4.1568, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 3.1918723251069956, | |
| "grad_norm": 0.48029133677482605, | |
| "learning_rate": 0.001, | |
| "loss": 4.1571, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 3.1958721651133954, | |
| "grad_norm": 0.40680381655693054, | |
| "learning_rate": 0.001, | |
| "loss": 4.1521, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 3.1998720051197953, | |
| "grad_norm": 0.5401845574378967, | |
| "learning_rate": 0.001, | |
| "loss": 4.1551, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 3.203871845126195, | |
| "grad_norm": 0.4607747197151184, | |
| "learning_rate": 0.001, | |
| "loss": 4.1503, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 3.2078716851325946, | |
| "grad_norm": 0.4039115011692047, | |
| "learning_rate": 0.001, | |
| "loss": 4.1514, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 3.2118715251389944, | |
| "grad_norm": 0.47579532861709595, | |
| "learning_rate": 0.001, | |
| "loss": 4.1514, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 3.2158713651453943, | |
| "grad_norm": 0.43037813901901245, | |
| "learning_rate": 0.001, | |
| "loss": 4.1555, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 3.2198712051517937, | |
| "grad_norm": 0.44526252150535583, | |
| "learning_rate": 0.001, | |
| "loss": 4.1538, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 3.2238710451581936, | |
| "grad_norm": 0.43144863843917847, | |
| "learning_rate": 0.001, | |
| "loss": 4.1521, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 3.2278708851645934, | |
| "grad_norm": 0.5244666934013367, | |
| "learning_rate": 0.001, | |
| "loss": 4.1526, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 3.2318707251709933, | |
| "grad_norm": 0.569317638874054, | |
| "learning_rate": 0.001, | |
| "loss": 4.155, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 3.235870565177393, | |
| "grad_norm": 0.48165997862815857, | |
| "learning_rate": 0.001, | |
| "loss": 4.1549, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 3.2398704051837925, | |
| "grad_norm": 0.3605559170246124, | |
| "learning_rate": 0.001, | |
| "loss": 4.1521, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 3.2438702451901924, | |
| "grad_norm": 0.4918169379234314, | |
| "learning_rate": 0.001, | |
| "loss": 4.1509, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 3.2478700851965923, | |
| "grad_norm": 0.4783216714859009, | |
| "learning_rate": 0.001, | |
| "loss": 4.1556, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 3.2518699252029917, | |
| "grad_norm": 0.44440028071403503, | |
| "learning_rate": 0.001, | |
| "loss": 4.1521, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 3.2558697652093915, | |
| "grad_norm": 0.5073365569114685, | |
| "learning_rate": 0.001, | |
| "loss": 4.1559, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 3.2598696052157914, | |
| "grad_norm": 0.47869718074798584, | |
| "learning_rate": 0.001, | |
| "loss": 4.1554, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 3.2638694452221912, | |
| "grad_norm": 0.4230363667011261, | |
| "learning_rate": 0.001, | |
| "loss": 4.1493, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 3.267869285228591, | |
| "grad_norm": 0.4750482738018036, | |
| "learning_rate": 0.001, | |
| "loss": 4.1528, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 3.2718691252349905, | |
| "grad_norm": 0.5645243525505066, | |
| "learning_rate": 0.001, | |
| "loss": 4.1526, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 3.2758689652413904, | |
| "grad_norm": 0.41942843794822693, | |
| "learning_rate": 0.001, | |
| "loss": 4.1512, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 3.2798688052477902, | |
| "grad_norm": 0.4255695044994354, | |
| "learning_rate": 0.001, | |
| "loss": 4.1518, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 3.2838686452541896, | |
| "grad_norm": 0.4215909242630005, | |
| "learning_rate": 0.001, | |
| "loss": 4.1568, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 3.2878684852605895, | |
| "grad_norm": 0.4049839973449707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1534, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 3.2918683252669894, | |
| "grad_norm": 0.514345109462738, | |
| "learning_rate": 0.001, | |
| "loss": 4.1492, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 3.295868165273389, | |
| "grad_norm": 0.43098345398902893, | |
| "learning_rate": 0.001, | |
| "loss": 4.1522, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 3.2998680052797886, | |
| "grad_norm": 0.4352331757545471, | |
| "learning_rate": 0.001, | |
| "loss": 4.1524, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 3.3038678452861885, | |
| "grad_norm": 0.4635871946811676, | |
| "learning_rate": 0.001, | |
| "loss": 4.1526, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 3.3078676852925883, | |
| "grad_norm": 0.41384679079055786, | |
| "learning_rate": 0.001, | |
| "loss": 4.1548, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 3.311867525298988, | |
| "grad_norm": 0.39796626567840576, | |
| "learning_rate": 0.001, | |
| "loss": 4.1544, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 3.3158673653053876, | |
| "grad_norm": 0.40175408124923706, | |
| "learning_rate": 0.001, | |
| "loss": 4.155, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 3.3198672053117875, | |
| "grad_norm": 0.4152776598930359, | |
| "learning_rate": 0.001, | |
| "loss": 4.1555, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 3.3238670453181873, | |
| "grad_norm": 0.5190226435661316, | |
| "learning_rate": 0.001, | |
| "loss": 4.1571, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 3.327866885324587, | |
| "grad_norm": 0.43292152881622314, | |
| "learning_rate": 0.001, | |
| "loss": 4.1502, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 3.3318667253309866, | |
| "grad_norm": 0.4904835522174835, | |
| "learning_rate": 0.001, | |
| "loss": 4.1515, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 3.3358665653373865, | |
| "grad_norm": 0.5600055456161499, | |
| "learning_rate": 0.001, | |
| "loss": 4.1497, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 3.3398664053437863, | |
| "grad_norm": 0.5315993428230286, | |
| "learning_rate": 0.001, | |
| "loss": 4.1564, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 3.343866245350186, | |
| "grad_norm": 0.4802263081073761, | |
| "learning_rate": 0.001, | |
| "loss": 4.1557, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 3.3478660853565856, | |
| "grad_norm": 0.3861168324947357, | |
| "learning_rate": 0.001, | |
| "loss": 4.1547, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 3.3518659253629854, | |
| "grad_norm": 0.5466539263725281, | |
| "learning_rate": 0.001, | |
| "loss": 4.1539, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 3.3558657653693853, | |
| "grad_norm": 0.506841242313385, | |
| "learning_rate": 0.001, | |
| "loss": 4.1531, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 3.359865605375785, | |
| "grad_norm": 0.5451818704605103, | |
| "learning_rate": 0.001, | |
| "loss": 4.1548, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 3.3638654453821846, | |
| "grad_norm": 0.48338380455970764, | |
| "learning_rate": 0.001, | |
| "loss": 4.1526, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 3.3678652853885844, | |
| "grad_norm": 0.5230739116668701, | |
| "learning_rate": 0.001, | |
| "loss": 4.1523, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 3.3718651253949843, | |
| "grad_norm": 0.433020681142807, | |
| "learning_rate": 0.001, | |
| "loss": 4.1514, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 3.3758649654013837, | |
| "grad_norm": 0.45081406831741333, | |
| "learning_rate": 0.001, | |
| "loss": 4.1533, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 3.3798648054077836, | |
| "grad_norm": 0.5491710305213928, | |
| "learning_rate": 0.001, | |
| "loss": 4.1544, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 3.3838646454141834, | |
| "grad_norm": 0.46437957882881165, | |
| "learning_rate": 0.001, | |
| "loss": 4.1538, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 3.3878644854205833, | |
| "grad_norm": 0.5560771822929382, | |
| "learning_rate": 0.001, | |
| "loss": 4.1527, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 3.391864325426983, | |
| "grad_norm": 0.5027199983596802, | |
| "learning_rate": 0.001, | |
| "loss": 4.1487, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 3.3958641654333825, | |
| "grad_norm": 0.42762041091918945, | |
| "learning_rate": 0.001, | |
| "loss": 4.1525, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 3.3998640054397824, | |
| "grad_norm": 0.5043109655380249, | |
| "learning_rate": 0.001, | |
| "loss": 4.152, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 3.4038638454461823, | |
| "grad_norm": 0.4508378207683563, | |
| "learning_rate": 0.001, | |
| "loss": 4.1499, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 3.4078636854525817, | |
| "grad_norm": 0.3978097140789032, | |
| "learning_rate": 0.001, | |
| "loss": 4.152, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 3.4118635254589815, | |
| "grad_norm": 0.5391075015068054, | |
| "learning_rate": 0.001, | |
| "loss": 4.1558, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 3.4158633654653814, | |
| "grad_norm": 0.5179737210273743, | |
| "learning_rate": 0.001, | |
| "loss": 4.1554, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 3.4198632054717812, | |
| "grad_norm": 0.451274037361145, | |
| "learning_rate": 0.001, | |
| "loss": 4.1521, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 3.423863045478181, | |
| "grad_norm": 0.46372953057289124, | |
| "learning_rate": 0.001, | |
| "loss": 4.1499, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 3.4278628854845805, | |
| "grad_norm": 0.44644874334335327, | |
| "learning_rate": 0.001, | |
| "loss": 4.1542, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 3.4318627254909804, | |
| "grad_norm": 0.45447978377342224, | |
| "learning_rate": 0.001, | |
| "loss": 4.1551, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 3.4358625654973802, | |
| "grad_norm": 0.47864270210266113, | |
| "learning_rate": 0.001, | |
| "loss": 4.1539, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 3.4398624055037796, | |
| "grad_norm": 0.48053839802742004, | |
| "learning_rate": 0.001, | |
| "loss": 4.1525, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 3.4438622455101795, | |
| "grad_norm": 0.42985475063323975, | |
| "learning_rate": 0.001, | |
| "loss": 4.1518, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 3.4478620855165794, | |
| "grad_norm": 0.4910486042499542, | |
| "learning_rate": 0.001, | |
| "loss": 4.1515, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 3.451861925522979, | |
| "grad_norm": 0.4845552146434784, | |
| "learning_rate": 0.001, | |
| "loss": 4.1533, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 3.455861765529379, | |
| "grad_norm": 0.42696115374565125, | |
| "learning_rate": 0.001, | |
| "loss": 4.1563, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 3.4598616055357785, | |
| "grad_norm": 0.45997726917266846, | |
| "learning_rate": 0.001, | |
| "loss": 4.1504, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 3.4638614455421783, | |
| "grad_norm": 0.47192636132240295, | |
| "learning_rate": 0.001, | |
| "loss": 4.1512, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 3.467861285548578, | |
| "grad_norm": 0.4781351387500763, | |
| "learning_rate": 0.001, | |
| "loss": 4.1568, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 3.4718611255549776, | |
| "grad_norm": 0.47357264161109924, | |
| "learning_rate": 0.001, | |
| "loss": 4.1512, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 3.4758609655613775, | |
| "grad_norm": 0.500704288482666, | |
| "learning_rate": 0.001, | |
| "loss": 4.1511, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 3.4798608055677773, | |
| "grad_norm": 0.5305373668670654, | |
| "learning_rate": 0.001, | |
| "loss": 4.1556, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 3.483860645574177, | |
| "grad_norm": 0.5612720251083374, | |
| "learning_rate": 0.001, | |
| "loss": 4.1538, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 3.4878604855805766, | |
| "grad_norm": 0.47861745953559875, | |
| "learning_rate": 0.001, | |
| "loss": 4.1543, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 3.4918603255869765, | |
| "grad_norm": 0.4624346196651459, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 3.4958601655933763, | |
| "grad_norm": 0.4761544167995453, | |
| "learning_rate": 0.001, | |
| "loss": 4.1535, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 3.499860005599776, | |
| "grad_norm": 0.4098283648490906, | |
| "learning_rate": 0.001, | |
| "loss": 4.1534, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 3.5038598456061756, | |
| "grad_norm": 0.4998922646045685, | |
| "learning_rate": 0.001, | |
| "loss": 4.1471, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 3.5078596856125754, | |
| "grad_norm": 0.4544636607170105, | |
| "learning_rate": 0.001, | |
| "loss": 4.1505, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 3.5118595256189753, | |
| "grad_norm": 0.4588119387626648, | |
| "learning_rate": 0.001, | |
| "loss": 4.1544, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 3.515859365625375, | |
| "grad_norm": 0.5002636313438416, | |
| "learning_rate": 0.001, | |
| "loss": 4.1502, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 3.5198592056317746, | |
| "grad_norm": 0.4344749450683594, | |
| "learning_rate": 0.001, | |
| "loss": 4.1506, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 3.5238590456381744, | |
| "grad_norm": 0.5412445664405823, | |
| "learning_rate": 0.001, | |
| "loss": 4.1506, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 3.5278588856445743, | |
| "grad_norm": 0.45813220739364624, | |
| "learning_rate": 0.001, | |
| "loss": 4.1501, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 3.5318587256509737, | |
| "grad_norm": 0.43678992986679077, | |
| "learning_rate": 0.001, | |
| "loss": 4.1528, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 3.5358585656573736, | |
| "grad_norm": 0.5613416433334351, | |
| "learning_rate": 0.001, | |
| "loss": 4.1513, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 3.5398584056637734, | |
| "grad_norm": 0.6545833945274353, | |
| "learning_rate": 0.001, | |
| "loss": 4.1531, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 3.5438582456701733, | |
| "grad_norm": 0.4009111821651459, | |
| "learning_rate": 0.001, | |
| "loss": 4.1476, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 3.547858085676573, | |
| "grad_norm": 0.5212500095367432, | |
| "learning_rate": 0.001, | |
| "loss": 4.1516, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 3.5518579256829725, | |
| "grad_norm": 0.4273383915424347, | |
| "learning_rate": 0.001, | |
| "loss": 4.1499, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 3.5558577656893724, | |
| "grad_norm": 0.4782946705818176, | |
| "learning_rate": 0.001, | |
| "loss": 4.1545, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 3.5598576056957723, | |
| "grad_norm": 0.41619250178337097, | |
| "learning_rate": 0.001, | |
| "loss": 4.1534, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 3.5638574457021717, | |
| "grad_norm": 0.4649808704853058, | |
| "learning_rate": 0.001, | |
| "loss": 4.154, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 3.5678572857085715, | |
| "grad_norm": 0.44941842555999756, | |
| "learning_rate": 0.001, | |
| "loss": 4.1506, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 3.5718571257149714, | |
| "grad_norm": 0.5763667821884155, | |
| "learning_rate": 0.001, | |
| "loss": 4.1527, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 3.5758569657213712, | |
| "grad_norm": 0.5648437142372131, | |
| "learning_rate": 0.001, | |
| "loss": 4.1563, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 3.579856805727771, | |
| "grad_norm": 0.5101251006126404, | |
| "learning_rate": 0.001, | |
| "loss": 4.1547, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 3.5838566457341705, | |
| "grad_norm": 0.434222936630249, | |
| "learning_rate": 0.001, | |
| "loss": 4.1537, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 3.5878564857405704, | |
| "grad_norm": 0.4032537341117859, | |
| "learning_rate": 0.001, | |
| "loss": 4.1495, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 3.5918563257469702, | |
| "grad_norm": 0.4581545889377594, | |
| "learning_rate": 0.001, | |
| "loss": 4.1474, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 3.5958561657533696, | |
| "grad_norm": 0.6091015338897705, | |
| "learning_rate": 0.001, | |
| "loss": 4.1492, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 3.5998560057597695, | |
| "grad_norm": 0.5093620419502258, | |
| "learning_rate": 0.001, | |
| "loss": 4.1496, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 3.6038558457661694, | |
| "grad_norm": 0.4329790771007538, | |
| "learning_rate": 0.001, | |
| "loss": 4.1494, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 3.607855685772569, | |
| "grad_norm": 0.5041528344154358, | |
| "learning_rate": 0.001, | |
| "loss": 4.154, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 3.611855525778969, | |
| "grad_norm": 0.3949008285999298, | |
| "learning_rate": 0.001, | |
| "loss": 4.1527, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 3.6158553657853685, | |
| "grad_norm": 0.40398308634757996, | |
| "learning_rate": 0.001, | |
| "loss": 4.1507, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 3.6198552057917683, | |
| "grad_norm": 0.4658049941062927, | |
| "learning_rate": 0.001, | |
| "loss": 4.1488, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 3.623855045798168, | |
| "grad_norm": 0.4312227666378021, | |
| "learning_rate": 0.001, | |
| "loss": 4.1503, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 3.6278548858045676, | |
| "grad_norm": 0.39520397782325745, | |
| "learning_rate": 0.001, | |
| "loss": 4.1486, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 3.6318547258109675, | |
| "grad_norm": 0.4321967363357544, | |
| "learning_rate": 0.001, | |
| "loss": 4.1485, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 3.6358545658173673, | |
| "grad_norm": 0.5055027604103088, | |
| "learning_rate": 0.001, | |
| "loss": 4.1507, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 3.639854405823767, | |
| "grad_norm": 0.44665881991386414, | |
| "learning_rate": 0.001, | |
| "loss": 4.149, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 3.643854245830167, | |
| "grad_norm": 0.48920923471450806, | |
| "learning_rate": 0.001, | |
| "loss": 4.1483, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 3.6478540858365665, | |
| "grad_norm": 0.5461563467979431, | |
| "learning_rate": 0.001, | |
| "loss": 4.1524, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 3.6518539258429663, | |
| "grad_norm": 0.47327253222465515, | |
| "learning_rate": 0.001, | |
| "loss": 4.1502, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 3.655853765849366, | |
| "grad_norm": 0.478876531124115, | |
| "learning_rate": 0.001, | |
| "loss": 4.1512, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 3.6598536058557656, | |
| "grad_norm": 0.42543497681617737, | |
| "learning_rate": 0.001, | |
| "loss": 4.1532, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 3.6638534458621654, | |
| "grad_norm": 0.4441344738006592, | |
| "learning_rate": 0.001, | |
| "loss": 4.1484, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 3.6678532858685653, | |
| "grad_norm": 0.4607570469379425, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 3.671853125874965, | |
| "grad_norm": 0.45845574140548706, | |
| "learning_rate": 0.001, | |
| "loss": 4.1518, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 3.675852965881365, | |
| "grad_norm": 0.5007107853889465, | |
| "learning_rate": 0.001, | |
| "loss": 4.1481, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 3.6798528058877644, | |
| "grad_norm": 0.5496152639389038, | |
| "learning_rate": 0.001, | |
| "loss": 4.1514, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 3.6838526458941643, | |
| "grad_norm": 0.49697238206863403, | |
| "learning_rate": 0.001, | |
| "loss": 4.1462, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 3.687852485900564, | |
| "grad_norm": 0.46701979637145996, | |
| "learning_rate": 0.001, | |
| "loss": 4.149, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 3.6918523259069636, | |
| "grad_norm": 0.4277583360671997, | |
| "learning_rate": 0.001, | |
| "loss": 4.1531, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 3.6958521659133634, | |
| "grad_norm": 0.4613393545150757, | |
| "learning_rate": 0.001, | |
| "loss": 4.1477, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 3.6998520059197633, | |
| "grad_norm": 0.5120405554771423, | |
| "learning_rate": 0.001, | |
| "loss": 4.1487, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 3.703851845926163, | |
| "grad_norm": 0.5001824498176575, | |
| "learning_rate": 0.001, | |
| "loss": 4.1518, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 3.7078516859325625, | |
| "grad_norm": 0.4255179762840271, | |
| "learning_rate": 0.001, | |
| "loss": 4.1511, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 3.7118515259389624, | |
| "grad_norm": 0.48289310932159424, | |
| "learning_rate": 0.001, | |
| "loss": 4.1509, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 3.7158513659453623, | |
| "grad_norm": 0.536251962184906, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 3.7198512059517617, | |
| "grad_norm": 0.5439473390579224, | |
| "learning_rate": 0.001, | |
| "loss": 4.1511, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 3.7238510459581615, | |
| "grad_norm": 0.4157579839229584, | |
| "learning_rate": 0.001, | |
| "loss": 4.1502, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 3.7278508859645614, | |
| "grad_norm": 0.4220696985721588, | |
| "learning_rate": 0.001, | |
| "loss": 4.1461, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 3.7318507259709612, | |
| "grad_norm": 0.4461278021335602, | |
| "learning_rate": 0.001, | |
| "loss": 4.1525, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 3.735850565977361, | |
| "grad_norm": 0.5958127379417419, | |
| "learning_rate": 0.001, | |
| "loss": 4.1529, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 3.7398504059837605, | |
| "grad_norm": 0.3829163908958435, | |
| "learning_rate": 0.001, | |
| "loss": 4.1517, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 3.7438502459901604, | |
| "grad_norm": 0.43108895421028137, | |
| "learning_rate": 0.001, | |
| "loss": 4.1471, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 3.74785008599656, | |
| "grad_norm": 0.5303171873092651, | |
| "learning_rate": 0.001, | |
| "loss": 4.1497, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 3.7518499260029596, | |
| "grad_norm": 0.6245208382606506, | |
| "learning_rate": 0.001, | |
| "loss": 4.1461, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 3.7558497660093595, | |
| "grad_norm": 0.4686441421508789, | |
| "learning_rate": 0.001, | |
| "loss": 4.1511, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 3.7598496060157593, | |
| "grad_norm": 0.5424903035163879, | |
| "learning_rate": 0.001, | |
| "loss": 4.1473, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 3.763849446022159, | |
| "grad_norm": 0.42156532406806946, | |
| "learning_rate": 0.001, | |
| "loss": 4.1499, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 3.767849286028559, | |
| "grad_norm": 0.4944685399532318, | |
| "learning_rate": 0.001, | |
| "loss": 4.1486, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 3.7718491260349585, | |
| "grad_norm": 0.46695804595947266, | |
| "learning_rate": 0.001, | |
| "loss": 4.1495, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 3.7758489660413583, | |
| "grad_norm": 0.4274919033050537, | |
| "learning_rate": 0.001, | |
| "loss": 4.149, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 3.779848806047758, | |
| "grad_norm": 0.5031160116195679, | |
| "learning_rate": 0.001, | |
| "loss": 4.1464, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 3.7838486460541576, | |
| "grad_norm": 0.4584692716598511, | |
| "learning_rate": 0.001, | |
| "loss": 4.1483, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 3.7878484860605575, | |
| "grad_norm": 0.4695260524749756, | |
| "learning_rate": 0.001, | |
| "loss": 4.1496, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 3.7918483260669573, | |
| "grad_norm": 0.4564335346221924, | |
| "learning_rate": 0.001, | |
| "loss": 4.1472, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 3.795848166073357, | |
| "grad_norm": 0.47409653663635254, | |
| "learning_rate": 0.001, | |
| "loss": 4.15, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 3.799848006079757, | |
| "grad_norm": 0.4272071123123169, | |
| "learning_rate": 0.001, | |
| "loss": 4.1465, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 3.8038478460861564, | |
| "grad_norm": 0.5578600764274597, | |
| "learning_rate": 0.001, | |
| "loss": 4.1486, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 3.8078476860925563, | |
| "grad_norm": 0.503226637840271, | |
| "learning_rate": 0.001, | |
| "loss": 4.1463, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 3.811847526098956, | |
| "grad_norm": 0.4407929480075836, | |
| "learning_rate": 0.001, | |
| "loss": 4.1503, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 3.8158473661053556, | |
| "grad_norm": 0.3911983370780945, | |
| "learning_rate": 0.001, | |
| "loss": 4.1478, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 3.8198472061117554, | |
| "grad_norm": 0.4753795564174652, | |
| "learning_rate": 0.001, | |
| "loss": 4.1465, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 3.8238470461181553, | |
| "grad_norm": 0.5648890733718872, | |
| "learning_rate": 0.001, | |
| "loss": 4.1505, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 3.827846886124555, | |
| "grad_norm": 0.5674782991409302, | |
| "learning_rate": 0.001, | |
| "loss": 4.15, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 3.831846726130955, | |
| "grad_norm": 0.45150429010391235, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 3.8358465661373544, | |
| "grad_norm": 0.44328397512435913, | |
| "learning_rate": 0.001, | |
| "loss": 4.1486, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 3.8398464061437543, | |
| "grad_norm": 0.5726007223129272, | |
| "learning_rate": 0.001, | |
| "loss": 4.1518, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 3.843846246150154, | |
| "grad_norm": 0.4201109707355499, | |
| "learning_rate": 0.001, | |
| "loss": 4.1511, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 3.8478460861565535, | |
| "grad_norm": 0.4402865469455719, | |
| "learning_rate": 0.001, | |
| "loss": 4.1442, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 3.8518459261629534, | |
| "grad_norm": 0.41505661606788635, | |
| "learning_rate": 0.001, | |
| "loss": 4.1523, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 3.8558457661693533, | |
| "grad_norm": 0.4434868097305298, | |
| "learning_rate": 0.001, | |
| "loss": 4.1508, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 3.859845606175753, | |
| "grad_norm": 0.44477754831314087, | |
| "learning_rate": 0.001, | |
| "loss": 4.1495, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 3.863845446182153, | |
| "grad_norm": 0.44696667790412903, | |
| "learning_rate": 0.001, | |
| "loss": 4.1491, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 3.8678452861885524, | |
| "grad_norm": 0.4718262553215027, | |
| "learning_rate": 0.001, | |
| "loss": 4.1483, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 3.8718451261949522, | |
| "grad_norm": 0.48947885632514954, | |
| "learning_rate": 0.001, | |
| "loss": 4.1486, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 3.875844966201352, | |
| "grad_norm": 0.42789411544799805, | |
| "learning_rate": 0.001, | |
| "loss": 4.1496, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 3.8798448062077515, | |
| "grad_norm": 0.46309271454811096, | |
| "learning_rate": 0.001, | |
| "loss": 4.1467, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 3.8838446462141514, | |
| "grad_norm": 0.5170295834541321, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 3.8878444862205512, | |
| "grad_norm": 0.4399054944515228, | |
| "learning_rate": 0.001, | |
| "loss": 4.1451, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 3.891844326226951, | |
| "grad_norm": 0.5585961937904358, | |
| "learning_rate": 0.001, | |
| "loss": 4.1487, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 3.8958441662333505, | |
| "grad_norm": 0.4246786832809448, | |
| "learning_rate": 0.001, | |
| "loss": 4.1491, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 3.8998440062397504, | |
| "grad_norm": 0.44548454880714417, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 3.90384384624615, | |
| "grad_norm": 0.43676796555519104, | |
| "learning_rate": 0.001, | |
| "loss": 4.1451, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 3.9078436862525496, | |
| "grad_norm": 0.494795024394989, | |
| "learning_rate": 0.001, | |
| "loss": 4.1469, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 3.9118435262589495, | |
| "grad_norm": 0.5050995349884033, | |
| "learning_rate": 0.001, | |
| "loss": 4.1478, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 3.9158433662653493, | |
| "grad_norm": 0.38782063126564026, | |
| "learning_rate": 0.001, | |
| "loss": 4.1497, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 3.919843206271749, | |
| "grad_norm": 0.4937835931777954, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 3.923843046278149, | |
| "grad_norm": 0.45296600461006165, | |
| "learning_rate": 0.001, | |
| "loss": 4.1495, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 3.9278428862845485, | |
| "grad_norm": 0.40895891189575195, | |
| "learning_rate": 0.001, | |
| "loss": 4.1471, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 3.9318427262909483, | |
| "grad_norm": 0.4339890480041504, | |
| "learning_rate": 0.001, | |
| "loss": 4.1465, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 3.935842566297348, | |
| "grad_norm": 0.395710825920105, | |
| "learning_rate": 0.001, | |
| "loss": 4.1454, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 3.9398424063037476, | |
| "grad_norm": 0.43159592151641846, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 3.9438422463101475, | |
| "grad_norm": 0.4928899109363556, | |
| "learning_rate": 0.001, | |
| "loss": 4.1465, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 3.9478420863165473, | |
| "grad_norm": 0.5097815990447998, | |
| "learning_rate": 0.001, | |
| "loss": 4.1464, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 3.951841926322947, | |
| "grad_norm": 0.4376477003097534, | |
| "learning_rate": 0.001, | |
| "loss": 4.1485, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 3.955841766329347, | |
| "grad_norm": 0.5436988472938538, | |
| "learning_rate": 0.001, | |
| "loss": 4.1502, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 3.9598416063357464, | |
| "grad_norm": 0.47442567348480225, | |
| "learning_rate": 0.001, | |
| "loss": 4.1452, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 3.9638414463421463, | |
| "grad_norm": 0.512935221195221, | |
| "learning_rate": 0.001, | |
| "loss": 4.1537, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 3.967841286348546, | |
| "grad_norm": 0.44137364625930786, | |
| "learning_rate": 0.001, | |
| "loss": 4.148, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 3.9718411263549456, | |
| "grad_norm": 0.4600997269153595, | |
| "learning_rate": 0.001, | |
| "loss": 4.1469, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 3.9758409663613454, | |
| "grad_norm": 0.44996100664138794, | |
| "learning_rate": 0.001, | |
| "loss": 4.1487, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 3.9798408063677453, | |
| "grad_norm": 0.42435500025749207, | |
| "learning_rate": 0.001, | |
| "loss": 4.148, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 3.983840646374145, | |
| "grad_norm": 0.47722697257995605, | |
| "learning_rate": 0.001, | |
| "loss": 4.1451, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 3.987840486380545, | |
| "grad_norm": 0.6269773244857788, | |
| "learning_rate": 0.001, | |
| "loss": 4.1473, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 3.9918403263869444, | |
| "grad_norm": 0.4844716787338257, | |
| "learning_rate": 0.001, | |
| "loss": 4.146, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 3.9958401663933443, | |
| "grad_norm": 0.43544551730155945, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 3.999840006399744, | |
| "grad_norm": 0.5435088872909546, | |
| "learning_rate": 0.001, | |
| "loss": 4.1454, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 4.0038398464061435, | |
| "grad_norm": 0.39157047867774963, | |
| "learning_rate": 0.001, | |
| "loss": 4.1453, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 4.007839686412543, | |
| "grad_norm": 0.42717739939689636, | |
| "learning_rate": 0.001, | |
| "loss": 4.1492, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 4.011839526418943, | |
| "grad_norm": 0.5479187965393066, | |
| "learning_rate": 0.001, | |
| "loss": 4.1474, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 4.015839366425343, | |
| "grad_norm": 0.39487773180007935, | |
| "learning_rate": 0.001, | |
| "loss": 4.148, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 4.019839206431743, | |
| "grad_norm": 0.49917787313461304, | |
| "learning_rate": 0.001, | |
| "loss": 4.1477, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 4.023839046438143, | |
| "grad_norm": 0.5411247611045837, | |
| "learning_rate": 0.001, | |
| "loss": 4.1453, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 4.027838886444542, | |
| "grad_norm": 0.4550989866256714, | |
| "learning_rate": 0.001, | |
| "loss": 4.1474, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 4.031838726450942, | |
| "grad_norm": 0.44234633445739746, | |
| "learning_rate": 0.001, | |
| "loss": 4.1453, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 4.0358385664573415, | |
| "grad_norm": 0.42147624492645264, | |
| "learning_rate": 0.001, | |
| "loss": 4.1424, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 4.039838406463741, | |
| "grad_norm": 0.43127307295799255, | |
| "learning_rate": 0.001, | |
| "loss": 4.1434, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 4.043838246470141, | |
| "grad_norm": 0.5709433555603027, | |
| "learning_rate": 0.001, | |
| "loss": 4.1443, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 4.047838086476541, | |
| "grad_norm": 0.5325762033462524, | |
| "learning_rate": 0.001, | |
| "loss": 4.1414, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 4.051837926482941, | |
| "grad_norm": 0.5265848636627197, | |
| "learning_rate": 0.001, | |
| "loss": 4.1492, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 4.055837766489341, | |
| "grad_norm": 0.4727579653263092, | |
| "learning_rate": 0.001, | |
| "loss": 4.1522, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 4.05983760649574, | |
| "grad_norm": 0.4549713432788849, | |
| "learning_rate": 0.001, | |
| "loss": 4.1404, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 4.06383744650214, | |
| "grad_norm": 0.4658414125442505, | |
| "learning_rate": 0.001, | |
| "loss": 4.1482, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 4.0678372865085395, | |
| "grad_norm": 0.4108069837093353, | |
| "learning_rate": 0.001, | |
| "loss": 4.1426, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 4.071837126514939, | |
| "grad_norm": 0.5544761419296265, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 4.075836966521339, | |
| "grad_norm": 0.3932327628135681, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 4.079836806527739, | |
| "grad_norm": 0.5421287417411804, | |
| "learning_rate": 0.001, | |
| "loss": 4.1473, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 4.083836646534139, | |
| "grad_norm": 0.4574621915817261, | |
| "learning_rate": 0.001, | |
| "loss": 4.1413, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 4.087836486540539, | |
| "grad_norm": 0.47430068254470825, | |
| "learning_rate": 0.001, | |
| "loss": 4.1469, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 4.091836326546938, | |
| "grad_norm": 0.4744085967540741, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 4.095836166553338, | |
| "grad_norm": 0.48127326369285583, | |
| "learning_rate": 0.001, | |
| "loss": 4.1443, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 4.0998360065597375, | |
| "grad_norm": 0.4618822932243347, | |
| "learning_rate": 0.001, | |
| "loss": 4.1474, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 4.103835846566137, | |
| "grad_norm": 0.43074139952659607, | |
| "learning_rate": 0.001, | |
| "loss": 4.1454, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 4.107835686572537, | |
| "grad_norm": 0.47091934084892273, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 4.111835526578937, | |
| "grad_norm": 0.3798442780971527, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 4.115835366585337, | |
| "grad_norm": 0.4601074755191803, | |
| "learning_rate": 0.001, | |
| "loss": 4.1504, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 4.119835206591737, | |
| "grad_norm": 0.43777865171432495, | |
| "learning_rate": 0.001, | |
| "loss": 4.1473, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 4.123835046598136, | |
| "grad_norm": 0.45747852325439453, | |
| "learning_rate": 0.001, | |
| "loss": 4.1429, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 4.127834886604536, | |
| "grad_norm": 0.5466395020484924, | |
| "learning_rate": 0.001, | |
| "loss": 4.1458, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 4.131834726610935, | |
| "grad_norm": 0.5395704507827759, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 4.135834566617335, | |
| "grad_norm": 0.4724808931350708, | |
| "learning_rate": 0.001, | |
| "loss": 4.1468, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 4.139834406623735, | |
| "grad_norm": 0.47559893131256104, | |
| "learning_rate": 0.001, | |
| "loss": 4.1452, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 4.143834246630135, | |
| "grad_norm": 0.4290676712989807, | |
| "learning_rate": 0.001, | |
| "loss": 4.1469, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 4.147834086636535, | |
| "grad_norm": 0.39940178394317627, | |
| "learning_rate": 0.001, | |
| "loss": 4.147, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 4.151833926642935, | |
| "grad_norm": 0.45378994941711426, | |
| "learning_rate": 0.001, | |
| "loss": 4.1441, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 4.155833766649334, | |
| "grad_norm": 0.46410059928894043, | |
| "learning_rate": 0.001, | |
| "loss": 4.1481, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 4.1598336066557335, | |
| "grad_norm": 0.45726585388183594, | |
| "learning_rate": 0.001, | |
| "loss": 4.1477, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 4.163833446662133, | |
| "grad_norm": 0.42764076590538025, | |
| "learning_rate": 0.001, | |
| "loss": 4.1468, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 4.167833286668533, | |
| "grad_norm": 0.47908028960227966, | |
| "learning_rate": 0.001, | |
| "loss": 4.1426, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 4.171833126674933, | |
| "grad_norm": 0.5179200172424316, | |
| "learning_rate": 0.001, | |
| "loss": 4.1402, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 4.175832966681333, | |
| "grad_norm": 0.46754130721092224, | |
| "learning_rate": 0.001, | |
| "loss": 4.146, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 4.179832806687733, | |
| "grad_norm": 0.45480966567993164, | |
| "learning_rate": 0.001, | |
| "loss": 4.1457, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 4.183832646694132, | |
| "grad_norm": 0.42622312903404236, | |
| "learning_rate": 0.001, | |
| "loss": 4.1406, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 4.187832486700532, | |
| "grad_norm": 0.37732553482055664, | |
| "learning_rate": 0.001, | |
| "loss": 4.1425, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 4.1918323267069315, | |
| "grad_norm": 0.5029783844947815, | |
| "learning_rate": 0.001, | |
| "loss": 4.1434, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 4.195832166713331, | |
| "grad_norm": 0.4873427152633667, | |
| "learning_rate": 0.001, | |
| "loss": 4.1432, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 4.199832006719731, | |
| "grad_norm": 0.4739370048046112, | |
| "learning_rate": 0.001, | |
| "loss": 4.1463, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 4.203831846726131, | |
| "grad_norm": 0.4919280409812927, | |
| "learning_rate": 0.001, | |
| "loss": 4.145, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 4.207831686732531, | |
| "grad_norm": 0.44847992062568665, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 4.211831526738931, | |
| "grad_norm": 0.5035785436630249, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 4.21583136674533, | |
| "grad_norm": 0.5161508321762085, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 4.21983120675173, | |
| "grad_norm": 0.44143009185791016, | |
| "learning_rate": 0.001, | |
| "loss": 4.1476, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 4.2238310467581295, | |
| "grad_norm": 0.38801082968711853, | |
| "learning_rate": 0.001, | |
| "loss": 4.1484, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 4.227830886764529, | |
| "grad_norm": 0.46244215965270996, | |
| "learning_rate": 0.001, | |
| "loss": 4.1457, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 4.231830726770929, | |
| "grad_norm": 0.4244415760040283, | |
| "learning_rate": 0.001, | |
| "loss": 4.1458, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 4.235830566777329, | |
| "grad_norm": 0.4295971095561981, | |
| "learning_rate": 0.001, | |
| "loss": 4.1489, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 4.239830406783729, | |
| "grad_norm": 0.4482729136943817, | |
| "learning_rate": 0.001, | |
| "loss": 4.1448, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 4.243830246790129, | |
| "grad_norm": 0.4174524247646332, | |
| "learning_rate": 0.001, | |
| "loss": 4.1432, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 4.247830086796528, | |
| "grad_norm": 0.37934377789497375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1404, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 4.251829926802928, | |
| "grad_norm": 0.439449280500412, | |
| "learning_rate": 0.001, | |
| "loss": 4.1437, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 4.2558297668093275, | |
| "grad_norm": 0.42520901560783386, | |
| "learning_rate": 0.001, | |
| "loss": 4.1464, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 4.259829606815727, | |
| "grad_norm": 0.4702022671699524, | |
| "learning_rate": 0.001, | |
| "loss": 4.1434, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 4.263829446822127, | |
| "grad_norm": 0.4666096568107605, | |
| "learning_rate": 0.001, | |
| "loss": 4.1453, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 4.267829286828527, | |
| "grad_norm": 0.47754520177841187, | |
| "learning_rate": 0.001, | |
| "loss": 4.1441, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 4.271829126834927, | |
| "grad_norm": 0.438281387090683, | |
| "learning_rate": 0.001, | |
| "loss": 4.1477, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 4.275828966841327, | |
| "grad_norm": 0.4417981505393982, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 4.279828806847726, | |
| "grad_norm": 0.4685574173927307, | |
| "learning_rate": 0.001, | |
| "loss": 4.1458, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 4.283828646854126, | |
| "grad_norm": 0.53219074010849, | |
| "learning_rate": 0.001, | |
| "loss": 4.146, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 4.287828486860525, | |
| "grad_norm": 0.453630268573761, | |
| "learning_rate": 0.001, | |
| "loss": 4.1466, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 4.291828326866925, | |
| "grad_norm": 0.40888792276382446, | |
| "learning_rate": 0.001, | |
| "loss": 4.1471, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 4.295828166873325, | |
| "grad_norm": 0.48033514618873596, | |
| "learning_rate": 0.001, | |
| "loss": 4.146, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 4.299828006879725, | |
| "grad_norm": 0.4497097134590149, | |
| "learning_rate": 0.001, | |
| "loss": 4.1485, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 4.303827846886125, | |
| "grad_norm": 0.4512811601161957, | |
| "learning_rate": 0.001, | |
| "loss": 4.1461, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 4.307827686892525, | |
| "grad_norm": 0.40005770325660706, | |
| "learning_rate": 0.001, | |
| "loss": 4.1433, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 4.311827526898924, | |
| "grad_norm": 0.44940298795700073, | |
| "learning_rate": 0.001, | |
| "loss": 4.1433, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 4.3158273669053235, | |
| "grad_norm": 0.4794534742832184, | |
| "learning_rate": 0.001, | |
| "loss": 4.1471, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 4.319827206911723, | |
| "grad_norm": 0.5258973836898804, | |
| "learning_rate": 0.001, | |
| "loss": 4.1424, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 4.323827046918123, | |
| "grad_norm": 0.4339228868484497, | |
| "learning_rate": 0.001, | |
| "loss": 4.1466, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 4.327826886924523, | |
| "grad_norm": 0.41444161534309387, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 4.331826726930923, | |
| "grad_norm": 0.47487524151802063, | |
| "learning_rate": 0.001, | |
| "loss": 4.1484, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 4.335826566937323, | |
| "grad_norm": 0.39907756447792053, | |
| "learning_rate": 0.001, | |
| "loss": 4.1487, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 4.339826406943722, | |
| "grad_norm": 0.5254673957824707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 4.343826246950122, | |
| "grad_norm": 0.45602646470069885, | |
| "learning_rate": 0.001, | |
| "loss": 4.145, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 0.40609806776046753, | |
| "learning_rate": 0.001, | |
| "loss": 4.1415, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 4.351825926962921, | |
| "grad_norm": 0.5290670394897461, | |
| "learning_rate": 0.001, | |
| "loss": 4.1444, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 4.355825766969321, | |
| "grad_norm": 0.4068310558795929, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 4.359825606975721, | |
| "grad_norm": 0.44302281737327576, | |
| "learning_rate": 0.001, | |
| "loss": 4.1464, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 4.363825446982121, | |
| "grad_norm": 0.46425190567970276, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 4.367825286988521, | |
| "grad_norm": 0.4178661108016968, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 4.371825126994921, | |
| "grad_norm": 0.5556158423423767, | |
| "learning_rate": 0.001, | |
| "loss": 4.1431, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 4.37582496700132, | |
| "grad_norm": 0.4908580780029297, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 4.3798248070077195, | |
| "grad_norm": 0.4489957392215729, | |
| "learning_rate": 0.001, | |
| "loss": 4.1442, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 4.383824647014119, | |
| "grad_norm": 0.5880224108695984, | |
| "learning_rate": 0.001, | |
| "loss": 4.1451, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 4.387824487020519, | |
| "grad_norm": 0.5525696873664856, | |
| "learning_rate": 0.001, | |
| "loss": 4.1462, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 4.391824327026919, | |
| "grad_norm": 0.5361529588699341, | |
| "learning_rate": 0.001, | |
| "loss": 4.1425, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 4.395824167033319, | |
| "grad_norm": 0.48454704880714417, | |
| "learning_rate": 0.001, | |
| "loss": 4.1427, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 4.399824007039719, | |
| "grad_norm": 0.6087040305137634, | |
| "learning_rate": 0.001, | |
| "loss": 4.1437, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 4.403823847046118, | |
| "grad_norm": 0.4859618544578552, | |
| "learning_rate": 0.001, | |
| "loss": 4.1431, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 4.407823687052518, | |
| "grad_norm": 0.4525204598903656, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 4.4118235270589174, | |
| "grad_norm": 0.4616955816745758, | |
| "learning_rate": 0.001, | |
| "loss": 4.1424, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 4.415823367065317, | |
| "grad_norm": 0.5667575597763062, | |
| "learning_rate": 0.001, | |
| "loss": 4.145, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 4.419823207071717, | |
| "grad_norm": 0.527301013469696, | |
| "learning_rate": 0.001, | |
| "loss": 4.1432, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 4.423823047078117, | |
| "grad_norm": 0.5262458324432373, | |
| "learning_rate": 0.001, | |
| "loss": 4.1409, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 4.427822887084517, | |
| "grad_norm": 0.4203338325023651, | |
| "learning_rate": 0.001, | |
| "loss": 4.1443, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 4.431822727090917, | |
| "grad_norm": 0.463851660490036, | |
| "learning_rate": 0.001, | |
| "loss": 4.1458, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 4.435822567097316, | |
| "grad_norm": 0.49283987283706665, | |
| "learning_rate": 0.001, | |
| "loss": 4.1445, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 4.439822407103716, | |
| "grad_norm": 0.4866863787174225, | |
| "learning_rate": 0.001, | |
| "loss": 4.1411, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 4.443822247110115, | |
| "grad_norm": 0.5105231404304504, | |
| "learning_rate": 0.001, | |
| "loss": 4.1414, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 4.447822087116515, | |
| "grad_norm": 0.4239439070224762, | |
| "learning_rate": 0.001, | |
| "loss": 4.1396, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 4.451821927122915, | |
| "grad_norm": 0.4156837463378906, | |
| "learning_rate": 0.001, | |
| "loss": 4.1444, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 4.455821767129315, | |
| "grad_norm": 0.49761706590652466, | |
| "learning_rate": 0.001, | |
| "loss": 4.1413, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 4.459821607135715, | |
| "grad_norm": 0.4880112409591675, | |
| "learning_rate": 0.001, | |
| "loss": 4.1455, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 4.463821447142115, | |
| "grad_norm": 0.38512125611305237, | |
| "learning_rate": 0.001, | |
| "loss": 4.143, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 4.467821287148514, | |
| "grad_norm": 0.5444674491882324, | |
| "learning_rate": 0.001, | |
| "loss": 4.1451, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 4.4718211271549135, | |
| "grad_norm": 0.4214431643486023, | |
| "learning_rate": 0.001, | |
| "loss": 4.144, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 4.475820967161313, | |
| "grad_norm": 0.4738007187843323, | |
| "learning_rate": 0.001, | |
| "loss": 4.142, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 4.479820807167713, | |
| "grad_norm": 0.48944899439811707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 4.483820647174113, | |
| "grad_norm": 0.4785895347595215, | |
| "learning_rate": 0.001, | |
| "loss": 4.1434, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 4.487820487180513, | |
| "grad_norm": 0.4528314173221588, | |
| "learning_rate": 0.001, | |
| "loss": 4.1382, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 4.491820327186913, | |
| "grad_norm": 0.5328041315078735, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 4.495820167193313, | |
| "grad_norm": 0.49370276927948, | |
| "learning_rate": 0.001, | |
| "loss": 4.142, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 4.499820007199712, | |
| "grad_norm": 0.4953836500644684, | |
| "learning_rate": 0.001, | |
| "loss": 4.1453, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 4.5038198472061115, | |
| "grad_norm": 0.4475695788860321, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 4.507819687212511, | |
| "grad_norm": 0.4099849760532379, | |
| "learning_rate": 0.001, | |
| "loss": 4.1432, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 4.511819527218911, | |
| "grad_norm": 0.45879650115966797, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 4.515819367225311, | |
| "grad_norm": 0.4368346929550171, | |
| "learning_rate": 0.001, | |
| "loss": 4.1445, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 4.519819207231711, | |
| "grad_norm": 0.4217066466808319, | |
| "learning_rate": 0.001, | |
| "loss": 4.1414, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 4.523819047238111, | |
| "grad_norm": 0.42964696884155273, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 4.527818887244511, | |
| "grad_norm": 0.38772520422935486, | |
| "learning_rate": 0.001, | |
| "loss": 4.1426, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 4.53181872725091, | |
| "grad_norm": 0.43408331274986267, | |
| "learning_rate": 0.001, | |
| "loss": 4.1463, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 4.5358185672573095, | |
| "grad_norm": 0.49354737997055054, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 4.539818407263709, | |
| "grad_norm": 0.43434685468673706, | |
| "learning_rate": 0.001, | |
| "loss": 4.146, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 4.543818247270109, | |
| "grad_norm": 0.49511098861694336, | |
| "learning_rate": 0.001, | |
| "loss": 4.145, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 4.547818087276509, | |
| "grad_norm": 0.46711239218711853, | |
| "learning_rate": 0.001, | |
| "loss": 4.1406, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 4.551817927282909, | |
| "grad_norm": 0.6184647083282471, | |
| "learning_rate": 0.001, | |
| "loss": 4.1421, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 4.555817767289309, | |
| "grad_norm": 0.587983250617981, | |
| "learning_rate": 0.001, | |
| "loss": 4.1425, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 4.559817607295708, | |
| "grad_norm": 0.42902278900146484, | |
| "learning_rate": 0.001, | |
| "loss": 4.1431, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 4.563817447302108, | |
| "grad_norm": 0.5256754159927368, | |
| "learning_rate": 0.001, | |
| "loss": 4.1401, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 4.5678172873085074, | |
| "grad_norm": 0.5793132781982422, | |
| "learning_rate": 0.001, | |
| "loss": 4.1413, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 4.571817127314907, | |
| "grad_norm": 0.47969871759414673, | |
| "learning_rate": 0.001, | |
| "loss": 4.1444, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 4.575816967321307, | |
| "grad_norm": 0.4756941795349121, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 4.579816807327707, | |
| "grad_norm": 0.5472639799118042, | |
| "learning_rate": 0.001, | |
| "loss": 4.1393, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 4.583816647334107, | |
| "grad_norm": 0.5101819634437561, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 4.587816487340507, | |
| "grad_norm": 0.47236403822898865, | |
| "learning_rate": 0.001, | |
| "loss": 4.1432, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 4.591816327346907, | |
| "grad_norm": 0.5631404519081116, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 4.595816167353306, | |
| "grad_norm": 0.4453705847263336, | |
| "learning_rate": 0.001, | |
| "loss": 4.1453, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 4.599816007359705, | |
| "grad_norm": 0.49028831720352173, | |
| "learning_rate": 0.001, | |
| "loss": 4.1403, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 4.603815847366105, | |
| "grad_norm": 0.4380851686000824, | |
| "learning_rate": 0.001, | |
| "loss": 4.1448, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 4.607815687372505, | |
| "grad_norm": 0.48736339807510376, | |
| "learning_rate": 0.001, | |
| "loss": 4.1445, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 4.611815527378905, | |
| "grad_norm": 0.43725523352622986, | |
| "learning_rate": 0.001, | |
| "loss": 4.1419, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 4.615815367385305, | |
| "grad_norm": 0.5325472354888916, | |
| "learning_rate": 0.001, | |
| "loss": 4.1444, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 4.619815207391705, | |
| "grad_norm": 0.4731554388999939, | |
| "learning_rate": 0.001, | |
| "loss": 4.1435, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 4.623815047398104, | |
| "grad_norm": 0.4858817160129547, | |
| "learning_rate": 0.001, | |
| "loss": 4.1461, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 4.6278148874045035, | |
| "grad_norm": 0.4575202763080597, | |
| "learning_rate": 0.001, | |
| "loss": 4.1439, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 4.631814727410903, | |
| "grad_norm": 0.44957438111305237, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 4.635814567417303, | |
| "grad_norm": 0.37792956829071045, | |
| "learning_rate": 0.001, | |
| "loss": 4.1451, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 4.639814407423703, | |
| "grad_norm": 0.4588908553123474, | |
| "learning_rate": 0.001, | |
| "loss": 4.1442, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 4.643814247430103, | |
| "grad_norm": 0.4842056334018707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1445, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 4.647814087436503, | |
| "grad_norm": 0.44424474239349365, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 4.651813927442902, | |
| "grad_norm": 0.5730035901069641, | |
| "learning_rate": 0.001, | |
| "loss": 4.1472, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 4.655813767449302, | |
| "grad_norm": 0.5033777356147766, | |
| "learning_rate": 0.001, | |
| "loss": 4.1479, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 4.6598136074557015, | |
| "grad_norm": 0.49093732237815857, | |
| "learning_rate": 0.001, | |
| "loss": 4.1445, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 4.663813447462101, | |
| "grad_norm": 0.4203215539455414, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 4.667813287468501, | |
| "grad_norm": 0.5095353722572327, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 4.671813127474901, | |
| "grad_norm": 0.47804033756256104, | |
| "learning_rate": 0.001, | |
| "loss": 4.1421, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 4.675812967481301, | |
| "grad_norm": 0.4211972653865814, | |
| "learning_rate": 0.001, | |
| "loss": 4.1403, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 4.679812807487701, | |
| "grad_norm": 0.6364486813545227, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 4.683812647494101, | |
| "grad_norm": 0.525810718536377, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 4.6878124875005, | |
| "grad_norm": 0.4373362064361572, | |
| "learning_rate": 0.001, | |
| "loss": 4.1399, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 4.6918123275068995, | |
| "grad_norm": 0.4389038383960724, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 4.695812167513299, | |
| "grad_norm": 0.4124441146850586, | |
| "learning_rate": 0.001, | |
| "loss": 4.1432, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 4.699812007519699, | |
| "grad_norm": 0.4817601442337036, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 4.703811847526099, | |
| "grad_norm": 0.4811069071292877, | |
| "learning_rate": 0.001, | |
| "loss": 4.1391, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 4.707811687532499, | |
| "grad_norm": 0.40754398703575134, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 4.711811527538899, | |
| "grad_norm": 0.468555212020874, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 4.715811367545298, | |
| "grad_norm": 0.4321994185447693, | |
| "learning_rate": 0.001, | |
| "loss": 4.1404, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 4.719811207551698, | |
| "grad_norm": 0.46261945366859436, | |
| "learning_rate": 0.001, | |
| "loss": 4.1427, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 4.723811047558097, | |
| "grad_norm": 0.4801866412162781, | |
| "learning_rate": 0.001, | |
| "loss": 4.1424, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 4.727810887564497, | |
| "grad_norm": 0.4368051588535309, | |
| "learning_rate": 0.001, | |
| "loss": 4.145, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 4.731810727570897, | |
| "grad_norm": 0.45344650745391846, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 4.735810567577297, | |
| "grad_norm": 0.4357813000679016, | |
| "learning_rate": 0.001, | |
| "loss": 4.1395, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 4.739810407583697, | |
| "grad_norm": 0.37463030219078064, | |
| "learning_rate": 0.001, | |
| "loss": 4.1424, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 4.743810247590097, | |
| "grad_norm": 0.4514647126197815, | |
| "learning_rate": 0.001, | |
| "loss": 4.1448, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 4.747810087596497, | |
| "grad_norm": 0.47029080986976624, | |
| "learning_rate": 0.001, | |
| "loss": 4.1437, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 4.7518099276028956, | |
| "grad_norm": 0.39589250087738037, | |
| "learning_rate": 0.001, | |
| "loss": 4.1467, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 4.755809767609295, | |
| "grad_norm": 0.4465102553367615, | |
| "learning_rate": 0.001, | |
| "loss": 4.1438, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 4.759809607615695, | |
| "grad_norm": 0.4341897964477539, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 4.763809447622095, | |
| "grad_norm": 0.45617157220840454, | |
| "learning_rate": 0.001, | |
| "loss": 4.1406, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 4.767809287628495, | |
| "grad_norm": 0.4648855924606323, | |
| "learning_rate": 0.001, | |
| "loss": 4.1428, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 4.771809127634895, | |
| "grad_norm": 0.4467076361179352, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 4.775808967641295, | |
| "grad_norm": 0.45838209986686707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1429, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 4.779808807647694, | |
| "grad_norm": 0.4301731288433075, | |
| "learning_rate": 0.001, | |
| "loss": 4.1427, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 4.7838086476540935, | |
| "grad_norm": 0.4407281279563904, | |
| "learning_rate": 0.001, | |
| "loss": 4.147, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 4.787808487660493, | |
| "grad_norm": 0.49695926904678345, | |
| "learning_rate": 0.001, | |
| "loss": 4.1426, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 4.791808327666893, | |
| "grad_norm": 0.43553370237350464, | |
| "learning_rate": 0.001, | |
| "loss": 4.1449, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 4.795808167673293, | |
| "grad_norm": 0.4836173355579376, | |
| "learning_rate": 0.001, | |
| "loss": 4.1454, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 4.799808007679693, | |
| "grad_norm": 0.4971003234386444, | |
| "learning_rate": 0.001, | |
| "loss": 4.1433, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 4.803807847686093, | |
| "grad_norm": 0.47055745124816895, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 4.807807687692493, | |
| "grad_norm": 0.39940592646598816, | |
| "learning_rate": 0.001, | |
| "loss": 4.1407, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 4.8118075276988925, | |
| "grad_norm": 0.4526260495185852, | |
| "learning_rate": 0.001, | |
| "loss": 4.1399, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 4.8158073677052915, | |
| "grad_norm": 0.5053595304489136, | |
| "learning_rate": 0.001, | |
| "loss": 4.1401, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 4.819807207711691, | |
| "grad_norm": 0.4834200441837311, | |
| "learning_rate": 0.001, | |
| "loss": 4.1419, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 4.823807047718091, | |
| "grad_norm": 0.5198436379432678, | |
| "learning_rate": 0.001, | |
| "loss": 4.1421, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 4.827806887724491, | |
| "grad_norm": 0.46774643659591675, | |
| "learning_rate": 0.001, | |
| "loss": 4.1447, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 4.831806727730891, | |
| "grad_norm": 0.4808708429336548, | |
| "learning_rate": 0.001, | |
| "loss": 4.1391, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 4.835806567737291, | |
| "grad_norm": 0.46363064646720886, | |
| "learning_rate": 0.001, | |
| "loss": 4.1426, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 4.839806407743691, | |
| "grad_norm": 0.4087159335613251, | |
| "learning_rate": 0.001, | |
| "loss": 4.1421, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 4.84380624775009, | |
| "grad_norm": 0.47745776176452637, | |
| "learning_rate": 0.001, | |
| "loss": 4.1419, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 4.8478060877564895, | |
| "grad_norm": 0.4563154876232147, | |
| "learning_rate": 0.001, | |
| "loss": 4.1403, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 4.851805927762889, | |
| "grad_norm": 0.43224015831947327, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 4.855805767769289, | |
| "grad_norm": 0.4743672013282776, | |
| "learning_rate": 0.001, | |
| "loss": 4.1379, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 4.859805607775689, | |
| "grad_norm": 0.44347378611564636, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 4.863805447782089, | |
| "grad_norm": 0.45574894547462463, | |
| "learning_rate": 0.001, | |
| "loss": 4.1465, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 4.867805287788489, | |
| "grad_norm": 0.46385806798934937, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 4.871805127794888, | |
| "grad_norm": 0.42676347494125366, | |
| "learning_rate": 0.001, | |
| "loss": 4.1395, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 4.875804967801288, | |
| "grad_norm": 0.5199492573738098, | |
| "learning_rate": 0.001, | |
| "loss": 4.1381, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 4.879804807807687, | |
| "grad_norm": 0.4669468104839325, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 4.883804647814087, | |
| "grad_norm": 0.5818643569946289, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 4.887804487820487, | |
| "grad_norm": 0.5275595784187317, | |
| "learning_rate": 0.001, | |
| "loss": 4.1439, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 4.891804327826887, | |
| "grad_norm": 0.4505153000354767, | |
| "learning_rate": 0.001, | |
| "loss": 4.1436, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 4.895804167833287, | |
| "grad_norm": 0.4731706380844116, | |
| "learning_rate": 0.001, | |
| "loss": 4.1446, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 4.899804007839687, | |
| "grad_norm": 0.5030878186225891, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 4.903803847846087, | |
| "grad_norm": 0.4733423888683319, | |
| "learning_rate": 0.001, | |
| "loss": 4.1406, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 4.9078036878524856, | |
| "grad_norm": 0.5757489204406738, | |
| "learning_rate": 0.001, | |
| "loss": 4.1407, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 4.911803527858885, | |
| "grad_norm": 0.5101374983787537, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 4.915803367865285, | |
| "grad_norm": 0.467282235622406, | |
| "learning_rate": 0.001, | |
| "loss": 4.1456, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 4.919803207871685, | |
| "grad_norm": 0.4633881449699402, | |
| "learning_rate": 0.001, | |
| "loss": 4.1387, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 4.923803047878085, | |
| "grad_norm": 0.36808955669403076, | |
| "learning_rate": 0.001, | |
| "loss": 4.143, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 4.927802887884485, | |
| "grad_norm": 0.4221063554286957, | |
| "learning_rate": 0.001, | |
| "loss": 4.1463, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 4.931802727890885, | |
| "grad_norm": 0.4532271921634674, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 4.935802567897284, | |
| "grad_norm": 0.4424666166305542, | |
| "learning_rate": 0.001, | |
| "loss": 4.141, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 4.9398024079036835, | |
| "grad_norm": 0.47042563557624817, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 4.943802247910083, | |
| "grad_norm": 0.486246258020401, | |
| "learning_rate": 0.001, | |
| "loss": 4.1393, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 4.947802087916483, | |
| "grad_norm": 0.4523676633834839, | |
| "learning_rate": 0.001, | |
| "loss": 4.1371, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 4.951801927922883, | |
| "grad_norm": 0.5111677646636963, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 4.955801767929283, | |
| "grad_norm": 0.47272786498069763, | |
| "learning_rate": 0.001, | |
| "loss": 4.138, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 4.959801607935683, | |
| "grad_norm": 0.46790051460266113, | |
| "learning_rate": 0.001, | |
| "loss": 4.14, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 4.963801447942083, | |
| "grad_norm": 0.4354240894317627, | |
| "learning_rate": 0.001, | |
| "loss": 4.1445, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 4.9678012879484825, | |
| "grad_norm": 0.42303115129470825, | |
| "learning_rate": 0.001, | |
| "loss": 4.1405, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 4.9718011279548815, | |
| "grad_norm": 0.44789764285087585, | |
| "learning_rate": 0.001, | |
| "loss": 4.1383, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 4.975800967961281, | |
| "grad_norm": 0.46547091007232666, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 4.979800807967681, | |
| "grad_norm": 0.5278778076171875, | |
| "learning_rate": 0.001, | |
| "loss": 4.1391, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 4.983800647974081, | |
| "grad_norm": 0.503690779209137, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 4.987800487980481, | |
| "grad_norm": 0.44487857818603516, | |
| "learning_rate": 0.001, | |
| "loss": 4.1377, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 4.991800327986881, | |
| "grad_norm": 0.5172649621963501, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 4.995800167993281, | |
| "grad_norm": 0.39961203932762146, | |
| "learning_rate": 0.001, | |
| "loss": 4.1397, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 4.99980000799968, | |
| "grad_norm": 0.41589173674583435, | |
| "learning_rate": 0.001, | |
| "loss": 4.1411, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 5.0037998480060795, | |
| "grad_norm": 0.39197656512260437, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 5.007799688012479, | |
| "grad_norm": 0.4566977322101593, | |
| "learning_rate": 0.001, | |
| "loss": 4.1384, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 5.011799528018879, | |
| "grad_norm": 0.54954594373703, | |
| "learning_rate": 0.001, | |
| "loss": 4.1395, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 5.015799368025279, | |
| "grad_norm": 0.4543614983558655, | |
| "learning_rate": 0.001, | |
| "loss": 4.1394, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 5.019799208031679, | |
| "grad_norm": 0.5545991063117981, | |
| "learning_rate": 0.001, | |
| "loss": 4.1405, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 5.023799048038079, | |
| "grad_norm": 0.4615522027015686, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 5.027798888044479, | |
| "grad_norm": 0.4874088168144226, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 5.031798728050878, | |
| "grad_norm": 0.48862114548683167, | |
| "learning_rate": 0.001, | |
| "loss": 4.1366, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 5.035798568057277, | |
| "grad_norm": 0.5121699571609497, | |
| "learning_rate": 0.001, | |
| "loss": 4.1384, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 5.039798408063677, | |
| "grad_norm": 0.4240550398826599, | |
| "learning_rate": 0.001, | |
| "loss": 4.1401, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 5.043798248070077, | |
| "grad_norm": 0.4307209551334381, | |
| "learning_rate": 0.001, | |
| "loss": 4.1408, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 5.047798088076477, | |
| "grad_norm": 0.5086374878883362, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 5.051797928082877, | |
| "grad_norm": 0.5460554361343384, | |
| "learning_rate": 0.001, | |
| "loss": 4.1401, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 5.055797768089277, | |
| "grad_norm": 0.4712692201137543, | |
| "learning_rate": 0.001, | |
| "loss": 4.1368, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 5.059797608095677, | |
| "grad_norm": 0.4204212725162506, | |
| "learning_rate": 0.001, | |
| "loss": 4.1383, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 5.0637974481020755, | |
| "grad_norm": 0.4033453166484833, | |
| "learning_rate": 0.001, | |
| "loss": 4.1389, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 5.067797288108475, | |
| "grad_norm": 0.4524138569831848, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 5.071797128114875, | |
| "grad_norm": 0.5840047597885132, | |
| "learning_rate": 0.001, | |
| "loss": 4.1408, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 5.075796968121275, | |
| "grad_norm": 0.41969379782676697, | |
| "learning_rate": 0.001, | |
| "loss": 4.1396, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 5.079796808127675, | |
| "grad_norm": 0.56402987241745, | |
| "learning_rate": 0.001, | |
| "loss": 4.1362, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 5.083796648134075, | |
| "grad_norm": 0.3998129069805145, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 5.087796488140475, | |
| "grad_norm": 0.443665474653244, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 5.0917963281468746, | |
| "grad_norm": 0.4244501292705536, | |
| "learning_rate": 0.001, | |
| "loss": 4.1396, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 5.0957961681532735, | |
| "grad_norm": 0.5381417274475098, | |
| "learning_rate": 0.001, | |
| "loss": 4.1352, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 5.099796008159673, | |
| "grad_norm": 0.4484384059906006, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 5.103795848166073, | |
| "grad_norm": 0.4542734920978546, | |
| "learning_rate": 0.001, | |
| "loss": 4.1358, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 5.107795688172473, | |
| "grad_norm": 0.5095553398132324, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 5.111795528178873, | |
| "grad_norm": 0.4590986669063568, | |
| "learning_rate": 0.001, | |
| "loss": 4.1381, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 5.115795368185273, | |
| "grad_norm": 0.38399380445480347, | |
| "learning_rate": 0.001, | |
| "loss": 4.1367, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 5.119795208191673, | |
| "grad_norm": 0.3857038915157318, | |
| "learning_rate": 0.001, | |
| "loss": 4.1431, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 5.1237950481980725, | |
| "grad_norm": 0.49884679913520813, | |
| "learning_rate": 0.001, | |
| "loss": 4.138, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 5.1277948882044715, | |
| "grad_norm": 0.5274735689163208, | |
| "learning_rate": 0.001, | |
| "loss": 4.1409, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 5.131794728210871, | |
| "grad_norm": 0.4781200587749481, | |
| "learning_rate": 0.001, | |
| "loss": 4.1354, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 5.135794568217271, | |
| "grad_norm": 0.37105411291122437, | |
| "learning_rate": 0.001, | |
| "loss": 4.1379, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 5.139794408223671, | |
| "grad_norm": 0.4230349063873291, | |
| "learning_rate": 0.001, | |
| "loss": 4.1411, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 5.143794248230071, | |
| "grad_norm": 0.3767766058444977, | |
| "learning_rate": 0.001, | |
| "loss": 4.1417, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 5.147794088236471, | |
| "grad_norm": 0.4571896195411682, | |
| "learning_rate": 0.001, | |
| "loss": 4.1464, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 5.151793928242871, | |
| "grad_norm": 0.4790409803390503, | |
| "learning_rate": 0.001, | |
| "loss": 4.1372, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 5.15579376824927, | |
| "grad_norm": 0.492097407579422, | |
| "learning_rate": 0.001, | |
| "loss": 4.1411, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 5.1597936082556695, | |
| "grad_norm": 0.4251415729522705, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 5.163793448262069, | |
| "grad_norm": 0.4291365146636963, | |
| "learning_rate": 0.001, | |
| "loss": 4.1416, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 5.167793288268469, | |
| "grad_norm": 0.4996872544288635, | |
| "learning_rate": 0.001, | |
| "loss": 4.1397, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 5.171793128274869, | |
| "grad_norm": 0.43896353244781494, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 5.175792968281269, | |
| "grad_norm": 0.4306037127971649, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 5.179792808287669, | |
| "grad_norm": 0.45990708470344543, | |
| "learning_rate": 0.001, | |
| "loss": 4.1381, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 5.183792648294069, | |
| "grad_norm": 0.40390530228614807, | |
| "learning_rate": 0.001, | |
| "loss": 4.1375, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 5.187792488300468, | |
| "grad_norm": 0.48158109188079834, | |
| "learning_rate": 0.001, | |
| "loss": 4.1377, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 5.191792328306867, | |
| "grad_norm": 0.4878210425376892, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 5.195792168313267, | |
| "grad_norm": 0.4516715407371521, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 5.199792008319667, | |
| "grad_norm": 0.41719183325767517, | |
| "learning_rate": 0.001, | |
| "loss": 4.142, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 5.203791848326067, | |
| "grad_norm": 0.4469333291053772, | |
| "learning_rate": 0.001, | |
| "loss": 4.14, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 5.207791688332467, | |
| "grad_norm": 0.45951905846595764, | |
| "learning_rate": 0.001, | |
| "loss": 4.1382, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 5.211791528338867, | |
| "grad_norm": 0.3892884850502014, | |
| "learning_rate": 0.001, | |
| "loss": 4.1367, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 5.215791368345267, | |
| "grad_norm": 0.49934664368629456, | |
| "learning_rate": 0.001, | |
| "loss": 4.14, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 5.2197912083516655, | |
| "grad_norm": 0.3898753523826599, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 5.223791048358065, | |
| "grad_norm": 0.4911145269870758, | |
| "learning_rate": 0.001, | |
| "loss": 4.1422, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 5.227790888364465, | |
| "grad_norm": 0.40083619952201843, | |
| "learning_rate": 0.001, | |
| "loss": 4.1362, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 5.231790728370865, | |
| "grad_norm": 0.45886579155921936, | |
| "learning_rate": 0.001, | |
| "loss": 4.1344, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 5.235790568377265, | |
| "grad_norm": 0.6365372538566589, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 5.239790408383665, | |
| "grad_norm": 0.38311493396759033, | |
| "learning_rate": 0.001, | |
| "loss": 4.1384, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 5.243790248390065, | |
| "grad_norm": 0.5525908470153809, | |
| "learning_rate": 0.001, | |
| "loss": 4.1404, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 5.2477900883964645, | |
| "grad_norm": 0.41158297657966614, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 5.2517899284028635, | |
| "grad_norm": 0.45629459619522095, | |
| "learning_rate": 0.001, | |
| "loss": 4.1405, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 5.255789768409263, | |
| "grad_norm": 0.4971814751625061, | |
| "learning_rate": 0.001, | |
| "loss": 4.1393, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 5.259789608415663, | |
| "grad_norm": 0.39209917187690735, | |
| "learning_rate": 0.001, | |
| "loss": 4.1418, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 5.263789448422063, | |
| "grad_norm": 0.5428614020347595, | |
| "learning_rate": 0.001, | |
| "loss": 4.135, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 5.267789288428463, | |
| "grad_norm": 0.3522048890590668, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 5.271789128434863, | |
| "grad_norm": 0.40293794870376587, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 5.275788968441263, | |
| "grad_norm": 0.45241355895996094, | |
| "learning_rate": 0.001, | |
| "loss": 4.1394, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 5.2797888084476625, | |
| "grad_norm": 0.44267144799232483, | |
| "learning_rate": 0.001, | |
| "loss": 4.1408, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 5.2837886484540615, | |
| "grad_norm": 0.46244797110557556, | |
| "learning_rate": 0.001, | |
| "loss": 4.1341, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 5.287788488460461, | |
| "grad_norm": 0.40171074867248535, | |
| "learning_rate": 0.001, | |
| "loss": 4.1397, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 5.291788328466861, | |
| "grad_norm": 0.4446714222431183, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 5.295788168473261, | |
| "grad_norm": 0.5090124011039734, | |
| "learning_rate": 0.001, | |
| "loss": 4.1379, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 5.299788008479661, | |
| "grad_norm": 0.45481523871421814, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 5.303787848486061, | |
| "grad_norm": 0.5546725988388062, | |
| "learning_rate": 0.001, | |
| "loss": 4.1383, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 5.307787688492461, | |
| "grad_norm": 0.46896442770957947, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 5.31178752849886, | |
| "grad_norm": 0.39645591378211975, | |
| "learning_rate": 0.001, | |
| "loss": 4.1387, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 5.3157873685052595, | |
| "grad_norm": 0.5071548819541931, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 5.319787208511659, | |
| "grad_norm": 0.4763946533203125, | |
| "learning_rate": 0.001, | |
| "loss": 4.1405, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 5.323787048518059, | |
| "grad_norm": 0.4352118968963623, | |
| "learning_rate": 0.001, | |
| "loss": 4.1351, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 5.327786888524459, | |
| "grad_norm": 0.4797314405441284, | |
| "learning_rate": 0.001, | |
| "loss": 4.136, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 5.331786728530859, | |
| "grad_norm": 0.4417945444583893, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 5.335786568537259, | |
| "grad_norm": 0.4554136395454407, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 5.339786408543659, | |
| "grad_norm": 0.40435078740119934, | |
| "learning_rate": 0.001, | |
| "loss": 4.1408, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 5.3437862485500585, | |
| "grad_norm": 0.43502071499824524, | |
| "learning_rate": 0.001, | |
| "loss": 4.1412, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 5.347786088556457, | |
| "grad_norm": 0.5281967520713806, | |
| "learning_rate": 0.001, | |
| "loss": 4.1373, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 5.351785928562857, | |
| "grad_norm": 0.5273252129554749, | |
| "learning_rate": 0.001, | |
| "loss": 4.1373, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 5.355785768569257, | |
| "grad_norm": 0.4398253262042999, | |
| "learning_rate": 0.001, | |
| "loss": 4.1369, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 5.359785608575657, | |
| "grad_norm": 0.4958343505859375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1413, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 5.363785448582057, | |
| "grad_norm": 0.4647107720375061, | |
| "learning_rate": 0.001, | |
| "loss": 4.138, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 5.367785288588457, | |
| "grad_norm": 0.4397704005241394, | |
| "learning_rate": 0.001, | |
| "loss": 4.1364, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 5.371785128594857, | |
| "grad_norm": 0.4742043912410736, | |
| "learning_rate": 0.001, | |
| "loss": 4.138, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 5.3757849686012555, | |
| "grad_norm": 0.4011693000793457, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 5.379784808607655, | |
| "grad_norm": 0.3930029273033142, | |
| "learning_rate": 0.001, | |
| "loss": 4.141, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 5.383784648614055, | |
| "grad_norm": 0.39063316583633423, | |
| "learning_rate": 0.001, | |
| "loss": 4.1398, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 5.387784488620455, | |
| "grad_norm": 0.44939857721328735, | |
| "learning_rate": 0.001, | |
| "loss": 4.1366, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 5.391784328626855, | |
| "grad_norm": 0.5439363121986389, | |
| "learning_rate": 0.001, | |
| "loss": 4.1369, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 5.395784168633255, | |
| "grad_norm": 0.42577844858169556, | |
| "learning_rate": 0.001, | |
| "loss": 4.1402, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 5.399784008639655, | |
| "grad_norm": 0.4027504622936249, | |
| "learning_rate": 0.001, | |
| "loss": 4.1366, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 5.4037838486460545, | |
| "grad_norm": 0.6525794863700867, | |
| "learning_rate": 0.001, | |
| "loss": 4.1389, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 5.4077836886524535, | |
| "grad_norm": 0.4911954700946808, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 5.411783528658853, | |
| "grad_norm": 0.4476899802684784, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 5.415783368665253, | |
| "grad_norm": 0.4557499885559082, | |
| "learning_rate": 0.001, | |
| "loss": 4.1364, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 5.419783208671653, | |
| "grad_norm": 0.39908871054649353, | |
| "learning_rate": 0.001, | |
| "loss": 4.1368, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 5.423783048678053, | |
| "grad_norm": 0.4525020122528076, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 5.427782888684453, | |
| "grad_norm": 0.45615649223327637, | |
| "learning_rate": 0.001, | |
| "loss": 4.1399, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 5.431782728690853, | |
| "grad_norm": 0.4389837086200714, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 5.4357825686972525, | |
| "grad_norm": 0.5461357831954956, | |
| "learning_rate": 0.001, | |
| "loss": 4.1372, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 5.4397824087036515, | |
| "grad_norm": 0.4126543402671814, | |
| "learning_rate": 0.001, | |
| "loss": 4.1364, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 5.443782248710051, | |
| "grad_norm": 0.39160358905792236, | |
| "learning_rate": 0.001, | |
| "loss": 4.1412, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 5.447782088716451, | |
| "grad_norm": 0.4216913878917694, | |
| "learning_rate": 0.001, | |
| "loss": 4.1367, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 5.451781928722851, | |
| "grad_norm": 0.4482346177101135, | |
| "learning_rate": 0.001, | |
| "loss": 4.1366, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 5.455781768729251, | |
| "grad_norm": 0.5682035684585571, | |
| "learning_rate": 0.001, | |
| "loss": 4.1387, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 5.459781608735651, | |
| "grad_norm": 0.5753220319747925, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 5.463781448742051, | |
| "grad_norm": 0.49236294627189636, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 5.46778128874845, | |
| "grad_norm": 0.4295791685581207, | |
| "learning_rate": 0.001, | |
| "loss": 4.1382, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 5.4717811287548495, | |
| "grad_norm": 0.4442785978317261, | |
| "learning_rate": 0.001, | |
| "loss": 4.1383, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 5.475780968761249, | |
| "grad_norm": 0.5207620859146118, | |
| "learning_rate": 0.001, | |
| "loss": 4.1382, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 5.479780808767649, | |
| "grad_norm": 0.45256859064102173, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 5.483780648774049, | |
| "grad_norm": 0.4170093536376953, | |
| "learning_rate": 0.001, | |
| "loss": 4.14, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 5.487780488780449, | |
| "grad_norm": 0.47792255878448486, | |
| "learning_rate": 0.001, | |
| "loss": 4.1347, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 5.491780328786849, | |
| "grad_norm": 0.4334956705570221, | |
| "learning_rate": 0.001, | |
| "loss": 4.1368, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 5.495780168793249, | |
| "grad_norm": 0.47183749079704285, | |
| "learning_rate": 0.001, | |
| "loss": 4.1389, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 5.4997800087996485, | |
| "grad_norm": 0.492654412984848, | |
| "learning_rate": 0.001, | |
| "loss": 4.135, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 5.503779848806047, | |
| "grad_norm": 0.474648654460907, | |
| "learning_rate": 0.001, | |
| "loss": 4.1391, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 5.507779688812447, | |
| "grad_norm": 0.40896373987197876, | |
| "learning_rate": 0.001, | |
| "loss": 4.1379, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 5.511779528818847, | |
| "grad_norm": 0.45079365372657776, | |
| "learning_rate": 0.001, | |
| "loss": 4.1399, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 5.515779368825247, | |
| "grad_norm": 0.5783036351203918, | |
| "learning_rate": 0.001, | |
| "loss": 4.1359, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 5.519779208831647, | |
| "grad_norm": 0.4422449469566345, | |
| "learning_rate": 0.001, | |
| "loss": 4.1375, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 5.523779048838047, | |
| "grad_norm": 0.5112189054489136, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 5.527778888844447, | |
| "grad_norm": 0.40671586990356445, | |
| "learning_rate": 0.001, | |
| "loss": 4.1391, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 5.5317787288508455, | |
| "grad_norm": 0.5037602186203003, | |
| "learning_rate": 0.001, | |
| "loss": 4.1383, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 5.535778568857245, | |
| "grad_norm": 0.46466997265815735, | |
| "learning_rate": 0.001, | |
| "loss": 4.1379, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 5.539778408863645, | |
| "grad_norm": 0.4853058159351349, | |
| "learning_rate": 0.001, | |
| "loss": 4.135, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 5.543778248870045, | |
| "grad_norm": 0.3657609820365906, | |
| "learning_rate": 0.001, | |
| "loss": 4.1356, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 5.547778088876445, | |
| "grad_norm": 0.49444639682769775, | |
| "learning_rate": 0.001, | |
| "loss": 4.1401, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 5.551777928882845, | |
| "grad_norm": 0.4573862850666046, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 5.555777768889245, | |
| "grad_norm": 0.5398617386817932, | |
| "learning_rate": 0.001, | |
| "loss": 4.1349, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 5.5597776088956445, | |
| "grad_norm": 0.44698962569236755, | |
| "learning_rate": 0.001, | |
| "loss": 4.1378, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 5.563777448902044, | |
| "grad_norm": 0.37685704231262207, | |
| "learning_rate": 0.001, | |
| "loss": 4.1367, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 5.567777288908443, | |
| "grad_norm": 0.40856295824050903, | |
| "learning_rate": 0.001, | |
| "loss": 4.1417, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 5.571777128914843, | |
| "grad_norm": 0.36752602458000183, | |
| "learning_rate": 0.001, | |
| "loss": 4.1339, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 5.575776968921243, | |
| "grad_norm": 0.4708743095397949, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 5.579776808927643, | |
| "grad_norm": 0.4223979413509369, | |
| "learning_rate": 0.001, | |
| "loss": 4.135, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 5.583776648934043, | |
| "grad_norm": 0.4208683371543884, | |
| "learning_rate": 0.001, | |
| "loss": 4.1346, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 5.587776488940443, | |
| "grad_norm": 0.47049957513809204, | |
| "learning_rate": 0.001, | |
| "loss": 4.1367, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 5.5917763289468425, | |
| "grad_norm": 0.44872990250587463, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 5.5957761689532415, | |
| "grad_norm": 0.43615269660949707, | |
| "learning_rate": 0.001, | |
| "loss": 4.1379, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 5.599776008959641, | |
| "grad_norm": 0.5177183151245117, | |
| "learning_rate": 0.001, | |
| "loss": 4.1402, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 5.603775848966041, | |
| "grad_norm": 0.4234861731529236, | |
| "learning_rate": 0.001, | |
| "loss": 4.1423, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 5.607775688972441, | |
| "grad_norm": 0.39408451318740845, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 5.611775528978841, | |
| "grad_norm": 0.5079990029335022, | |
| "learning_rate": 0.001, | |
| "loss": 4.1363, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 5.615775368985241, | |
| "grad_norm": 0.48556408286094666, | |
| "learning_rate": 0.001, | |
| "loss": 4.138, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 5.619775208991641, | |
| "grad_norm": 0.4212859570980072, | |
| "learning_rate": 0.001, | |
| "loss": 4.1349, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 5.62377504899804, | |
| "grad_norm": 0.3887998163700104, | |
| "learning_rate": 0.001, | |
| "loss": 4.1354, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 5.6277748890044395, | |
| "grad_norm": 0.41680628061294556, | |
| "learning_rate": 0.001, | |
| "loss": 4.1354, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 5.631774729010839, | |
| "grad_norm": 0.4846498370170593, | |
| "learning_rate": 0.001, | |
| "loss": 4.1359, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 5.635774569017239, | |
| "grad_norm": 0.45596760511398315, | |
| "learning_rate": 0.001, | |
| "loss": 4.1375, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 5.639774409023639, | |
| "grad_norm": 0.484160840511322, | |
| "learning_rate": 0.001, | |
| "loss": 4.1346, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 5.643774249030039, | |
| "grad_norm": 0.4429890811443329, | |
| "learning_rate": 0.001, | |
| "loss": 4.1369, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 5.647774089036439, | |
| "grad_norm": 0.436334490776062, | |
| "learning_rate": 0.001, | |
| "loss": 4.1365, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 5.651773929042839, | |
| "grad_norm": 0.5436973571777344, | |
| "learning_rate": 0.001, | |
| "loss": 4.1372, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 5.6557737690492385, | |
| "grad_norm": 0.46049728989601135, | |
| "learning_rate": 0.001, | |
| "loss": 4.1339, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 5.659773609055637, | |
| "grad_norm": 0.45588213205337524, | |
| "learning_rate": 0.001, | |
| "loss": 4.1348, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 5.663773449062037, | |
| "grad_norm": 0.4084899425506592, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 5.667773289068437, | |
| "grad_norm": 0.5410123467445374, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 5.671773129074837, | |
| "grad_norm": 0.4420919120311737, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 5.675772969081237, | |
| "grad_norm": 0.48726823925971985, | |
| "learning_rate": 0.001, | |
| "loss": 4.1366, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 5.679772809087637, | |
| "grad_norm": 0.425656259059906, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 5.683772649094037, | |
| "grad_norm": 0.481614887714386, | |
| "learning_rate": 0.001, | |
| "loss": 4.1342, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 5.6877724891004355, | |
| "grad_norm": 0.41768065094947815, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 5.691772329106835, | |
| "grad_norm": 0.42194467782974243, | |
| "learning_rate": 0.001, | |
| "loss": 4.1339, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 5.695772169113235, | |
| "grad_norm": 0.49403807520866394, | |
| "learning_rate": 0.001, | |
| "loss": 4.1373, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 5.699772009119635, | |
| "grad_norm": 0.496571809053421, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 5.703771849126035, | |
| "grad_norm": 0.43960630893707275, | |
| "learning_rate": 0.001, | |
| "loss": 4.132, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 5.707771689132435, | |
| "grad_norm": 0.43595102429389954, | |
| "learning_rate": 0.001, | |
| "loss": 4.1372, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 5.711771529138835, | |
| "grad_norm": 0.421332448720932, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 5.7157713691452345, | |
| "grad_norm": 0.4697113037109375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1363, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 5.719771209151634, | |
| "grad_norm": 0.4212019443511963, | |
| "learning_rate": 0.001, | |
| "loss": 4.1385, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 5.723771049158033, | |
| "grad_norm": 0.5039213299751282, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 5.727770889164433, | |
| "grad_norm": 0.4202103614807129, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 5.731770729170833, | |
| "grad_norm": 0.48808401823043823, | |
| "learning_rate": 0.001, | |
| "loss": 4.1343, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 5.735770569177233, | |
| "grad_norm": 0.484749972820282, | |
| "learning_rate": 0.001, | |
| "loss": 4.1377, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 5.739770409183633, | |
| "grad_norm": 0.37245190143585205, | |
| "learning_rate": 0.001, | |
| "loss": 4.1386, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 5.743770249190033, | |
| "grad_norm": 0.49025431275367737, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 5.7477700891964325, | |
| "grad_norm": 0.4268828332424164, | |
| "learning_rate": 0.001, | |
| "loss": 4.1373, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 5.7517699292028315, | |
| "grad_norm": 0.4585922360420227, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 5.755769769209231, | |
| "grad_norm": 0.47988182306289673, | |
| "learning_rate": 0.001, | |
| "loss": 4.1351, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 5.759769609215631, | |
| "grad_norm": 0.45885664224624634, | |
| "learning_rate": 0.001, | |
| "loss": 4.1384, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 5.763769449222031, | |
| "grad_norm": 0.474288672208786, | |
| "learning_rate": 0.001, | |
| "loss": 4.139, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 5.767769289228431, | |
| "grad_norm": 0.4917161464691162, | |
| "learning_rate": 0.001, | |
| "loss": 4.1369, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 5.771769129234831, | |
| "grad_norm": 0.46606698632240295, | |
| "learning_rate": 0.001, | |
| "loss": 4.1322, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 5.775768969241231, | |
| "grad_norm": 0.49236711859703064, | |
| "learning_rate": 0.001, | |
| "loss": 4.1371, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 5.7797688092476305, | |
| "grad_norm": 0.48581868410110474, | |
| "learning_rate": 0.001, | |
| "loss": 4.1363, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 5.78376864925403, | |
| "grad_norm": 0.44188404083251953, | |
| "learning_rate": 0.001, | |
| "loss": 4.1348, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 5.787768489260429, | |
| "grad_norm": 0.5125553011894226, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 5.791768329266829, | |
| "grad_norm": 0.3982478380203247, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 5.795768169273229, | |
| "grad_norm": 0.4386448860168457, | |
| "learning_rate": 0.001, | |
| "loss": 4.1338, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 5.799768009279629, | |
| "grad_norm": 0.4385557174682617, | |
| "learning_rate": 0.001, | |
| "loss": 4.1355, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 5.803767849286029, | |
| "grad_norm": 0.4551478624343872, | |
| "learning_rate": 0.001, | |
| "loss": 4.134, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 5.807767689292429, | |
| "grad_norm": 0.4078340530395508, | |
| "learning_rate": 0.001, | |
| "loss": 4.1342, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 5.8117675292988284, | |
| "grad_norm": 0.4332394003868103, | |
| "learning_rate": 0.001, | |
| "loss": 4.1354, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 5.815767369305227, | |
| "grad_norm": 0.3897719979286194, | |
| "learning_rate": 0.001, | |
| "loss": 4.1364, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 5.819767209311627, | |
| "grad_norm": 0.45064935088157654, | |
| "learning_rate": 0.001, | |
| "loss": 4.1339, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 5.823767049318027, | |
| "grad_norm": 0.4647873044013977, | |
| "learning_rate": 0.001, | |
| "loss": 4.1335, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 5.827766889324427, | |
| "grad_norm": 0.4528816342353821, | |
| "learning_rate": 0.001, | |
| "loss": 4.1351, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 5.831766729330827, | |
| "grad_norm": 0.38677456974983215, | |
| "learning_rate": 0.001, | |
| "loss": 4.1388, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 5.835766569337227, | |
| "grad_norm": 0.4616670608520508, | |
| "learning_rate": 0.001, | |
| "loss": 4.1353, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 5.839766409343627, | |
| "grad_norm": 0.4020819067955017, | |
| "learning_rate": 0.001, | |
| "loss": 4.1376, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 5.8437662493500255, | |
| "grad_norm": 0.4845848083496094, | |
| "learning_rate": 0.001, | |
| "loss": 4.1409, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 5.847766089356425, | |
| "grad_norm": 0.40645313262939453, | |
| "learning_rate": 0.001, | |
| "loss": 4.1372, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 5.851765929362825, | |
| "grad_norm": 0.37546342611312866, | |
| "learning_rate": 0.001, | |
| "loss": 4.1342, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 5.855765769369225, | |
| "grad_norm": 0.406170517206192, | |
| "learning_rate": 0.001, | |
| "loss": 4.1333, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 5.859765609375625, | |
| "grad_norm": 0.5377382636070251, | |
| "learning_rate": 0.001, | |
| "loss": 4.14, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 5.863765449382025, | |
| "grad_norm": 0.45157358050346375, | |
| "learning_rate": 0.001, | |
| "loss": 4.1341, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 5.867765289388425, | |
| "grad_norm": 0.6039636731147766, | |
| "learning_rate": 0.001, | |
| "loss": 4.1389, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 5.8717651293948245, | |
| "grad_norm": 0.4240739643573761, | |
| "learning_rate": 0.001, | |
| "loss": 4.1356, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 5.875764969401224, | |
| "grad_norm": 0.42058026790618896, | |
| "learning_rate": 0.001, | |
| "loss": 4.1364, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 5.879764809407623, | |
| "grad_norm": 0.454563170671463, | |
| "learning_rate": 0.001, | |
| "loss": 4.138, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 5.883764649414023, | |
| "grad_norm": 0.5056443214416504, | |
| "learning_rate": 0.001, | |
| "loss": 4.1388, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 5.887764489420423, | |
| "grad_norm": 0.44132763147354126, | |
| "learning_rate": 0.001, | |
| "loss": 4.1325, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 5.891764329426823, | |
| "grad_norm": 0.4522813856601715, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 5.895764169433223, | |
| "grad_norm": 0.36617109179496765, | |
| "learning_rate": 0.001, | |
| "loss": 4.1326, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 5.899764009439623, | |
| "grad_norm": 0.4096498191356659, | |
| "learning_rate": 0.001, | |
| "loss": 4.1364, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 5.9037638494460225, | |
| "grad_norm": 0.3995516300201416, | |
| "learning_rate": 0.001, | |
| "loss": 4.1374, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 5.9077636894524215, | |
| "grad_norm": 0.5837684869766235, | |
| "learning_rate": 0.001, | |
| "loss": 4.1337, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 5.911763529458821, | |
| "grad_norm": 0.4246392548084259, | |
| "learning_rate": 0.001, | |
| "loss": 4.1349, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 5.915763369465221, | |
| "grad_norm": 0.480863094329834, | |
| "learning_rate": 0.001, | |
| "loss": 4.1365, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 5.919763209471621, | |
| "grad_norm": 0.3852327764034271, | |
| "learning_rate": 0.001, | |
| "loss": 4.1358, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 5.923763049478021, | |
| "grad_norm": 0.4895519018173218, | |
| "learning_rate": 0.001, | |
| "loss": 4.1393, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 5.927762889484421, | |
| "grad_norm": 0.517063319683075, | |
| "learning_rate": 0.001, | |
| "loss": 4.137, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 5.931762729490821, | |
| "grad_norm": 0.47970083355903625, | |
| "learning_rate": 0.001, | |
| "loss": 4.1403, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 5.9357625694972205, | |
| "grad_norm": 0.4487200081348419, | |
| "learning_rate": 0.001, | |
| "loss": 4.135, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 5.93976240950362, | |
| "grad_norm": 0.46553564071655273, | |
| "learning_rate": 0.001, | |
| "loss": 4.1392, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 5.943762249510019, | |
| "grad_norm": 0.39696386456489563, | |
| "learning_rate": 0.001, | |
| "loss": 4.1334, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 5.947762089516419, | |
| "grad_norm": 0.3962916433811188, | |
| "learning_rate": 0.001, | |
| "loss": 4.1388, | |
| "step": 148700 | |
| }, | |
| { | |
| "epoch": 5.951761929522819, | |
| "grad_norm": 0.5088990926742554, | |
| "learning_rate": 0.001, | |
| "loss": 4.1363, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 5.955761769529219, | |
| "grad_norm": 0.5045955777168274, | |
| "learning_rate": 0.001, | |
| "loss": 4.1367, | |
| "step": 148900 | |
| }, | |
| { | |
| "epoch": 5.959761609535619, | |
| "grad_norm": 0.4137150049209595, | |
| "learning_rate": 0.001, | |
| "loss": 4.1347, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 5.963761449542019, | |
| "grad_norm": 0.4232303202152252, | |
| "learning_rate": 0.001, | |
| "loss": 4.1397, | |
| "step": 149100 | |
| }, | |
| { | |
| "epoch": 5.9677612895484184, | |
| "grad_norm": 0.4458197057247162, | |
| "learning_rate": 0.001, | |
| "loss": 4.1358, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 5.971761129554817, | |
| "grad_norm": 0.4045810103416443, | |
| "learning_rate": 0.001, | |
| "loss": 4.1314, | |
| "step": 149300 | |
| }, | |
| { | |
| "epoch": 5.975760969561217, | |
| "grad_norm": 0.45485568046569824, | |
| "learning_rate": 0.001, | |
| "loss": 4.1348, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 5.979760809567617, | |
| "grad_norm": 0.4166460335254669, | |
| "learning_rate": 0.001, | |
| "loss": 4.1394, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 5.983760649574017, | |
| "grad_norm": 0.40538185834884644, | |
| "learning_rate": 0.001, | |
| "loss": 4.136, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 5.987760489580417, | |
| "grad_norm": 0.4489404857158661, | |
| "learning_rate": 0.001, | |
| "loss": 4.1382, | |
| "step": 149700 | |
| }, | |
| { | |
| "epoch": 5.991760329586817, | |
| "grad_norm": 0.47682425379753113, | |
| "learning_rate": 0.001, | |
| "loss": 4.134, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 5.995760169593217, | |
| "grad_norm": 0.5068487524986267, | |
| "learning_rate": 0.001, | |
| "loss": 4.1346, | |
| "step": 149900 | |
| }, | |
| { | |
| "epoch": 5.9997600095996155, | |
| "grad_norm": 0.4409950077533722, | |
| "learning_rate": 0.001, | |
| "loss": 4.1359, | |
| "step": 150000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 150000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.59971946496e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |