| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.07228886138917137, | |
| "eval_steps": 369, | |
| "global_step": 1475, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 4.900939755198059e-05, | |
| "grad_norm": 0.5761741399765015, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0427, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 4.900939755198059e-05, | |
|       "eval_loss": null, | |
| "eval_runtime": 184.6812, | |
| "eval_samples_per_second": 46.523, | |
| "eval_steps_per_second": 23.262, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 9.801879510396118e-05, | |
| "grad_norm": 0.9498972296714783, | |
| "learning_rate": 4e-05, | |
| "loss": 1.2518, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00014702819265594177, | |
| "grad_norm": 0.7824539542198181, | |
| "learning_rate": 6e-05, | |
| "loss": 1.1888, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00019603759020792236, | |
| "grad_norm": 0.6423453688621521, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0259, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00024504698775990296, | |
| "grad_norm": 0.637169361114502, | |
| "learning_rate": 0.0001, | |
| "loss": 1.0737, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00029405638531188353, | |
| "grad_norm": 0.8547672033309937, | |
| "learning_rate": 0.00012, | |
| "loss": 1.2056, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.00034306578286386416, | |
| "grad_norm": 0.8592035174369812, | |
| "learning_rate": 0.00014, | |
| "loss": 1.2962, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.00039207518041584473, | |
| "grad_norm": 0.8024699091911316, | |
| "learning_rate": 0.00016, | |
| "loss": 1.2049, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00044108457796782535, | |
| "grad_norm": 0.7319013476371765, | |
| "learning_rate": 0.00018, | |
| "loss": 1.0241, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0004900939755198059, | |
| "grad_norm": 0.7202540040016174, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2143, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0005391033730717865, | |
| "grad_norm": 0.7399929165840149, | |
| "learning_rate": 0.00019999977007069113, | |
| "loss": 1.0112, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0005881127706237671, | |
| "grad_norm": 0.6700228452682495, | |
| "learning_rate": 0.00019999908028382185, | |
| "loss": 1.1158, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0006371221681757477, | |
| "grad_norm": 0.7994217872619629, | |
| "learning_rate": 0.0001999979306425642, | |
| "loss": 1.1398, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0006861315657277283, | |
| "grad_norm": 0.6176120042800903, | |
| "learning_rate": 0.00019999632115220493, | |
| "loss": 1.0548, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0007351409632797088, | |
| "grad_norm": 0.7442915439605713, | |
| "learning_rate": 0.0001999942518201454, | |
| "loss": 1.0992, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0007841503608316895, | |
| "grad_norm": 0.7436463832855225, | |
| "learning_rate": 0.0001999917226559016, | |
| "loss": 1.2715, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0008331597583836701, | |
| "grad_norm": 0.654746949672699, | |
| "learning_rate": 0.00019998873367110416, | |
| "loss": 1.1128, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0008821691559356507, | |
| "grad_norm": 0.6541568040847778, | |
| "learning_rate": 0.00019998528487949813, | |
| "loss": 1.0187, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0009311785534876312, | |
| "grad_norm": 0.5537770986557007, | |
| "learning_rate": 0.00019998137629694308, | |
| "loss": 1.0205, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0009801879510396118, | |
| "grad_norm": 0.7401204109191895, | |
| "learning_rate": 0.000199977007941413, | |
| "loss": 1.1563, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0010291973485915924, | |
| "grad_norm": 0.6118784546852112, | |
| "learning_rate": 0.0001999721798329961, | |
| "loss": 1.0828, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.001078206746143573, | |
| "grad_norm": 0.8245735764503479, | |
| "learning_rate": 0.0001999668919938949, | |
| "loss": 1.2984, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0011272161436955536, | |
| "grad_norm": 0.5586501955986023, | |
| "learning_rate": 0.00019996114444842595, | |
| "loss": 0.9791, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0011762255412475341, | |
| "grad_norm": 0.6310902833938599, | |
| "learning_rate": 0.00019995493722301989, | |
| "loss": 1.0142, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0012252349387995149, | |
| "grad_norm": 0.5360428094863892, | |
| "learning_rate": 0.0001999482703462211, | |
| "loss": 1.1159, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0012742443363514954, | |
| "grad_norm": 0.616188108921051, | |
| "learning_rate": 0.00019994114384868782, | |
| "loss": 0.9876, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.001323253733903476, | |
| "grad_norm": 0.7839058041572571, | |
| "learning_rate": 0.00019993355776319193, | |
| "loss": 1.0909, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0013722631314554566, | |
| "grad_norm": 0.5597085356712341, | |
| "learning_rate": 0.00019992551212461856, | |
| "loss": 1.0243, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0014212725290074371, | |
| "grad_norm": 0.5495719313621521, | |
| "learning_rate": 0.00019991700696996638, | |
| "loss": 1.1049, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0014702819265594177, | |
| "grad_norm": 0.5567327737808228, | |
| "learning_rate": 0.00019990804233834705, | |
| "loss": 1.0723, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0015192913241113984, | |
| "grad_norm": 0.7199801206588745, | |
| "learning_rate": 0.00019989861827098522, | |
| "loss": 1.1713, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.001568300721663379, | |
| "grad_norm": 0.5128418207168579, | |
| "learning_rate": 0.0001998887348112182, | |
| "loss": 1.1, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0016173101192153596, | |
| "grad_norm": 0.4268779456615448, | |
| "learning_rate": 0.00019987839200449602, | |
| "loss": 0.954, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0016663195167673402, | |
| "grad_norm": 0.4875427186489105, | |
| "learning_rate": 0.00019986758989838093, | |
| "loss": 0.8048, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0017153289143193207, | |
| "grad_norm": 0.5128729343414307, | |
| "learning_rate": 0.00019985632854254735, | |
| "loss": 1.0232, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0017643383118713014, | |
| "grad_norm": 0.5712106823921204, | |
| "learning_rate": 0.0001998446079887816, | |
| "loss": 1.0229, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.001813347709423282, | |
| "grad_norm": 0.6023369431495667, | |
| "learning_rate": 0.00019983242829098164, | |
| "loss": 1.143, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.0018623571069752624, | |
| "grad_norm": 0.5086866617202759, | |
| "learning_rate": 0.00019981978950515687, | |
| "loss": 0.9654, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0019113665045272432, | |
| "grad_norm": 0.49370044469833374, | |
| "learning_rate": 0.00019980669168942784, | |
| "loss": 1.1224, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0019603759020792237, | |
| "grad_norm": 0.5668991804122925, | |
| "learning_rate": 0.00019979313490402597, | |
| "loss": 0.935, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.002009385299631204, | |
| "grad_norm": 0.44917380809783936, | |
| "learning_rate": 0.00019977911921129332, | |
| "loss": 1.0326, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0020583946971831847, | |
| "grad_norm": 0.4713689386844635, | |
| "learning_rate": 0.00019976464467568226, | |
| "loss": 0.8148, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0021074040947351657, | |
| "grad_norm": 0.8936280012130737, | |
| "learning_rate": 0.0001997497113637552, | |
| "loss": 1.0798, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.002156413492287146, | |
| "grad_norm": 0.5882841348648071, | |
| "learning_rate": 0.0001997343193441842, | |
| "loss": 1.0313, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0022054228898391267, | |
| "grad_norm": 0.5324700474739075, | |
| "learning_rate": 0.0001997184686877509, | |
| "loss": 1.0233, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0022544322873911072, | |
| "grad_norm": 0.43486493825912476, | |
| "learning_rate": 0.00019970215946734583, | |
| "loss": 1.0024, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0023034416849430877, | |
| "grad_norm": 0.6185562014579773, | |
| "learning_rate": 0.00019968539175796833, | |
| "loss": 1.0328, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.0023524510824950683, | |
| "grad_norm": 0.5140308141708374, | |
| "learning_rate": 0.00019966816563672622, | |
| "loss": 1.0239, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.002401460480047049, | |
| "grad_norm": 0.5439414978027344, | |
| "learning_rate": 0.00019965048118283525, | |
| "loss": 0.9122, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0024504698775990297, | |
| "grad_norm": 0.6618689298629761, | |
| "learning_rate": 0.00019963233847761894, | |
| "loss": 1.0345, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0024994792751510102, | |
| "grad_norm": 0.5437299609184265, | |
| "learning_rate": 0.00019961373760450806, | |
| "loss": 1.0961, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.0025484886727029908, | |
| "grad_norm": 0.49915456771850586, | |
| "learning_rate": 0.00019959467864904035, | |
| "loss": 1.1166, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0025974980702549713, | |
| "grad_norm": 0.6561310887336731, | |
| "learning_rate": 0.00019957516169886007, | |
| "loss": 1.0409, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.002646507467806952, | |
| "grad_norm": 0.4919995665550232, | |
| "learning_rate": 0.00019955518684371752, | |
| "loss": 1.0823, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0026955168653589327, | |
| "grad_norm": 0.5455455780029297, | |
| "learning_rate": 0.0001995347541754689, | |
| "loss": 0.9638, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.0027445262629109133, | |
| "grad_norm": 0.5481426119804382, | |
| "learning_rate": 0.0001995138637880755, | |
| "loss": 0.9245, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0027935356604628938, | |
| "grad_norm": 0.8154264092445374, | |
| "learning_rate": 0.0001994925157776036, | |
| "loss": 1.0783, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.0028425450580148743, | |
| "grad_norm": 0.8507540225982666, | |
| "learning_rate": 0.00019947071024222393, | |
| "loss": 0.9816, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.002891554455566855, | |
| "grad_norm": 0.5288252830505371, | |
| "learning_rate": 0.00019944844728221104, | |
| "loss": 0.9762, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0029405638531188353, | |
| "grad_norm": 0.5518356561660767, | |
| "learning_rate": 0.0001994257269999431, | |
| "loss": 1.0246, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0029895732506708163, | |
| "grad_norm": 0.562601625919342, | |
| "learning_rate": 0.0001994025494999013, | |
| "loss": 1.0615, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.003038582648222797, | |
| "grad_norm": 0.6437848806381226, | |
| "learning_rate": 0.00019937891488866935, | |
| "loss": 1.2619, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0030875920457747773, | |
| "grad_norm": 0.4969969391822815, | |
| "learning_rate": 0.00019935482327493306, | |
| "loss": 1.0224, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.003136601443326758, | |
| "grad_norm": 0.4657133221626282, | |
| "learning_rate": 0.00019933027476947977, | |
| "loss": 0.9961, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0031856108408787383, | |
| "grad_norm": 0.4991621673107147, | |
| "learning_rate": 0.00019930526948519793, | |
| "loss": 1.0202, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0032346202384307193, | |
| "grad_norm": 0.5264037251472473, | |
| "learning_rate": 0.00019927980753707645, | |
| "loss": 0.9748, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0032836296359827, | |
| "grad_norm": 0.6157549619674683, | |
| "learning_rate": 0.00019925388904220434, | |
| "loss": 0.861, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.0033326390335346803, | |
| "grad_norm": 0.5480204224586487, | |
| "learning_rate": 0.00019922751411977, | |
| "loss": 0.9667, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.003381648431086661, | |
| "grad_norm": 0.4597250521183014, | |
| "learning_rate": 0.00019920068289106083, | |
| "loss": 1.0202, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.0034306578286386414, | |
| "grad_norm": 0.6237716674804688, | |
| "learning_rate": 0.00019917339547946246, | |
| "loss": 1.1458, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.003479667226190622, | |
| "grad_norm": 0.4779433608055115, | |
| "learning_rate": 0.00019914565201045853, | |
| "loss": 0.967, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.003528676623742603, | |
| "grad_norm": 0.5889516472816467, | |
| "learning_rate": 0.00019911745261162964, | |
| "loss": 1.0123, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0035776860212945833, | |
| "grad_norm": 0.427267849445343, | |
| "learning_rate": 0.00019908879741265323, | |
| "loss": 0.9696, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.003626695418846564, | |
| "grad_norm": 0.5856832265853882, | |
| "learning_rate": 0.00019905968654530272, | |
| "loss": 1.1239, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0036757048163985444, | |
| "grad_norm": 0.5578678846359253, | |
| "learning_rate": 0.00019903012014344686, | |
| "loss": 0.9191, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.003724714213950525, | |
| "grad_norm": 0.4885029196739197, | |
| "learning_rate": 0.00019900009834304937, | |
| "loss": 0.9503, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.0037737236115025054, | |
| "grad_norm": 0.7779492735862732, | |
| "learning_rate": 0.00019896962128216809, | |
| "loss": 1.2377, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.0038227330090544864, | |
| "grad_norm": 0.4959696829319, | |
| "learning_rate": 0.00019893868910095437, | |
| "loss": 0.9475, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.003871742406606467, | |
| "grad_norm": 0.5338016152381897, | |
| "learning_rate": 0.0001989073019416525, | |
| "loss": 1.0817, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.003920751804158447, | |
| "grad_norm": 0.6674442887306213, | |
| "learning_rate": 0.0001988754599485991, | |
| "loss": 0.9813, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.003969761201710428, | |
| "grad_norm": 0.6323183178901672, | |
| "learning_rate": 0.00019884316326822225, | |
| "loss": 1.1776, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.004018770599262408, | |
| "grad_norm": 0.542637050151825, | |
| "learning_rate": 0.00019881041204904105, | |
| "loss": 1.0185, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.004067779996814389, | |
| "grad_norm": 0.5368381142616272, | |
| "learning_rate": 0.0001987772064416648, | |
| "loss": 1.0672, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.0041167893943663695, | |
| "grad_norm": 0.5859797596931458, | |
| "learning_rate": 0.00019874354659879238, | |
| "loss": 0.9016, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.00416579879191835, | |
| "grad_norm": 0.7060731053352356, | |
| "learning_rate": 0.00019870943267521145, | |
| "loss": 1.0842, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.004214808189470331, | |
| "grad_norm": 0.5956705212593079, | |
| "learning_rate": 0.0001986748648277978, | |
| "loss": 1.1822, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.0042638175870223114, | |
| "grad_norm": 0.7368344068527222, | |
| "learning_rate": 0.0001986398432155147, | |
| "loss": 1.0949, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.004312826984574292, | |
| "grad_norm": 0.6101769804954529, | |
| "learning_rate": 0.00019860436799941201, | |
| "loss": 0.9986, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.0043618363821262725, | |
| "grad_norm": 0.6834807991981506, | |
| "learning_rate": 0.00019856843934262563, | |
| "loss": 1.0497, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.004410845779678253, | |
| "grad_norm": 0.6897183060646057, | |
| "learning_rate": 0.00019853205741037652, | |
| "loss": 0.7885, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0044598551772302335, | |
| "grad_norm": 0.5369214415550232, | |
| "learning_rate": 0.00019849522236997018, | |
| "loss": 1.0458, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.0045088645747822145, | |
| "grad_norm": 0.6021403670310974, | |
| "learning_rate": 0.00019845793439079567, | |
| "loss": 0.9317, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.004557873972334195, | |
| "grad_norm": 0.7959415316581726, | |
| "learning_rate": 0.00019842019364432504, | |
| "loss": 1.0009, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.0046068833698861755, | |
| "grad_norm": 0.4632684290409088, | |
| "learning_rate": 0.00019838200030411227, | |
| "loss": 1.0488, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.0046558927674381564, | |
| "grad_norm": 0.6845225095748901, | |
| "learning_rate": 0.0001983433545457928, | |
| "loss": 1.0982, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.0047049021649901365, | |
| "grad_norm": 0.5301809906959534, | |
| "learning_rate": 0.00019830425654708246, | |
| "loss": 1.0651, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0047539115625421175, | |
| "grad_norm": 0.6478580832481384, | |
| "learning_rate": 0.0001982647064877768, | |
| "loss": 1.0767, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.004802920960094098, | |
| "grad_norm": 0.6522680521011353, | |
| "learning_rate": 0.00019822470454975013, | |
| "loss": 0.9553, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0048519303576460785, | |
| "grad_norm": 0.7917822599411011, | |
| "learning_rate": 0.00019818425091695481, | |
| "loss": 1.0848, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.0049009397551980595, | |
| "grad_norm": 0.6298443078994751, | |
| "learning_rate": 0.00019814334577542038, | |
| "loss": 1.1411, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0049499491527500395, | |
| "grad_norm": 0.5504825115203857, | |
| "learning_rate": 0.00019810198931325266, | |
| "loss": 1.1046, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.0049989585503020205, | |
| "grad_norm": 0.69349604845047, | |
| "learning_rate": 0.00019806018172063288, | |
| "loss": 1.0621, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.005047967947854001, | |
| "grad_norm": 0.6714924573898315, | |
| "learning_rate": 0.00019801792318981687, | |
| "loss": 1.1085, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.0050969773454059815, | |
| "grad_norm": 0.5743169784545898, | |
| "learning_rate": 0.00019797521391513412, | |
| "loss": 0.9405, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.0051459867429579625, | |
| "grad_norm": 0.5487734079360962, | |
| "learning_rate": 0.00019793205409298693, | |
| "loss": 1.0558, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0051949961405099426, | |
| "grad_norm": 0.9174990057945251, | |
| "learning_rate": 0.00019788844392184943, | |
| "loss": 1.2962, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.0052440055380619235, | |
| "grad_norm": 0.6920627355575562, | |
| "learning_rate": 0.0001978443836022668, | |
| "loss": 1.0936, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.005293014935613904, | |
| "grad_norm": 0.5456178784370422, | |
| "learning_rate": 0.0001977998733368542, | |
| "loss": 1.0819, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.0053420243331658845, | |
| "grad_norm": 0.4566207528114319, | |
| "learning_rate": 0.00019775491333029588, | |
| "loss": 0.9396, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.0053910337307178655, | |
| "grad_norm": 0.48873913288116455, | |
| "learning_rate": 0.00019770950378934435, | |
| "loss": 1.0805, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.005440043128269846, | |
| "grad_norm": 0.5160651803016663, | |
| "learning_rate": 0.00019766364492281924, | |
| "loss": 1.078, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.0054890525258218265, | |
| "grad_norm": 0.4412589967250824, | |
| "learning_rate": 0.00019761733694160656, | |
| "loss": 1.0189, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.005538061923373807, | |
| "grad_norm": 0.5093545913696289, | |
| "learning_rate": 0.00019757058005865754, | |
| "loss": 1.0374, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.0055870713209257876, | |
| "grad_norm": 0.546847403049469, | |
| "learning_rate": 0.0001975233744889877, | |
| "loss": 1.0663, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.0056360807184777685, | |
| "grad_norm": 0.5874338746070862, | |
| "learning_rate": 0.000197475720449676, | |
| "loss": 1.0858, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.005685090116029749, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 0.00019742761815986354, | |
| "loss": 0.895, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.0057340995135817295, | |
| "grad_norm": 0.55860435962677, | |
| "learning_rate": 0.00019737906784075292, | |
| "loss": 0.986, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.00578310891113371, | |
| "grad_norm": 0.7575963735580444, | |
| "learning_rate": 0.00019733006971560694, | |
| "loss": 1.1348, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.005832118308685691, | |
| "grad_norm": 0.5981109738349915, | |
| "learning_rate": 0.00019728062400974775, | |
| "loss": 1.1434, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.005881127706237671, | |
| "grad_norm": 0.5200974941253662, | |
| "learning_rate": 0.00019723073095055557, | |
| "loss": 1.0091, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.005930137103789652, | |
| "grad_norm": 0.4704555571079254, | |
| "learning_rate": 0.00019718039076746809, | |
| "loss": 0.9529, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.0059791465013416326, | |
| "grad_norm": 0.5887392163276672, | |
| "learning_rate": 0.00019712960369197883, | |
| "loss": 1.0724, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.006028155898893613, | |
| "grad_norm": 0.6547783613204956, | |
| "learning_rate": 0.00019707836995763663, | |
| "loss": 1.0877, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.006077165296445594, | |
| "grad_norm": 0.5115367770195007, | |
| "learning_rate": 0.00019702668980004423, | |
| "loss": 0.8939, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.006126174693997574, | |
| "grad_norm": 0.652423620223999, | |
| "learning_rate": 0.0001969745634568572, | |
| "loss": 1.109, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.006175184091549555, | |
| "grad_norm": 0.48406100273132324, | |
| "learning_rate": 0.00019692199116778315, | |
| "loss": 1.0766, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.006224193489101536, | |
| "grad_norm": 0.6201152801513672, | |
| "learning_rate": 0.00019686897317458015, | |
| "loss": 1.0048, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.006273202886653516, | |
| "grad_norm": 0.5421327948570251, | |
| "learning_rate": 0.0001968155097210561, | |
| "loss": 1.1253, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.006322212284205497, | |
| "grad_norm": 0.4736417233943939, | |
| "learning_rate": 0.00019676160105306728, | |
| "loss": 0.9182, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.006371221681757477, | |
| "grad_norm": 0.5314813852310181, | |
| "learning_rate": 0.00019670724741851735, | |
| "loss": 1.0786, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.006420231079309458, | |
| "grad_norm": 0.7178956866264343, | |
| "learning_rate": 0.00019665244906735618, | |
| "loss": 1.1229, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.006469240476861439, | |
| "grad_norm": 0.7294998168945312, | |
| "learning_rate": 0.00019659720625157868, | |
| "loss": 0.9181, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.006518249874413419, | |
| "grad_norm": 0.49370306730270386, | |
| "learning_rate": 0.00019654151922522374, | |
| "loss": 0.9575, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.0065672592719654, | |
| "grad_norm": 0.6981483697891235, | |
| "learning_rate": 0.00019648538824437292, | |
| "loss": 1.039, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.00661626866951738, | |
| "grad_norm": 0.5041336417198181, | |
| "learning_rate": 0.0001964288135671494, | |
| "loss": 1.0359, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.006665278067069361, | |
| "grad_norm": 0.6436519026756287, | |
| "learning_rate": 0.00019637179545371666, | |
| "loss": 1.3036, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.006714287464621341, | |
| "grad_norm": 0.7052600979804993, | |
| "learning_rate": 0.00019631433416627747, | |
| "loss": 1.1122, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.006763296862173322, | |
| "grad_norm": 0.7348969578742981, | |
| "learning_rate": 0.00019625642996907248, | |
| "loss": 0.9007, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.006812306259725303, | |
| "grad_norm": 0.46670839190483093, | |
| "learning_rate": 0.00019619808312837912, | |
| "loss": 0.9005, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.006861315657277283, | |
| "grad_norm": 0.5681964755058289, | |
| "learning_rate": 0.0001961392939125104, | |
| "loss": 0.9202, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.006910325054829264, | |
| "grad_norm": 0.5394182801246643, | |
| "learning_rate": 0.00019608006259181355, | |
| "loss": 1.0096, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.006959334452381244, | |
| "grad_norm": 0.48045870661735535, | |
| "learning_rate": 0.00019602038943866896, | |
| "loss": 0.9415, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.007008343849933225, | |
| "grad_norm": 0.5574181079864502, | |
| "learning_rate": 0.0001959602747274887, | |
| "loss": 0.9763, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.007057353247485206, | |
| "grad_norm": 0.6325384378433228, | |
| "learning_rate": 0.00019589971873471552, | |
| "loss": 0.8447, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.007106362645037186, | |
| "grad_norm": 0.5570932626724243, | |
| "learning_rate": 0.00019583872173882129, | |
| "loss": 1.0864, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.007155372042589167, | |
| "grad_norm": 0.5965400338172913, | |
| "learning_rate": 0.00019577728402030603, | |
| "loss": 1.0298, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.007204381440141147, | |
| "grad_norm": 0.6382545828819275, | |
| "learning_rate": 0.00019571540586169633, | |
| "loss": 1.1735, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.007253390837693128, | |
| "grad_norm": 0.5810325145721436, | |
| "learning_rate": 0.0001956530875475443, | |
| "loss": 1.0175, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.007302400235245109, | |
| "grad_norm": 0.5877084732055664, | |
| "learning_rate": 0.000195590329364426, | |
| "loss": 1.018, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.007351409632797089, | |
| "grad_norm": 0.6393083930015564, | |
| "learning_rate": 0.00019552713160094038, | |
| "loss": 1.0437, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.00740041903034907, | |
| "grad_norm": 0.551087498664856, | |
| "learning_rate": 0.0001954634945477078, | |
| "loss": 1.191, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.00744942842790105, | |
| "grad_norm": 0.5265794396400452, | |
| "learning_rate": 0.00019539941849736875, | |
| "loss": 1.0204, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.007498437825453031, | |
| "grad_norm": 0.537392795085907, | |
| "learning_rate": 0.00019533490374458245, | |
| "loss": 1.1164, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.007547447223005011, | |
| "grad_norm": 0.5640383958816528, | |
| "learning_rate": 0.00019526995058602553, | |
| "loss": 1.1857, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.007596456620556992, | |
| "grad_norm": 0.5242000818252563, | |
| "learning_rate": 0.0001952045593203907, | |
| "loss": 1.1145, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.007645466018108973, | |
| "grad_norm": 0.5312201976776123, | |
| "learning_rate": 0.0001951387302483854, | |
| "loss": 0.8864, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.007694475415660953, | |
| "grad_norm": 0.5541028380393982, | |
| "learning_rate": 0.00019507246367273017, | |
| "loss": 0.9166, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.007743484813212934, | |
| "grad_norm": 0.7059816122055054, | |
| "learning_rate": 0.00019500575989815766, | |
| "loss": 1.1794, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.007792494210764914, | |
| "grad_norm": 0.5818477272987366, | |
| "learning_rate": 0.00019493861923141088, | |
| "loss": 1.0268, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.007841503608316895, | |
| "grad_norm": 0.7610416412353516, | |
| "learning_rate": 0.000194871041981242, | |
| "loss": 1.1005, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.007890513005868875, | |
| "grad_norm": 0.5330606698989868, | |
| "learning_rate": 0.0001948030284584108, | |
| "loss": 1.017, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.007939522403420857, | |
| "grad_norm": 0.5154563188552856, | |
| "learning_rate": 0.00019473457897568338, | |
| "loss": 0.8678, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.007988531800972837, | |
| "grad_norm": 0.5452204346656799, | |
| "learning_rate": 0.0001946656938478305, | |
| "loss": 0.9008, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.008037541198524817, | |
| "grad_norm": 0.6915609836578369, | |
| "learning_rate": 0.0001945963733916264, | |
| "loss": 0.7523, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.008086550596076797, | |
| "grad_norm": 0.5153624415397644, | |
| "learning_rate": 0.0001945266179258472, | |
| "loss": 1.0295, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.008135559993628779, | |
| "grad_norm": 0.5068039298057556, | |
| "learning_rate": 0.0001944564277712694, | |
| "loss": 0.9964, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.008184569391180759, | |
| "grad_norm": 0.5240331888198853, | |
| "learning_rate": 0.00019438580325066847, | |
| "loss": 1.0435, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.008233578788732739, | |
| "grad_norm": 0.663153350353241, | |
| "learning_rate": 0.00019431474468881735, | |
| "loss": 1.0541, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.00828258818628472, | |
| "grad_norm": 0.5227854251861572, | |
| "learning_rate": 0.00019424325241248496, | |
| "loss": 1.095, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.0083315975838367, | |
| "grad_norm": 0.5788130164146423, | |
| "learning_rate": 0.00019417132675043471, | |
| "loss": 0.9356, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.008380606981388681, | |
| "grad_norm": 0.584823727607727, | |
| "learning_rate": 0.00019409896803342292, | |
| "loss": 0.949, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.008429616378940663, | |
| "grad_norm": 0.6930997371673584, | |
| "learning_rate": 0.00019402617659419744, | |
| "loss": 1.114, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.008478625776492643, | |
| "grad_norm": 0.6039778590202332, | |
| "learning_rate": 0.00019395295276749592, | |
| "loss": 0.9691, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.008527635174044623, | |
| "grad_norm": 0.786405086517334, | |
| "learning_rate": 0.00019387929689004447, | |
| "loss": 1.0854, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.008576644571596603, | |
| "grad_norm": 0.7498408555984497, | |
| "learning_rate": 0.00019380520930055602, | |
| "loss": 1.0526, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.008625653969148585, | |
| "grad_norm": 0.4368021488189697, | |
| "learning_rate": 0.00019373069033972864, | |
| "loss": 0.8813, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.008674663366700565, | |
| "grad_norm": 0.6928194165229797, | |
| "learning_rate": 0.0001936557403502443, | |
| "loss": 0.972, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.008723672764252545, | |
| "grad_norm": 0.6533752679824829, | |
| "learning_rate": 0.0001935803596767669, | |
| "loss": 0.9243, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.008772682161804527, | |
| "grad_norm": 0.6834339499473572, | |
| "learning_rate": 0.00019350454866594105, | |
| "loss": 0.9531, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.008821691559356507, | |
| "grad_norm": 0.665930986404419, | |
| "learning_rate": 0.00019342830766639013, | |
| "loss": 1.1487, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.008870700956908487, | |
| "grad_norm": 1.0465463399887085, | |
| "learning_rate": 0.000193351637028715, | |
| "loss": 1.4273, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.008919710354460467, | |
| "grad_norm": 0.4840773642063141, | |
| "learning_rate": 0.00019327453710549214, | |
| "loss": 0.9474, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.008968719752012449, | |
| "grad_norm": 0.674241304397583, | |
| "learning_rate": 0.00019319700825127227, | |
| "loss": 1.2863, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.009017729149564429, | |
| "grad_norm": 0.45641985535621643, | |
| "learning_rate": 0.00019311905082257846, | |
| "loss": 1.0501, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.009066738547116409, | |
| "grad_norm": 0.4727928042411804, | |
| "learning_rate": 0.00019304066517790465, | |
| "loss": 0.9775, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.00911574794466839, | |
| "grad_norm": 0.5751796960830688, | |
| "learning_rate": 0.00019296185167771404, | |
| "loss": 0.9788, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.009164757342220371, | |
| "grad_norm": 0.6331678628921509, | |
| "learning_rate": 0.00019288261068443725, | |
| "loss": 1.1389, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.009213766739772351, | |
| "grad_norm": 0.6849628686904907, | |
| "learning_rate": 0.0001928029425624708, | |
| "loss": 1.1108, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.009262776137324333, | |
| "grad_norm": 0.515921413898468, | |
| "learning_rate": 0.0001927228476781755, | |
| "loss": 1.1741, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.009311785534876313, | |
| "grad_norm": 0.7446777820587158, | |
| "learning_rate": 0.0001926423263998745, | |
| "loss": 1.2786, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.009360794932428293, | |
| "grad_norm": 0.5110065340995789, | |
| "learning_rate": 0.00019256137909785185, | |
| "loss": 1.142, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.009409804329980273, | |
| "grad_norm": 0.5118314623832703, | |
| "learning_rate": 0.0001924800061443507, | |
| "loss": 1.3642, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.009458813727532255, | |
| "grad_norm": 0.5356354117393494, | |
| "learning_rate": 0.00019239820791357165, | |
| "loss": 1.0135, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.009507823125084235, | |
| "grad_norm": 0.6616246700286865, | |
| "learning_rate": 0.00019231598478167082, | |
| "loss": 1.1266, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.009556832522636215, | |
| "grad_norm": 0.5914151072502136, | |
| "learning_rate": 0.00019223333712675838, | |
| "loss": 0.915, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.009605841920188197, | |
| "grad_norm": 0.745429277420044, | |
| "learning_rate": 0.00019215026532889675, | |
| "loss": 1.0138, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.009654851317740177, | |
| "grad_norm": 0.6249227523803711, | |
| "learning_rate": 0.00019206676977009872, | |
| "loss": 1.0224, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.009703860715292157, | |
| "grad_norm": 0.7981612086296082, | |
| "learning_rate": 0.0001919828508343258, | |
| "loss": 1.045, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.009752870112844137, | |
| "grad_norm": 0.5909057855606079, | |
| "learning_rate": 0.00019189850890748652, | |
| "loss": 1.0247, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.009801879510396119, | |
| "grad_norm": 0.7416546940803528, | |
| "learning_rate": 0.00019181374437743438, | |
| "loss": 1.0874, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.009850888907948099, | |
| "grad_norm": 0.5643423795700073, | |
| "learning_rate": 0.00019172855763396643, | |
| "loss": 1.1505, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.009899898305500079, | |
| "grad_norm": 0.5001385807991028, | |
| "learning_rate": 0.0001916429490688213, | |
| "loss": 0.8847, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.009948907703052061, | |
| "grad_norm": 0.7633288502693176, | |
| "learning_rate": 0.00019155691907567728, | |
| "loss": 0.9812, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.009997917100604041, | |
| "grad_norm": 0.5486236214637756, | |
| "learning_rate": 0.00019147046805015076, | |
| "loss": 0.8544, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.010046926498156021, | |
| "grad_norm": 0.6690341234207153, | |
| "learning_rate": 0.0001913835963897942, | |
| "loss": 1.0948, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.010095935895708001, | |
| "grad_norm": 0.7186251282691956, | |
| "learning_rate": 0.00019129630449409444, | |
| "loss": 0.9121, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.010144945293259983, | |
| "grad_norm": 0.5190759897232056, | |
| "learning_rate": 0.00019120859276447076, | |
| "loss": 1.0424, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.010193954690811963, | |
| "grad_norm": 0.5398345589637756, | |
| "learning_rate": 0.00019112046160427312, | |
| "loss": 1.1069, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.010242964088363943, | |
| "grad_norm": 0.6046240329742432, | |
| "learning_rate": 0.00019103191141878027, | |
| "loss": 1.1071, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.010291973485915925, | |
| "grad_norm": 0.6967079043388367, | |
| "learning_rate": 0.00019094294261519785, | |
| "loss": 1.055, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.010340982883467905, | |
| "grad_norm": 0.6013466715812683, | |
| "learning_rate": 0.00019085355560265657, | |
| "loss": 1.0539, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.010389992281019885, | |
| "grad_norm": 0.6052589416503906, | |
| "learning_rate": 0.0001907637507922103, | |
| "loss": 0.9343, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.010439001678571867, | |
| "grad_norm": 0.46449118852615356, | |
| "learning_rate": 0.00019067352859683423, | |
| "loss": 0.9858, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.010488011076123847, | |
| "grad_norm": 0.45442724227905273, | |
| "learning_rate": 0.0001905828894314229, | |
| "loss": 0.8839, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.010537020473675827, | |
| "grad_norm": 0.5393562316894531, | |
| "learning_rate": 0.00019049183371278828, | |
| "loss": 0.9945, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.010586029871227807, | |
| "grad_norm": 0.5419538021087646, | |
| "learning_rate": 0.00019040036185965798, | |
| "loss": 0.8671, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.010635039268779789, | |
| "grad_norm": 0.5931328535079956, | |
| "learning_rate": 0.00019030847429267318, | |
| "loss": 0.936, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.010684048666331769, | |
| "grad_norm": 0.6994450092315674, | |
| "learning_rate": 0.00019021617143438678, | |
| "loss": 1.1344, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.01073305806388375, | |
| "grad_norm": 0.5890368819236755, | |
| "learning_rate": 0.00019012345370926145, | |
| "loss": 1.0253, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.010782067461435731, | |
| "grad_norm": 0.71031254529953, | |
| "learning_rate": 0.0001900303215436676, | |
| "loss": 0.8868, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.010831076858987711, | |
| "grad_norm": 0.5714280009269714, | |
| "learning_rate": 0.00018993677536588156, | |
| "loss": 1.1322, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.010880086256539691, | |
| "grad_norm": 0.5989257097244263, | |
| "learning_rate": 0.00018984281560608345, | |
| "loss": 1.0701, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.010929095654091671, | |
| "grad_norm": 0.653944730758667, | |
| "learning_rate": 0.00018974844269635535, | |
| "loss": 0.908, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.010978105051643653, | |
| "grad_norm": 0.5854188203811646, | |
| "learning_rate": 0.00018965365707067922, | |
| "loss": 0.9684, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.011027114449195633, | |
| "grad_norm": 0.4724690616130829, | |
| "learning_rate": 0.0001895584591649349, | |
| "loss": 0.974, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.011076123846747613, | |
| "grad_norm": 0.5080939531326294, | |
| "learning_rate": 0.00018946284941689817, | |
| "loss": 1.0113, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.011125133244299595, | |
| "grad_norm": 0.5035115480422974, | |
| "learning_rate": 0.00018936682826623875, | |
| "loss": 1.033, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.011174142641851575, | |
| "grad_norm": 0.544927179813385, | |
| "learning_rate": 0.0001892703961545181, | |
| "loss": 1.0694, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.011223152039403555, | |
| "grad_norm": 0.5631483793258667, | |
| "learning_rate": 0.00018917355352518765, | |
| "loss": 1.1408, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.011272161436955537, | |
| "grad_norm": 0.5837711095809937, | |
| "learning_rate": 0.00018907630082358657, | |
| "loss": 0.9456, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.011321170834507517, | |
| "grad_norm": 0.5248781442642212, | |
| "learning_rate": 0.00018897863849693972, | |
| "loss": 0.7417, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.011370180232059497, | |
| "grad_norm": 0.7330105304718018, | |
| "learning_rate": 0.00018888056699435584, | |
| "loss": 1.1141, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.011419189629611477, | |
| "grad_norm": 0.7595493197441101, | |
| "learning_rate": 0.00018878208676682508, | |
| "loss": 1.1187, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.011468199027163459, | |
| "grad_norm": 0.47531285881996155, | |
| "learning_rate": 0.00018868319826721735, | |
| "loss": 0.9251, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.01151720842471544, | |
| "grad_norm": 0.4790467917919159, | |
| "learning_rate": 0.00018858390195027985, | |
| "loss": 1.0659, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.01156621782226742, | |
| "grad_norm": 0.49423718452453613, | |
| "learning_rate": 0.0001884841982726353, | |
| "loss": 0.9563, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.011615227219819401, | |
| "grad_norm": 0.5443271398544312, | |
| "learning_rate": 0.00018838408769277965, | |
| "loss": 1.0261, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.011664236617371381, | |
| "grad_norm": 0.44612443447113037, | |
| "learning_rate": 0.00018828357067108, | |
| "loss": 0.9187, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.011713246014923361, | |
| "grad_norm": 0.4964349865913391, | |
| "learning_rate": 0.00018818264766977256, | |
| "loss": 1.1576, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.011762255412475341, | |
| "grad_norm": 0.5051829218864441, | |
| "learning_rate": 0.00018808131915296045, | |
| "loss": 1.0866, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.011811264810027323, | |
| "grad_norm": 0.5532406568527222, | |
| "learning_rate": 0.0001879795855866116, | |
| "loss": 1.0958, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.011860274207579303, | |
| "grad_norm": 0.6946402788162231, | |
| "learning_rate": 0.00018787744743855656, | |
| "loss": 1.0638, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.011909283605131283, | |
| "grad_norm": 0.5688795447349548, | |
| "learning_rate": 0.00018777490517848643, | |
| "loss": 1.1, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.011958293002683265, | |
| "grad_norm": 0.7369052767753601, | |
| "learning_rate": 0.00018767195927795057, | |
| "loss": 0.9118, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.012007302400235245, | |
| "grad_norm": 0.5264215469360352, | |
| "learning_rate": 0.00018756861021035462, | |
| "loss": 0.9238, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.012056311797787225, | |
| "grad_norm": 0.6346014738082886, | |
| "learning_rate": 0.0001874648584509582, | |
| "loss": 0.9497, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.012105321195339207, | |
| "grad_norm": 0.48699167370796204, | |
| "learning_rate": 0.00018736070447687267, | |
| "loss": 1.0339, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.012154330592891187, | |
| "grad_norm": 0.5329305529594421, | |
| "learning_rate": 0.00018725614876705907, | |
| "loss": 1.0089, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.012203339990443167, | |
| "grad_norm": 0.5199721455574036, | |
| "learning_rate": 0.00018715119180232582, | |
| "loss": 0.9166, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.012252349387995147, | |
| "grad_norm": 0.6322909593582153, | |
| "learning_rate": 0.00018704583406532662, | |
| "loss": 1.0835, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01230135878554713, | |
| "grad_norm": 0.5659762620925903, | |
| "learning_rate": 0.00018694007604055807, | |
| "loss": 1.0007, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.01235036818309911, | |
| "grad_norm": 0.6697028279304504, | |
| "learning_rate": 0.00018683391821435757, | |
| "loss": 1.0302, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.01239937758065109, | |
| "grad_norm": 0.5258873105049133, | |
| "learning_rate": 0.00018672736107490102, | |
| "loss": 0.9095, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.012448386978203071, | |
| "grad_norm": 0.718046247959137, | |
| "learning_rate": 0.00018662040511220062, | |
| "loss": 0.774, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.012497396375755051, | |
| "grad_norm": 0.5691109895706177, | |
| "learning_rate": 0.0001865130508181026, | |
| "loss": 0.8814, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.012546405773307031, | |
| "grad_norm": 0.5212081074714661, | |
| "learning_rate": 0.00018640529868628488, | |
| "loss": 0.9577, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.012595415170859011, | |
| "grad_norm": 0.4526687264442444, | |
| "learning_rate": 0.00018629714921225495, | |
| "loss": 1.1245, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.012644424568410993, | |
| "grad_norm": 0.5150011777877808, | |
| "learning_rate": 0.0001861886028933475, | |
| "loss": 0.9222, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.012693433965962973, | |
| "grad_norm": 0.5537204742431641, | |
| "learning_rate": 0.0001860796602287221, | |
| "loss": 1.0903, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.012742443363514953, | |
| "grad_norm": 0.5243268013000488, | |
| "learning_rate": 0.00018597032171936104, | |
| "loss": 1.0161, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.012791452761066935, | |
| "grad_norm": 0.6439827084541321, | |
| "learning_rate": 0.00018586058786806685, | |
| "loss": 1.0715, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.012840462158618915, | |
| "grad_norm": 0.9355524778366089, | |
| "learning_rate": 0.00018575045917946007, | |
| "loss": 1.129, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.012889471556170895, | |
| "grad_norm": 0.5358828902244568, | |
| "learning_rate": 0.000185639936159977, | |
| "loss": 0.997, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.012938480953722877, | |
| "grad_norm": 1.1923553943634033, | |
| "learning_rate": 0.00018552901931786727, | |
| "loss": 1.0837, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.012987490351274857, | |
| "grad_norm": 0.7703134417533875, | |
| "learning_rate": 0.0001854177091631915, | |
| "loss": 1.059, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.013036499748826837, | |
| "grad_norm": 0.46094661951065063, | |
| "learning_rate": 0.00018530600620781903, | |
| "loss": 0.9812, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.013085509146378817, | |
| "grad_norm": 0.5393503308296204, | |
| "learning_rate": 0.00018519391096542563, | |
| "loss": 0.9942, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.0131345185439308, | |
| "grad_norm": 0.5819559693336487, | |
| "learning_rate": 0.00018508142395149077, | |
| "loss": 0.7817, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.01318352794148278, | |
| "grad_norm": 0.5847815275192261, | |
| "learning_rate": 0.0001849685456832958, | |
| "loss": 0.9157, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.01323253733903476, | |
| "grad_norm": 0.5651043653488159, | |
| "learning_rate": 0.00018485527667992115, | |
| "loss": 0.9457, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.013281546736586741, | |
| "grad_norm": 0.6798825860023499, | |
| "learning_rate": 0.00018474161746224403, | |
| "loss": 1.0945, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.013330556134138721, | |
| "grad_norm": 0.48076069355010986, | |
| "learning_rate": 0.0001846275685529362, | |
| "loss": 0.9762, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.013379565531690701, | |
| "grad_norm": 0.48303014039993286, | |
| "learning_rate": 0.0001845131304764614, | |
| "loss": 0.7044, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.013428574929242681, | |
| "grad_norm": 0.5329292416572571, | |
| "learning_rate": 0.00018439830375907294, | |
| "loss": 0.9718, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.013477584326794663, | |
| "grad_norm": 0.7514523267745972, | |
| "learning_rate": 0.0001842830889288114, | |
| "loss": 1.0568, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.013526593724346643, | |
| "grad_norm": 0.7108585238456726, | |
| "learning_rate": 0.00018416748651550212, | |
| "loss": 1.0998, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.013575603121898623, | |
| "grad_norm": 0.49022603034973145, | |
| "learning_rate": 0.00018405149705075276, | |
| "loss": 0.8044, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.013624612519450605, | |
| "grad_norm": 0.5161099433898926, | |
| "learning_rate": 0.00018393512106795086, | |
| "loss": 1.011, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.013673621917002585, | |
| "grad_norm": 0.6260969042778015, | |
| "learning_rate": 0.0001838183591022614, | |
| "loss": 1.0089, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.013722631314554565, | |
| "grad_norm": 0.5831906795501709, | |
| "learning_rate": 0.00018370121169062435, | |
| "loss": 0.942, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.013771640712106547, | |
| "grad_norm": 0.4630817174911499, | |
| "learning_rate": 0.0001835836793717522, | |
| "loss": 1.0795, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.013820650109658527, | |
| "grad_norm": 0.6668508052825928, | |
| "learning_rate": 0.0001834657626861274, | |
| "loss": 1.0784, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.013869659507210507, | |
| "grad_norm": 0.523901104927063, | |
| "learning_rate": 0.000183347462176, | |
| "loss": 1.0338, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.013918668904762488, | |
| "grad_norm": 0.6257583498954773, | |
| "learning_rate": 0.00018322877838538513, | |
| "loss": 0.9845, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.01396767830231447, | |
| "grad_norm": 0.6482861638069153, | |
| "learning_rate": 0.00018310971186006038, | |
| "loss": 0.9081, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.01401668769986645, | |
| "grad_norm": 0.600287139415741, | |
| "learning_rate": 0.00018299026314756348, | |
| "loss": 1.0216, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.01406569709741843, | |
| "grad_norm": 0.732085108757019, | |
| "learning_rate": 0.00018287043279718957, | |
| "loss": 1.1922, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.014114706494970411, | |
| "grad_norm": 0.7779308557510376, | |
| "learning_rate": 0.00018275022135998888, | |
| "loss": 1.3221, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.014163715892522391, | |
| "grad_norm": 0.606619656085968, | |
| "learning_rate": 0.00018262962938876408, | |
| "loss": 0.9412, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.014212725290074371, | |
| "grad_norm": 0.7746778130531311, | |
| "learning_rate": 0.00018250865743806767, | |
| "loss": 1.1909, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.014261734687626352, | |
| "grad_norm": 0.7022519111633301, | |
| "learning_rate": 0.00018238730606419965, | |
| "loss": 0.8972, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.014310744085178333, | |
| "grad_norm": 0.5326123237609863, | |
| "learning_rate": 0.00018226557582520477, | |
| "loss": 0.8689, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.014359753482730313, | |
| "grad_norm": 0.5942233800888062, | |
| "learning_rate": 0.00018214346728087, | |
| "loss": 0.9346, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.014408762880282294, | |
| "grad_norm": 0.5361180901527405, | |
| "learning_rate": 0.00018202098099272205, | |
| "loss": 0.8712, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.014457772277834275, | |
| "grad_norm": 0.5793167948722839, | |
| "learning_rate": 0.00018189811752402458, | |
| "loss": 1.0495, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.014506781675386255, | |
| "grad_norm": 0.5728379487991333, | |
| "learning_rate": 0.00018177487743977597, | |
| "loss": 1.0623, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.014555791072938236, | |
| "grad_norm": 0.6366235613822937, | |
| "learning_rate": 0.00018165126130670624, | |
| "loss": 0.9413, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.014604800470490217, | |
| "grad_norm": 0.5405012965202332, | |
| "learning_rate": 0.0001815272696932749, | |
| "loss": 1.1222, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.014653809868042197, | |
| "grad_norm": 0.701434850692749, | |
| "learning_rate": 0.00018140290316966806, | |
| "loss": 1.185, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.014702819265594178, | |
| "grad_norm": 0.5572264790534973, | |
| "learning_rate": 0.0001812781623077959, | |
| "loss": 1.0778, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.014751828663146158, | |
| "grad_norm": 1.0823994874954224, | |
| "learning_rate": 0.00018115304768129, | |
| "loss": 1.2721, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.01480083806069814, | |
| "grad_norm": 0.7624396681785583, | |
| "learning_rate": 0.00018102755986550078, | |
| "loss": 1.103, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.01484984745825012, | |
| "grad_norm": 0.6541030406951904, | |
| "learning_rate": 0.00018090169943749476, | |
| "loss": 1.1571, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.0148988568558021, | |
| "grad_norm": 0.4585016071796417, | |
| "learning_rate": 0.00018077546697605197, | |
| "loss": 0.8446, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.014947866253354081, | |
| "grad_norm": 0.5118653774261475, | |
| "learning_rate": 0.00018064886306166323, | |
| "loss": 0.8785, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.014996875650906061, | |
| "grad_norm": 0.6636449098587036, | |
| "learning_rate": 0.0001805218882765276, | |
| "loss": 1.1065, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.015045885048458042, | |
| "grad_norm": 0.5869944095611572, | |
| "learning_rate": 0.00018039454320454957, | |
| "loss": 1.2293, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.015094894446010022, | |
| "grad_norm": 0.5056412816047668, | |
| "learning_rate": 0.0001802668284313364, | |
| "loss": 1.0039, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.015143903843562003, | |
| "grad_norm": 0.4390663504600525, | |
| "learning_rate": 0.00018013874454419546, | |
| "loss": 0.9446, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.015192913241113984, | |
| "grad_norm": 0.640178918838501, | |
| "learning_rate": 0.00018001029213213162, | |
| "loss": 1.0914, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.015241922638665964, | |
| "grad_norm": 0.6631582975387573, | |
| "learning_rate": 0.00017988147178584426, | |
| "loss": 1.0147, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.015290932036217945, | |
| "grad_norm": 0.4660460352897644, | |
| "learning_rate": 0.00017975228409772496, | |
| "loss": 0.94, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.015339941433769926, | |
| "grad_norm": 0.6155552864074707, | |
| "learning_rate": 0.00017962272966185437, | |
| "loss": 1.0236, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.015388950831321906, | |
| "grad_norm": 0.4859367609024048, | |
| "learning_rate": 0.0001794928090739997, | |
| "loss": 1.0036, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.015437960228873887, | |
| "grad_norm": 0.46601447463035583, | |
| "learning_rate": 0.00017936252293161204, | |
| "loss": 1.0945, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.015486969626425868, | |
| "grad_norm": 0.5885031819343567, | |
| "learning_rate": 0.00017923187183382342, | |
| "loss": 1.0307, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.015535979023977848, | |
| "grad_norm": 0.552923321723938, | |
| "learning_rate": 0.00017910085638144416, | |
| "loss": 0.9364, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.015584988421529828, | |
| "grad_norm": 0.7075880169868469, | |
| "learning_rate": 0.00017896947717696012, | |
| "loss": 1.0722, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.01563399781908181, | |
| "grad_norm": 0.47218647599220276, | |
| "learning_rate": 0.00017883773482452986, | |
| "loss": 0.7295, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.01568300721663379, | |
| "grad_norm": 0.6173276901245117, | |
| "learning_rate": 0.00017870562992998193, | |
| "loss": 1.019, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.01573201661418577, | |
| "grad_norm": 0.6104951500892639, | |
| "learning_rate": 0.0001785731631008122, | |
| "loss": 0.9296, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.01578102601173775, | |
| "grad_norm": 0.6612362861633301, | |
| "learning_rate": 0.00017844033494618063, | |
| "loss": 1.0921, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.01583003540928973, | |
| "grad_norm": 0.7498394250869751, | |
| "learning_rate": 0.00017830714607690906, | |
| "loss": 1.0273, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.015879044806841713, | |
| "grad_norm": 0.6212750673294067, | |
| "learning_rate": 0.0001781735971054779, | |
| "loss": 0.9132, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.015928054204393693, | |
| "grad_norm": 0.7612932920455933, | |
| "learning_rate": 0.0001780396886460237, | |
| "loss": 1.0407, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.015977063601945674, | |
| "grad_norm": 0.5707682371139526, | |
| "learning_rate": 0.00017790542131433597, | |
| "loss": 1.0211, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.016026072999497654, | |
| "grad_norm": 0.5141270756721497, | |
| "learning_rate": 0.00017777079572785462, | |
| "loss": 1.0815, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.016075082397049634, | |
| "grad_norm": 0.552099347114563, | |
| "learning_rate": 0.000177635812505667, | |
| "loss": 0.9927, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.016124091794601614, | |
| "grad_norm": 0.8106556534767151, | |
| "learning_rate": 0.00017750047226850512, | |
| "loss": 1.0921, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.016173101192153594, | |
| "grad_norm": 0.7649658918380737, | |
| "learning_rate": 0.00017736477563874275, | |
| "loss": 1.0336, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.016222110589705577, | |
| "grad_norm": 0.5398699045181274, | |
| "learning_rate": 0.00017722872324039247, | |
| "loss": 0.8543, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.016271119987257558, | |
| "grad_norm": 0.7642013430595398, | |
| "learning_rate": 0.00017709231569910296, | |
| "loss": 1.0422, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.016320129384809538, | |
| "grad_norm": 0.7440565824508667, | |
| "learning_rate": 0.0001769555536421561, | |
| "loss": 0.9295, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.016369138782361518, | |
| "grad_norm": 0.8193507194519043, | |
| "learning_rate": 0.000176818437698464, | |
| "loss": 1.2756, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.016418148179913498, | |
| "grad_norm": 0.8170818090438843, | |
| "learning_rate": 0.0001766809684985661, | |
| "loss": 1.1115, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.016467157577465478, | |
| "grad_norm": 0.6213825941085815, | |
| "learning_rate": 0.0001765431466746264, | |
| "loss": 1.0258, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.016516166975017458, | |
| "grad_norm": 0.5284627079963684, | |
| "learning_rate": 0.00017640497286043036, | |
| "loss": 0.8486, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.01656517637256944, | |
| "grad_norm": 0.6979788541793823, | |
| "learning_rate": 0.00017626644769138227, | |
| "loss": 1.0995, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.01661418577012142, | |
| "grad_norm": 0.43623411655426025, | |
| "learning_rate": 0.00017612757180450204, | |
| "loss": 0.8647, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.0166631951676734, | |
| "grad_norm": 0.6120411157608032, | |
| "learning_rate": 0.00017598834583842235, | |
| "loss": 0.7997, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.01671220456522538, | |
| "grad_norm": 0.5070543885231018, | |
| "learning_rate": 0.00017584877043338583, | |
| "loss": 0.8916, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.016761213962777362, | |
| "grad_norm": 0.7476792931556702, | |
| "learning_rate": 0.00017570884623124201, | |
| "loss": 1.0732, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.016810223360329342, | |
| "grad_norm": 0.5438335537910461, | |
| "learning_rate": 0.00017556857387544442, | |
| "loss": 0.9926, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.016859232757881325, | |
| "grad_norm": 0.5938275456428528, | |
| "learning_rate": 0.00017542795401104751, | |
| "loss": 1.2716, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.016908242155433306, | |
| "grad_norm": 0.6337954998016357, | |
| "learning_rate": 0.00017528698728470392, | |
| "loss": 0.851, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.016957251552985286, | |
| "grad_norm": 0.5400267243385315, | |
| "learning_rate": 0.00017514567434466122, | |
| "loss": 1.0499, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.017006260950537266, | |
| "grad_norm": 0.5979104042053223, | |
| "learning_rate": 0.0001750040158407592, | |
| "loss": 0.957, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.017055270348089246, | |
| "grad_norm": 0.649106502532959, | |
| "learning_rate": 0.0001748620124244267, | |
| "loss": 0.9333, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.017104279745641226, | |
| "grad_norm": 0.6345680356025696, | |
| "learning_rate": 0.00017471966474867857, | |
| "loss": 1.0754, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.017153289143193206, | |
| "grad_norm": 0.6540642380714417, | |
| "learning_rate": 0.000174576973468113, | |
| "loss": 1.0878, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.01720229854074519, | |
| "grad_norm": 0.633299708366394, | |
| "learning_rate": 0.00017443393923890805, | |
| "loss": 0.9907, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.01725130793829717, | |
| "grad_norm": 0.6478168368339539, | |
| "learning_rate": 0.00017429056271881898, | |
| "loss": 0.9081, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.01730031733584915, | |
| "grad_norm": 0.6225663423538208, | |
| "learning_rate": 0.00017414684456717506, | |
| "loss": 1.1538, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.01734932673340113, | |
| "grad_norm": 0.6177115440368652, | |
| "learning_rate": 0.0001740027854448766, | |
| "loss": 1.0673, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.01739833613095311, | |
| "grad_norm": 0.5825146436691284, | |
| "learning_rate": 0.0001738583860143919, | |
| "loss": 0.9452, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.01744734552850509, | |
| "grad_norm": 0.6394603848457336, | |
| "learning_rate": 0.0001737136469397542, | |
| "loss": 0.8666, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.01749635492605707, | |
| "grad_norm": 0.8003541827201843, | |
| "learning_rate": 0.00017356856888655858, | |
| "loss": 0.9749, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.017545364323609054, | |
| "grad_norm": 0.6348778009414673, | |
| "learning_rate": 0.00017342315252195895, | |
| "loss": 1.0315, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.017594373721161034, | |
| "grad_norm": 0.5486456751823425, | |
| "learning_rate": 0.00017327739851466504, | |
| "loss": 0.9171, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.017643383118713014, | |
| "grad_norm": 0.4903838038444519, | |
| "learning_rate": 0.00017313130753493917, | |
| "loss": 0.8439, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.017692392516264994, | |
| "grad_norm": 0.5837622284889221, | |
| "learning_rate": 0.00017298488025459336, | |
| "loss": 1.1164, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.017741401913816974, | |
| "grad_norm": 0.5179843306541443, | |
| "learning_rate": 0.00017283811734698602, | |
| "loss": 0.8872, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.017790411311368954, | |
| "grad_norm": 0.5837355852127075, | |
| "learning_rate": 0.00017269101948701906, | |
| "loss": 0.9488, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.017839420708920934, | |
| "grad_norm": 0.5812946557998657, | |
| "learning_rate": 0.00017254358735113465, | |
| "loss": 0.9592, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.017888430106472918, | |
| "grad_norm": 0.6350980401039124, | |
| "learning_rate": 0.00017239582161731218, | |
| "loss": 0.836, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.017937439504024898, | |
| "grad_norm": 0.5819278359413147, | |
| "learning_rate": 0.0001722477229650651, | |
| "loss": 1.1291, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.017986448901576878, | |
| "grad_norm": 0.5616326332092285, | |
| "learning_rate": 0.00017209929207543786, | |
| "loss": 1.1593, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.018035458299128858, | |
| "grad_norm": 0.6132553815841675, | |
| "learning_rate": 0.00017195052963100266, | |
| "loss": 0.978, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.018084467696680838, | |
| "grad_norm": 0.7652292251586914, | |
| "learning_rate": 0.0001718014363158564, | |
| "loss": 0.9381, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.018084467696680838, | |
| "eval_loss": null, | |
| "eval_runtime": 183.4447, | |
| "eval_samples_per_second": 46.837, | |
| "eval_steps_per_second": 23.419, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.018133477094232818, | |
| "grad_norm": 0.5262779593467712, | |
| "learning_rate": 0.0001716520128156176, | |
| "loss": 1.0514, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.018182486491784798, | |
| "grad_norm": 0.5101955533027649, | |
| "learning_rate": 0.00017150225981742308, | |
| "loss": 0.9021, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.01823149588933678, | |
| "grad_norm": 0.6334530711174011, | |
| "learning_rate": 0.00017135217800992488, | |
| "loss": 0.9931, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.01828050528688876, | |
| "grad_norm": 0.7216410040855408, | |
| "learning_rate": 0.00017120176808328713, | |
| "loss": 0.8422, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.018329514684440742, | |
| "grad_norm": 0.7431232333183289, | |
| "learning_rate": 0.00017105103072918292, | |
| "loss": 0.9414, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.018378524081992722, | |
| "grad_norm": 0.7508997321128845, | |
| "learning_rate": 0.00017089996664079084, | |
| "loss": 1.1678, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.018427533479544702, | |
| "grad_norm": 0.6515668034553528, | |
| "learning_rate": 0.0001707485765127922, | |
| "loss": 0.9482, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.018476542877096682, | |
| "grad_norm": 0.5263504981994629, | |
| "learning_rate": 0.0001705968610413675, | |
| "loss": 1.055, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.018525552274648666, | |
| "grad_norm": 0.57945716381073, | |
| "learning_rate": 0.00017044482092419346, | |
| "loss": 0.9905, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.018574561672200646, | |
| "grad_norm": 0.5509532690048218, | |
| "learning_rate": 0.00017029245686043965, | |
| "loss": 0.8783, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.018623571069752626, | |
| "grad_norm": 0.7690268754959106, | |
| "learning_rate": 0.00017013976955076535, | |
| "loss": 1.0791, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.018672580467304606, | |
| "grad_norm": 0.7092053890228271, | |
| "learning_rate": 0.00016998675969731624, | |
| "loss": 1.0632, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.018721589864856586, | |
| "grad_norm": 0.6620251536369324, | |
| "learning_rate": 0.0001698334280037214, | |
| "loss": 0.9258, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.018770599262408566, | |
| "grad_norm": 0.7011927366256714, | |
| "learning_rate": 0.0001696797751750898, | |
| "loss": 0.9686, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.018819608659960546, | |
| "grad_norm": 0.5856828093528748, | |
| "learning_rate": 0.00016952580191800727, | |
| "loss": 1.0147, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.01886861805751253, | |
| "grad_norm": 0.5925838947296143, | |
| "learning_rate": 0.00016937150894053303, | |
| "loss": 1.017, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.01891762745506451, | |
| "grad_norm": 0.5735793709754944, | |
| "learning_rate": 0.00016921689695219665, | |
| "loss": 0.849, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.01896663685261649, | |
| "grad_norm": 0.6090975403785706, | |
| "learning_rate": 0.0001690619666639947, | |
| "loss": 0.8939, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.01901564625016847, | |
| "grad_norm": 0.589528501033783, | |
| "learning_rate": 0.00016890671878838745, | |
| "loss": 1.0517, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.01906465564772045, | |
| "grad_norm": 0.517760694026947, | |
| "learning_rate": 0.00016875115403929564, | |
| "loss": 1.0258, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.01911366504527243, | |
| "grad_norm": 0.5802696943283081, | |
| "learning_rate": 0.00016859527313209722, | |
| "loss": 1.0879, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.01916267444282441, | |
| "grad_norm": 0.5980463027954102, | |
| "learning_rate": 0.00016843907678362388, | |
| "loss": 0.9885, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.019211683840376394, | |
| "grad_norm": 0.5813844203948975, | |
| "learning_rate": 0.00016828256571215804, | |
| "loss": 0.9511, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.019260693237928374, | |
| "grad_norm": 0.588787853717804, | |
| "learning_rate": 0.00016812574063742935, | |
| "loss": 0.9681, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.019309702635480354, | |
| "grad_norm": 0.5007123351097107, | |
| "learning_rate": 0.00016796860228061145, | |
| "loss": 1.0178, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.019358712033032334, | |
| "grad_norm": 0.6212466359138489, | |
| "learning_rate": 0.00016781115136431856, | |
| "loss": 0.9542, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.019407721430584314, | |
| "grad_norm": 0.63362056016922, | |
| "learning_rate": 0.0001676533886126024, | |
| "loss": 0.9947, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.019456730828136294, | |
| "grad_norm": 0.5138733386993408, | |
| "learning_rate": 0.00016749531475094843, | |
| "loss": 0.9759, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.019505740225688274, | |
| "grad_norm": 0.5656266808509827, | |
| "learning_rate": 0.000167336930506273, | |
| "loss": 0.9864, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.019554749623240258, | |
| "grad_norm": 0.5729262828826904, | |
| "learning_rate": 0.00016717823660691972, | |
| "loss": 0.9961, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.019603759020792238, | |
| "grad_norm": 0.5822618007659912, | |
| "learning_rate": 0.00016701923378265615, | |
| "loss": 1.1899, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.019652768418344218, | |
| "grad_norm": 0.6505359411239624, | |
| "learning_rate": 0.0001668599227646705, | |
| "loss": 1.2617, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.019701777815896198, | |
| "grad_norm": 0.5997886061668396, | |
| "learning_rate": 0.00016670030428556816, | |
| "loss": 0.9047, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.019750787213448178, | |
| "grad_norm": 0.5251815915107727, | |
| "learning_rate": 0.00016654037907936847, | |
| "loss": 0.8927, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.019799796611000158, | |
| "grad_norm": 0.6396327614784241, | |
| "learning_rate": 0.00016638014788150133, | |
| "loss": 1.1262, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.019848806008552138, | |
| "grad_norm": 0.5167068839073181, | |
| "learning_rate": 0.0001662196114288037, | |
| "loss": 0.9205, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.019897815406104122, | |
| "grad_norm": 0.5277456045150757, | |
| "learning_rate": 0.00016605877045951624, | |
| "loss": 0.975, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.019946824803656102, | |
| "grad_norm": 0.5696888566017151, | |
| "learning_rate": 0.00016589762571328005, | |
| "loss": 0.9641, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.019995834201208082, | |
| "grad_norm": 0.6764147877693176, | |
| "learning_rate": 0.00016573617793113308, | |
| "loss": 0.9214, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.020044843598760062, | |
| "grad_norm": 0.5221783518791199, | |
| "learning_rate": 0.000165574427855507, | |
| "loss": 1.0529, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.020093852996312042, | |
| "grad_norm": 0.598206639289856, | |
| "learning_rate": 0.0001654123762302233, | |
| "loss": 1.1109, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.020142862393864022, | |
| "grad_norm": 0.7939401268959045, | |
| "learning_rate": 0.0001652500238004905, | |
| "loss": 1.0441, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.020191871791416002, | |
| "grad_norm": 0.7071179747581482, | |
| "learning_rate": 0.00016508737131290012, | |
| "loss": 0.9036, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.020240881188967986, | |
| "grad_norm": 0.502358615398407, | |
| "learning_rate": 0.00016492441951542367, | |
| "loss": 0.8945, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.020289890586519966, | |
| "grad_norm": 0.7627480626106262, | |
| "learning_rate": 0.0001647611691574091, | |
| "loss": 0.8688, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.020338899984071946, | |
| "grad_norm": 0.6446793675422668, | |
| "learning_rate": 0.0001645976209895772, | |
| "loss": 1.083, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.020387909381623926, | |
| "grad_norm": 0.5880749821662903, | |
| "learning_rate": 0.00016443377576401828, | |
| "loss": 1.0038, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.020436918779175906, | |
| "grad_norm": 0.7221845388412476, | |
| "learning_rate": 0.00016426963423418878, | |
| "loss": 1.1188, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.020485928176727886, | |
| "grad_norm": 0.6247958540916443, | |
| "learning_rate": 0.00016410519715490764, | |
| "loss": 0.9271, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.02053493757427987, | |
| "grad_norm": 0.47129571437835693, | |
| "learning_rate": 0.000163940465282353, | |
| "loss": 0.9078, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.02058394697183185, | |
| "grad_norm": 0.5665132999420166, | |
| "learning_rate": 0.00016377543937405847, | |
| "loss": 0.8963, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.02063295636938383, | |
| "grad_norm": 0.5928874611854553, | |
| "learning_rate": 0.00016361012018890997, | |
| "loss": 1.1101, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.02068196576693581, | |
| "grad_norm": 0.493681937456131, | |
| "learning_rate": 0.00016344450848714204, | |
| "loss": 0.9305, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.02073097516448779, | |
| "grad_norm": 0.5588467121124268, | |
| "learning_rate": 0.00016327860503033436, | |
| "loss": 0.7529, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.02077998456203977, | |
| "grad_norm": 0.8472508192062378, | |
| "learning_rate": 0.00016311241058140823, | |
| "loss": 1.0739, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.02082899395959175, | |
| "grad_norm": 0.5035362243652344, | |
| "learning_rate": 0.00016294592590462316, | |
| "loss": 0.9373, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.020878003357143734, | |
| "grad_norm": 0.494436651468277, | |
| "learning_rate": 0.00016277915176557333, | |
| "loss": 0.8846, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.020927012754695714, | |
| "grad_norm": 0.5663606524467468, | |
| "learning_rate": 0.00016261208893118392, | |
| "loss": 0.9322, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.020976022152247694, | |
| "grad_norm": 0.5859829783439636, | |
| "learning_rate": 0.0001624447381697078, | |
| "loss": 1.1366, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.021025031549799674, | |
| "grad_norm": 0.6672139763832092, | |
| "learning_rate": 0.00016227710025072187, | |
| "loss": 0.9509, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.021074040947351654, | |
| "grad_norm": 0.5644333958625793, | |
| "learning_rate": 0.00016210917594512356, | |
| "loss": 0.9532, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.021123050344903634, | |
| "grad_norm": 0.828715443611145, | |
| "learning_rate": 0.00016194096602512725, | |
| "loss": 1.1273, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.021172059742455614, | |
| "grad_norm": 0.6238652467727661, | |
| "learning_rate": 0.00016177247126426076, | |
| "loss": 1.0033, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.021221069140007598, | |
| "grad_norm": 0.5639209151268005, | |
| "learning_rate": 0.00016160369243736175, | |
| "loss": 0.9039, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.021270078537559578, | |
| "grad_norm": 0.6098002195358276, | |
| "learning_rate": 0.00016143463032057423, | |
| "loss": 1.0728, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.021319087935111558, | |
| "grad_norm": 0.5710477232933044, | |
| "learning_rate": 0.00016126528569134488, | |
| "loss": 1.0031, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.021368097332663538, | |
| "grad_norm": 0.6529645919799805, | |
| "learning_rate": 0.0001610956593284196, | |
| "loss": 0.9533, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.021417106730215518, | |
| "grad_norm": 0.6168728470802307, | |
| "learning_rate": 0.0001609257520118398, | |
| "loss": 0.9776, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.0214661161277675, | |
| "grad_norm": 0.6322219371795654, | |
| "learning_rate": 0.00016075556452293895, | |
| "loss": 1.1566, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.02151512552531948, | |
| "grad_norm": 0.7928242087364197, | |
| "learning_rate": 0.00016058509764433886, | |
| "loss": 1.0876, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.021564134922871462, | |
| "grad_norm": 0.5785205364227295, | |
| "learning_rate": 0.00016041435215994622, | |
| "loss": 0.9904, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.021613144320423442, | |
| "grad_norm": 0.6156221628189087, | |
| "learning_rate": 0.0001602433288549488, | |
| "loss": 0.9935, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.021662153717975422, | |
| "grad_norm": 0.6420785784721375, | |
| "learning_rate": 0.000160072028515812, | |
| "loss": 1.0979, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.021711163115527402, | |
| "grad_norm": 0.512675404548645, | |
| "learning_rate": 0.00015990045193027522, | |
| "loss": 0.8404, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.021760172513079382, | |
| "grad_norm": 0.4817085564136505, | |
| "learning_rate": 0.00015972859988734817, | |
| "loss": 0.86, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.021809181910631362, | |
| "grad_norm": 0.6226520538330078, | |
| "learning_rate": 0.00015955647317730727, | |
| "loss": 1.0432, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.021858191308183342, | |
| "grad_norm": 0.5906574130058289, | |
| "learning_rate": 0.000159384072591692, | |
| "loss": 0.9998, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.021907200705735326, | |
| "grad_norm": 0.5114994049072266, | |
| "learning_rate": 0.00015921139892330138, | |
| "loss": 0.971, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.021956210103287306, | |
| "grad_norm": 0.6080278158187866, | |
| "learning_rate": 0.00015903845296619013, | |
| "loss": 0.829, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.022005219500839286, | |
| "grad_norm": 0.8234065175056458, | |
| "learning_rate": 0.0001588652355156651, | |
| "loss": 0.9837, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.022054228898391266, | |
| "grad_norm": 0.6270102262496948, | |
| "learning_rate": 0.00015869174736828168, | |
| "loss": 0.9846, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.022103238295943246, | |
| "grad_norm": 0.9178394675254822, | |
| "learning_rate": 0.0001585179893218401, | |
| "loss": 0.8804, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.022152247693495226, | |
| "grad_norm": 0.7372391223907471, | |
| "learning_rate": 0.0001583439621753817, | |
| "loss": 1.0138, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.02220125709104721, | |
| "grad_norm": 0.620411217212677, | |
| "learning_rate": 0.00015816966672918529, | |
| "loss": 1.0152, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.02225026648859919, | |
| "grad_norm": 0.5102325677871704, | |
| "learning_rate": 0.00015799510378476347, | |
| "loss": 0.8829, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.02229927588615117, | |
| "grad_norm": 0.5099406242370605, | |
| "learning_rate": 0.00015782027414485905, | |
| "loss": 0.9327, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.02234828528370315, | |
| "grad_norm": 0.5905872583389282, | |
| "learning_rate": 0.00015764517861344116, | |
| "loss": 0.9433, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.02239729468125513, | |
| "grad_norm": 0.6617975831031799, | |
| "learning_rate": 0.0001574698179957017, | |
| "loss": 1.0295, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.02244630407880711, | |
| "grad_norm": 0.5137200355529785, | |
| "learning_rate": 0.0001572941930980516, | |
| "loss": 0.9562, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.02249531347635909, | |
| "grad_norm": 0.5464679598808289, | |
| "learning_rate": 0.00015711830472811702, | |
| "loss": 0.9995, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.022544322873911074, | |
| "grad_norm": 0.574379026889801, | |
| "learning_rate": 0.00015694215369473584, | |
| "loss": 1.0157, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.022593332271463054, | |
| "grad_norm": 0.5301153063774109, | |
| "learning_rate": 0.00015676574080795378, | |
| "loss": 0.9596, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.022642341669015034, | |
| "grad_norm": 0.6540731191635132, | |
| "learning_rate": 0.0001565890668790207, | |
| "loss": 0.9636, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.022691351066567014, | |
| "grad_norm": 0.9363406300544739, | |
| "learning_rate": 0.00015641213272038682, | |
| "loss": 1.1141, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.022740360464118994, | |
| "grad_norm": 0.6103693842887878, | |
| "learning_rate": 0.0001562349391456992, | |
| "loss": 0.8886, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.022789369861670974, | |
| "grad_norm": 0.6556861996650696, | |
| "learning_rate": 0.0001560574869697977, | |
| "loss": 1.0414, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.022838379259222955, | |
| "grad_norm": 0.5614734292030334, | |
| "learning_rate": 0.0001558797770087115, | |
| "loss": 0.8673, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.022887388656774938, | |
| "grad_norm": 0.6785566210746765, | |
| "learning_rate": 0.00015570181007965514, | |
| "loss": 0.9473, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.022936398054326918, | |
| "grad_norm": 0.5315887928009033, | |
| "learning_rate": 0.0001555235870010249, | |
| "loss": 0.9594, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.022985407451878898, | |
| "grad_norm": 0.567318320274353, | |
| "learning_rate": 0.00015534510859239493, | |
| "loss": 1.157, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.02303441684943088, | |
| "grad_norm": 0.5194117426872253, | |
| "learning_rate": 0.00015516637567451356, | |
| "loss": 0.9412, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.02308342624698286, | |
| "grad_norm": 0.6578914523124695, | |
| "learning_rate": 0.0001549873890692996, | |
| "loss": 1.1002, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.02313243564453484, | |
| "grad_norm": 0.7504667639732361, | |
| "learning_rate": 0.0001548081495998383, | |
| "loss": 0.9016, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.02318144504208682, | |
| "grad_norm": 0.6988145112991333, | |
| "learning_rate": 0.00015462865809037784, | |
| "loss": 1.0586, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.023230454439638802, | |
| "grad_norm": 0.6438858509063721, | |
| "learning_rate": 0.0001544489153663254, | |
| "loss": 0.8252, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.023279463837190782, | |
| "grad_norm": 0.5463119149208069, | |
| "learning_rate": 0.00015426892225424337, | |
| "loss": 0.9599, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.023328473234742762, | |
| "grad_norm": 0.6298267245292664, | |
| "learning_rate": 0.00015408867958184556, | |
| "loss": 0.9993, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.023377482632294742, | |
| "grad_norm": 0.5038778185844421, | |
| "learning_rate": 0.0001539081881779935, | |
| "loss": 0.9883, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.023426492029846722, | |
| "grad_norm": 0.7516053318977356, | |
| "learning_rate": 0.00015372744887269242, | |
| "loss": 1.0561, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.023475501427398703, | |
| "grad_norm": 0.5092015862464905, | |
| "learning_rate": 0.00015354646249708757, | |
| "loss": 0.9311, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.023524510824950683, | |
| "grad_norm": 0.6461193561553955, | |
| "learning_rate": 0.00015336522988346047, | |
| "loss": 0.9045, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.023573520222502666, | |
| "grad_norm": 0.8320329785346985, | |
| "learning_rate": 0.00015318375186522485, | |
| "loss": 1.0344, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.023622529620054646, | |
| "grad_norm": 0.5258587002754211, | |
| "learning_rate": 0.00015300202927692302, | |
| "loss": 0.9523, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.023671539017606626, | |
| "grad_norm": 0.7670847773551941, | |
| "learning_rate": 0.00015282006295422199, | |
| "loss": 1.005, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.023720548415158606, | |
| "grad_norm": 0.5239679217338562, | |
| "learning_rate": 0.00015263785373390956, | |
| "loss": 0.9929, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.023769557812710587, | |
| "grad_norm": 0.5952014923095703, | |
| "learning_rate": 0.00015245540245389052, | |
| "loss": 1.1896, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.023818567210262567, | |
| "grad_norm": 0.7766487002372742, | |
| "learning_rate": 0.0001522727099531828, | |
| "loss": 0.8564, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.02386757660781455, | |
| "grad_norm": 0.7068402171134949, | |
| "learning_rate": 0.00015208977707191368, | |
| "loss": 1.0832, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.02391658600536653, | |
| "grad_norm": 0.5393612384796143, | |
| "learning_rate": 0.0001519066046513157, | |
| "loss": 0.8455, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.02396559540291851, | |
| "grad_norm": 0.6815643906593323, | |
| "learning_rate": 0.00015172319353372302, | |
| "loss": 0.8441, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.02401460480047049, | |
| "grad_norm": 0.7284293174743652, | |
| "learning_rate": 0.00015153954456256753, | |
| "loss": 0.9822, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.02406361419802247, | |
| "grad_norm": 0.547886312007904, | |
| "learning_rate": 0.00015135565858237482, | |
| "loss": 0.8793, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.02411262359557445, | |
| "grad_norm": 0.5711491703987122, | |
| "learning_rate": 0.00015117153643876038, | |
| "loss": 0.7461, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.02416163299312643, | |
| "grad_norm": 0.5130283236503601, | |
| "learning_rate": 0.00015098717897842585, | |
| "loss": 0.8651, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.024210642390678414, | |
| "grad_norm": 0.7330240607261658, | |
| "learning_rate": 0.00015080258704915477, | |
| "loss": 1.1199, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.024259651788230394, | |
| "grad_norm": 0.6397481560707092, | |
| "learning_rate": 0.00015061776149980914, | |
| "loss": 0.8711, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.024308661185782374, | |
| "grad_norm": 0.5871744751930237, | |
| "learning_rate": 0.00015043270318032512, | |
| "loss": 1.0653, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.024357670583334354, | |
| "grad_norm": 0.560553789138794, | |
| "learning_rate": 0.0001502474129417094, | |
| "loss": 0.8683, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.024406679980886335, | |
| "grad_norm": 0.6320847272872925, | |
| "learning_rate": 0.00015006189163603502, | |
| "loss": 0.8956, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.024455689378438315, | |
| "grad_norm": 0.7588667869567871, | |
| "learning_rate": 0.00014987614011643775, | |
| "loss": 0.9022, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.024504698775990295, | |
| "grad_norm": 0.6102482676506042, | |
| "learning_rate": 0.00014969015923711195, | |
| "loss": 0.9273, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.024553708173542278, | |
| "grad_norm": 0.8161607384681702, | |
| "learning_rate": 0.00014950394985330676, | |
| "loss": 0.892, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.02460271757109426, | |
| "grad_norm": 0.6331537961959839, | |
| "learning_rate": 0.000149317512821322, | |
| "loss": 1.0033, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.02465172696864624, | |
| "grad_norm": 0.5715288519859314, | |
| "learning_rate": 0.00014913084899850448, | |
| "loss": 0.8952, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.02470073636619822, | |
| "grad_norm": 0.6018710136413574, | |
| "learning_rate": 0.00014894395924324388, | |
| "loss": 0.8296, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.0247497457637502, | |
| "grad_norm": 0.4526127576828003, | |
| "learning_rate": 0.00014875684441496883, | |
| "loss": 0.9113, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.02479875516130218, | |
| "grad_norm": 0.7399595975875854, | |
| "learning_rate": 0.00014856950537414299, | |
| "loss": 1.0297, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.02484776455885416, | |
| "grad_norm": 0.6361096501350403, | |
| "learning_rate": 0.00014838194298226104, | |
| "loss": 1.0017, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.024896773956406142, | |
| "grad_norm": 0.5069610476493835, | |
| "learning_rate": 0.00014819415810184491, | |
| "loss": 0.8191, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.024945783353958122, | |
| "grad_norm": 0.6111281514167786, | |
| "learning_rate": 0.00014800615159643945, | |
| "loss": 1.1032, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.024994792751510102, | |
| "grad_norm": 0.6328058242797852, | |
| "learning_rate": 0.00014781792433060884, | |
| "loss": 0.9169, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.025043802149062083, | |
| "grad_norm": 0.5302674174308777, | |
| "learning_rate": 0.00014762947716993237, | |
| "loss": 0.9313, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.025092811546614063, | |
| "grad_norm": 0.8632691502571106, | |
| "learning_rate": 0.00014744081098100052, | |
| "loss": 0.8959, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.025141820944166043, | |
| "grad_norm": 0.5266912579536438, | |
| "learning_rate": 0.00014725192663141108, | |
| "loss": 0.971, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.025190830341718023, | |
| "grad_norm": 0.6190224885940552, | |
| "learning_rate": 0.00014706282498976495, | |
| "loss": 1.0125, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.025239839739270006, | |
| "grad_norm": 0.56204754114151, | |
| "learning_rate": 0.00014687350692566236, | |
| "loss": 0.9021, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.025288849136821986, | |
| "grad_norm": 0.5350925326347351, | |
| "learning_rate": 0.0001466839733096987, | |
| "loss": 0.9583, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.025337858534373967, | |
| "grad_norm": 0.5631693005561829, | |
| "learning_rate": 0.0001464942250134607, | |
| "loss": 0.8102, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.025386867931925947, | |
| "grad_norm": 0.5800350308418274, | |
| "learning_rate": 0.00014630426290952218, | |
| "loss": 1.0154, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.025435877329477927, | |
| "grad_norm": 0.9159958362579346, | |
| "learning_rate": 0.0001461140878714403, | |
| "loss": 0.8732, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.025484886727029907, | |
| "grad_norm": 0.5498703122138977, | |
| "learning_rate": 0.00014592370077375132, | |
| "loss": 0.8598, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.025533896124581887, | |
| "grad_norm": 0.6629413962364197, | |
| "learning_rate": 0.00014573310249196679, | |
| "loss": 0.9835, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.02558290552213387, | |
| "grad_norm": 0.795328676700592, | |
| "learning_rate": 0.00014554229390256924, | |
| "loss": 0.9006, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.02563191491968585, | |
| "grad_norm": 0.5357116460800171, | |
| "learning_rate": 0.00014535127588300847, | |
| "loss": 1.0219, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.02568092431723783, | |
| "grad_norm": 0.6141706705093384, | |
| "learning_rate": 0.00014516004931169728, | |
| "loss": 0.9475, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.02572993371478981, | |
| "grad_norm": 0.6289846897125244, | |
| "learning_rate": 0.00014496861506800758, | |
| "loss": 1.2036, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.02577894311234179, | |
| "grad_norm": 0.639453113079071, | |
| "learning_rate": 0.0001447769740322662, | |
| "loss": 0.994, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.02582795250989377, | |
| "grad_norm": 0.6463608145713806, | |
| "learning_rate": 0.00014458512708575094, | |
| "loss": 1.0525, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.025876961907445754, | |
| "grad_norm": 1.537865400314331, | |
| "learning_rate": 0.00014439307511068656, | |
| "loss": 1.5053, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.025925971304997734, | |
| "grad_norm": 0.6124917268753052, | |
| "learning_rate": 0.00014420081899024057, | |
| "loss": 0.8965, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.025974980702549715, | |
| "grad_norm": 0.5339726209640503, | |
| "learning_rate": 0.00014400835960851936, | |
| "loss": 0.9666, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.026023990100101695, | |
| "grad_norm": 0.5528753399848938, | |
| "learning_rate": 0.00014381569785056395, | |
| "loss": 0.8453, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.026072999497653675, | |
| "grad_norm": 0.506305456161499, | |
| "learning_rate": 0.00014362283460234604, | |
| "loss": 0.9733, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.026122008895205655, | |
| "grad_norm": 0.6728483438491821, | |
| "learning_rate": 0.0001434297707507639, | |
| "loss": 1.0848, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.026171018292757635, | |
| "grad_norm": 0.7605665326118469, | |
| "learning_rate": 0.0001432365071836383, | |
| "loss": 0.8888, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.02622002769030962, | |
| "grad_norm": 0.5308957099914551, | |
| "learning_rate": 0.00014304304478970838, | |
| "loss": 0.9177, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.0262690370878616, | |
| "grad_norm": 0.6177141666412354, | |
| "learning_rate": 0.00014284938445862768, | |
| "loss": 0.7726, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.02631804648541358, | |
| "grad_norm": 1.5204870700836182, | |
| "learning_rate": 0.00014265552708095987, | |
| "loss": 0.9485, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.02636705588296556, | |
| "grad_norm": 0.7078920602798462, | |
| "learning_rate": 0.00014246147354817485, | |
| "loss": 1.0088, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.02641606528051754, | |
| "grad_norm": 0.6084794402122498, | |
| "learning_rate": 0.00014226722475264449, | |
| "loss": 0.9549, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.02646507467806952, | |
| "grad_norm": 0.6052922010421753, | |
| "learning_rate": 0.0001420727815876386, | |
| "loss": 1.0489, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0265140840756215, | |
| "grad_norm": 0.6130591034889221, | |
| "learning_rate": 0.00014187814494732087, | |
| "loss": 1.0125, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.026563093473173482, | |
| "grad_norm": 0.5894243121147156, | |
| "learning_rate": 0.00014168331572674464, | |
| "loss": 0.9286, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.026612102870725463, | |
| "grad_norm": 0.5302923917770386, | |
| "learning_rate": 0.00014148829482184887, | |
| "loss": 1.0314, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.026661112268277443, | |
| "grad_norm": 0.7226726412773132, | |
| "learning_rate": 0.000141293083129454, | |
| "loss": 1.0828, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.026710121665829423, | |
| "grad_norm": 0.6821955442428589, | |
| "learning_rate": 0.00014109768154725783, | |
| "loss": 0.7577, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.026759131063381403, | |
| "grad_norm": 0.5746994018554688, | |
| "learning_rate": 0.00014090209097383135, | |
| "loss": 0.9804, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.026808140460933383, | |
| "grad_norm": 0.7999975085258484, | |
| "learning_rate": 0.0001407063123086147, | |
| "loss": 1.2291, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.026857149858485363, | |
| "grad_norm": 0.5952467918395996, | |
| "learning_rate": 0.00014051034645191294, | |
| "loss": 0.9799, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.026906159256037347, | |
| "grad_norm": 0.5163621306419373, | |
| "learning_rate": 0.00014031419430489192, | |
| "loss": 0.964, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.026955168653589327, | |
| "grad_norm": 0.5633589029312134, | |
| "learning_rate": 0.00014011785676957422, | |
| "loss": 1.1302, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.027004178051141307, | |
| "grad_norm": 0.9280727505683899, | |
| "learning_rate": 0.0001399213347488349, | |
| "loss": 1.1204, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.027053187448693287, | |
| "grad_norm": 0.47326645255088806, | |
| "learning_rate": 0.00013972462914639745, | |
| "loss": 0.9462, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.027102196846245267, | |
| "grad_norm": 0.646728515625, | |
| "learning_rate": 0.00013952774086682944, | |
| "loss": 1.098, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.027151206243797247, | |
| "grad_norm": 0.660001277923584, | |
| "learning_rate": 0.00013933067081553868, | |
| "loss": 0.905, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.027200215641349227, | |
| "grad_norm": 0.565215528011322, | |
| "learning_rate": 0.00013913341989876875, | |
| "loss": 0.8937, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.02724922503890121, | |
| "grad_norm": 0.5420377254486084, | |
| "learning_rate": 0.00013893598902359498, | |
| "loss": 0.8095, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.02729823443645319, | |
| "grad_norm": 0.5699804425239563, | |
| "learning_rate": 0.0001387383790979203, | |
| "loss": 0.8931, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.02734724383400517, | |
| "grad_norm": 0.6203681230545044, | |
| "learning_rate": 0.00013854059103047094, | |
| "loss": 0.9657, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.02739625323155715, | |
| "grad_norm": 0.5285091996192932, | |
| "learning_rate": 0.00013834262573079241, | |
| "loss": 0.9815, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.02744526262910913, | |
| "grad_norm": 0.5105606913566589, | |
| "learning_rate": 0.0001381444841092452, | |
| "loss": 0.7874, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.02749427202666111, | |
| "grad_norm": 0.5473595261573792, | |
| "learning_rate": 0.00013794616707700057, | |
| "loss": 0.8856, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.027543281424213095, | |
| "grad_norm": 0.5748807191848755, | |
| "learning_rate": 0.00013774767554603659, | |
| "loss": 0.9379, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.027592290821765075, | |
| "grad_norm": 0.6852862238883972, | |
| "learning_rate": 0.00013754901042913357, | |
| "loss": 0.9867, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.027641300219317055, | |
| "grad_norm": 0.5716254711151123, | |
| "learning_rate": 0.0001373501726398702, | |
| "loss": 1.0988, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.027690309616869035, | |
| "grad_norm": 0.5285595059394836, | |
| "learning_rate": 0.0001371511630926192, | |
| "loss": 0.8952, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.027739319014421015, | |
| "grad_norm": 0.6386389136314392, | |
| "learning_rate": 0.00013695198270254316, | |
| "loss": 0.9052, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.027788328411972995, | |
| "grad_norm": 0.6316874623298645, | |
| "learning_rate": 0.0001367526323855902, | |
| "loss": 0.8897, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.027837337809524975, | |
| "grad_norm": 0.5656607151031494, | |
| "learning_rate": 0.00013655311305848996, | |
| "loss": 1.0086, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.02788634720707696, | |
| "grad_norm": 0.5613694190979004, | |
| "learning_rate": 0.00013635342563874926, | |
| "loss": 1.0209, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.02793535660462894, | |
| "grad_norm": 0.53194659948349, | |
| "learning_rate": 0.00013615357104464795, | |
| "loss": 1.1905, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.02798436600218092, | |
| "grad_norm": 0.7656781077384949, | |
| "learning_rate": 0.00013595355019523452, | |
| "loss": 0.8432, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.0280333753997329, | |
| "grad_norm": 0.8043819665908813, | |
| "learning_rate": 0.00013575336401032214, | |
| "loss": 1.0537, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.02808238479728488, | |
| "grad_norm": 0.543787956237793, | |
| "learning_rate": 0.00013555301341048424, | |
| "loss": 1.0334, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.02813139419483686, | |
| "grad_norm": 0.5572656989097595, | |
| "learning_rate": 0.00013535249931705029, | |
| "loss": 1.0404, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.02818040359238884, | |
| "grad_norm": 0.6637741923332214, | |
| "learning_rate": 0.00013515182265210165, | |
| "loss": 0.9432, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.028229412989940823, | |
| "grad_norm": 0.5407254695892334, | |
| "learning_rate": 0.00013495098433846724, | |
| "loss": 0.8214, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.028278422387492803, | |
| "grad_norm": 0.7442420125007629, | |
| "learning_rate": 0.00013474998529971934, | |
| "loss": 0.8009, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.028327431785044783, | |
| "grad_norm": 0.8374704122543335, | |
| "learning_rate": 0.00013454882646016938, | |
| "loss": 0.8955, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.028376441182596763, | |
| "grad_norm": 0.54608553647995, | |
| "learning_rate": 0.0001343475087448636, | |
| "loss": 0.8587, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.028425450580148743, | |
| "grad_norm": 0.6886517405509949, | |
| "learning_rate": 0.0001341460330795789, | |
| "loss": 1.0729, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.028474459977700723, | |
| "grad_norm": 0.9504325985908508, | |
| "learning_rate": 0.00013394440039081847, | |
| "loss": 0.9301, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.028523469375252703, | |
| "grad_norm": 0.5252017974853516, | |
| "learning_rate": 0.00013374261160580754, | |
| "loss": 1.0221, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.028572478772804687, | |
| "grad_norm": 0.7967292070388794, | |
| "learning_rate": 0.00013354066765248934, | |
| "loss": 0.9371, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.028621488170356667, | |
| "grad_norm": 0.6831346750259399, | |
| "learning_rate": 0.0001333385694595205, | |
| "loss": 1.0537, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.028670497567908647, | |
| "grad_norm": 0.5146148204803467, | |
| "learning_rate": 0.0001331363179562669, | |
| "loss": 0.9417, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.028719506965460627, | |
| "grad_norm": 0.49967578053474426, | |
| "learning_rate": 0.0001329339140727996, | |
| "loss": 0.9316, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.028768516363012607, | |
| "grad_norm": 0.6498879790306091, | |
| "learning_rate": 0.00013273135873989028, | |
| "loss": 0.8361, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.028817525760564587, | |
| "grad_norm": 0.49114206433296204, | |
| "learning_rate": 0.00013252865288900707, | |
| "loss": 0.8651, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.028866535158116567, | |
| "grad_norm": 0.5896993279457092, | |
| "learning_rate": 0.00013232579745231035, | |
| "loss": 0.8044, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.02891554455566855, | |
| "grad_norm": 0.5868169069290161, | |
| "learning_rate": 0.00013212279336264823, | |
| "loss": 0.85, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.02896455395322053, | |
| "grad_norm": 0.5865799188613892, | |
| "learning_rate": 0.00013191964155355264, | |
| "loss": 1.1317, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.02901356335077251, | |
| "grad_norm": 0.5267806053161621, | |
| "learning_rate": 0.00013171634295923456, | |
| "loss": 1.0084, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.02906257274832449, | |
| "grad_norm": 0.5632867813110352, | |
| "learning_rate": 0.00013151289851458015, | |
| "loss": 0.9592, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.02911158214587647, | |
| "grad_norm": 0.5206688642501831, | |
| "learning_rate": 0.00013130930915514623, | |
| "loss": 0.9212, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.02916059154342845, | |
| "grad_norm": 0.6168157458305359, | |
| "learning_rate": 0.000131105575817156, | |
| "loss": 1.0057, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.029209600940980435, | |
| "grad_norm": 0.5929550528526306, | |
| "learning_rate": 0.00013090169943749476, | |
| "loss": 0.9311, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.029258610338532415, | |
| "grad_norm": 0.6218149065971375, | |
| "learning_rate": 0.00013069768095370563, | |
| "loss": 0.8691, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.029307619736084395, | |
| "grad_norm": 0.7326745390892029, | |
| "learning_rate": 0.00013049352130398517, | |
| "loss": 0.9513, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.029356629133636375, | |
| "grad_norm": 0.5618883371353149, | |
| "learning_rate": 0.00013028922142717918, | |
| "loss": 1.0182, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.029405638531188355, | |
| "grad_norm": 1.0137871503829956, | |
| "learning_rate": 0.00013008478226277816, | |
| "loss": 1.0928, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.029454647928740335, | |
| "grad_norm": 0.5762143135070801, | |
| "learning_rate": 0.00012988020475091327, | |
| "loss": 0.7268, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.029503657326292315, | |
| "grad_norm": 0.6828392148017883, | |
| "learning_rate": 0.00012967548983235187, | |
| "loss": 1.0076, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.0295526667238443, | |
| "grad_norm": 0.6265244483947754, | |
| "learning_rate": 0.00012947063844849307, | |
| "loss": 0.8805, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.02960167612139628, | |
| "grad_norm": 0.7893605828285217, | |
| "learning_rate": 0.00012926565154136368, | |
| "loss": 1.067, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.02965068551894826, | |
| "grad_norm": 0.6898416876792908, | |
| "learning_rate": 0.00012906053005361365, | |
| "loss": 1.187, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.02969969491650024, | |
| "grad_norm": 0.6458303332328796, | |
| "learning_rate": 0.0001288552749285118, | |
| "loss": 0.9442, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.02974870431405222, | |
| "grad_norm": 0.6307141184806824, | |
| "learning_rate": 0.0001286498871099415, | |
| "loss": 1.1363, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.0297977137116042, | |
| "grad_norm": 0.6049492955207825, | |
| "learning_rate": 0.00012844436754239636, | |
| "loss": 0.8748, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.02984672310915618, | |
| "grad_norm": 0.6229107975959778, | |
| "learning_rate": 0.0001282387171709758, | |
| "loss": 0.8046, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.029895732506708163, | |
| "grad_norm": 0.5819247364997864, | |
| "learning_rate": 0.00012803293694138077, | |
| "loss": 1.0562, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.029944741904260143, | |
| "grad_norm": 0.6565641164779663, | |
| "learning_rate": 0.00012782702779990944, | |
| "loss": 1.1314, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.029993751301812123, | |
| "grad_norm": 0.6735644340515137, | |
| "learning_rate": 0.00012762099069345268, | |
| "loss": 0.9843, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.030042760699364103, | |
| "grad_norm": 0.5780985355377197, | |
| "learning_rate": 0.00012741482656948992, | |
| "loss": 0.8717, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.030091770096916083, | |
| "grad_norm": 0.5610291957855225, | |
| "learning_rate": 0.0001272085363760846, | |
| "loss": 0.8648, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.030140779494468063, | |
| "grad_norm": 0.7192383408546448, | |
| "learning_rate": 0.00012700212106188009, | |
| "loss": 0.8972, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.030189788892020043, | |
| "grad_norm": 0.49262872338294983, | |
| "learning_rate": 0.00012679558157609479, | |
| "loss": 0.8287, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.030238798289572027, | |
| "grad_norm": 0.6038398146629333, | |
| "learning_rate": 0.00012658891886851848, | |
| "loss": 0.9688, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.030287807687124007, | |
| "grad_norm": 0.7166770100593567, | |
| "learning_rate": 0.0001263821338895074, | |
| "loss": 1.2706, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.030336817084675987, | |
| "grad_norm": 0.6760246753692627, | |
| "learning_rate": 0.00012617522758998006, | |
| "loss": 0.7879, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.030385826482227967, | |
| "grad_norm": 0.5924021005630493, | |
| "learning_rate": 0.00012596820092141295, | |
| "loss": 0.9037, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.030434835879779947, | |
| "grad_norm": 0.8994151949882507, | |
| "learning_rate": 0.00012576105483583602, | |
| "loss": 1.0, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.030483845277331927, | |
| "grad_norm": 0.6243171095848083, | |
| "learning_rate": 0.00012555379028582838, | |
| "loss": 1.1253, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.030532854674883907, | |
| "grad_norm": 0.5963743925094604, | |
| "learning_rate": 0.00012534640822451398, | |
| "loss": 0.9301, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.03058186407243589, | |
| "grad_norm": 0.8039215803146362, | |
| "learning_rate": 0.00012513890960555706, | |
| "loss": 0.9926, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.03063087346998787, | |
| "grad_norm": 0.5766738057136536, | |
| "learning_rate": 0.00012493129538315788, | |
| "loss": 0.8439, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.03067988286753985, | |
| "grad_norm": 0.4972213804721832, | |
| "learning_rate": 0.0001247235665120484, | |
| "loss": 0.7884, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.03072889226509183, | |
| "grad_norm": 0.6013949513435364, | |
| "learning_rate": 0.00012451572394748766, | |
| "loss": 1.0443, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.03077790166264381, | |
| "grad_norm": 0.6317854523658752, | |
| "learning_rate": 0.0001243077686452577, | |
| "loss": 0.8208, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.03082691106019579, | |
| "grad_norm": 0.5569573044776917, | |
| "learning_rate": 0.00012409970156165878, | |
| "loss": 0.8572, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.030875920457747775, | |
| "grad_norm": 0.5597726702690125, | |
| "learning_rate": 0.0001238915236535054, | |
| "loss": 1.1361, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.030924929855299755, | |
| "grad_norm": 0.5146288871765137, | |
| "learning_rate": 0.00012368323587812162, | |
| "loss": 0.9265, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.030973939252851735, | |
| "grad_norm": 0.6121995449066162, | |
| "learning_rate": 0.00012347483919333664, | |
| "loss": 0.9335, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.031022948650403715, | |
| "grad_norm": 0.5894641280174255, | |
| "learning_rate": 0.00012326633455748065, | |
| "loss": 0.8862, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.031071958047955695, | |
| "grad_norm": 0.6794971823692322, | |
| "learning_rate": 0.00012305772292938016, | |
| "loss": 1.0275, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.031120967445507675, | |
| "grad_norm": 0.5644852519035339, | |
| "learning_rate": 0.0001228490052683537, | |
| "loss": 0.9269, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.031169976843059655, | |
| "grad_norm": 0.6075661778450012, | |
| "learning_rate": 0.00012264018253420748, | |
| "loss": 1.1916, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.03121898624061164, | |
| "grad_norm": 0.7460759878158569, | |
| "learning_rate": 0.00012243125568723077, | |
| "loss": 1.1546, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.03126799563816362, | |
| "grad_norm": 0.6334986090660095, | |
| "learning_rate": 0.00012222222568819172, | |
| "loss": 0.9377, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.0313170050357156, | |
| "grad_norm": 0.6031709909439087, | |
| "learning_rate": 0.00012201309349833279, | |
| "loss": 0.9242, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.03136601443326758, | |
| "grad_norm": 0.6870954632759094, | |
| "learning_rate": 0.00012180386007936637, | |
| "loss": 1.0381, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.03141502383081956, | |
| "grad_norm": 0.5240617394447327, | |
| "learning_rate": 0.0001215945263934704, | |
| "loss": 1.0243, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.03146403322837154, | |
| "grad_norm": 0.5612547397613525, | |
| "learning_rate": 0.00012138509340328381, | |
| "loss": 0.7303, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.03151304262592352, | |
| "grad_norm": 0.7289676070213318, | |
| "learning_rate": 0.0001211755620719023, | |
| "loss": 0.8771, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.0315620520234755, | |
| "grad_norm": 0.5122924447059631, | |
| "learning_rate": 0.00012096593336287376, | |
| "loss": 0.8387, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.03161106142102748, | |
| "grad_norm": 0.6075708866119385, | |
| "learning_rate": 0.00012075620824019384, | |
| "loss": 0.9293, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.03166007081857946, | |
| "grad_norm": 0.6391776204109192, | |
| "learning_rate": 0.00012054638766830162, | |
| "loss": 1.0081, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.03170908021613145, | |
| "grad_norm": 0.6238724589347839, | |
| "learning_rate": 0.00012033647261207505, | |
| "loss": 0.9146, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.03175808961368343, | |
| "grad_norm": 0.9260525703430176, | |
| "learning_rate": 0.00012012646403682663, | |
| "loss": 0.7296, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.03180709901123541, | |
| "grad_norm": 0.6483685374259949, | |
| "learning_rate": 0.00011991636290829893, | |
| "loss": 1.0605, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.03185610840878739, | |
| "grad_norm": 0.6315485835075378, | |
| "learning_rate": 0.00011970617019266, | |
| "loss": 0.8309, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.03190511780633937, | |
| "grad_norm": 0.5987114310264587, | |
| "learning_rate": 0.00011949588685649922, | |
| "loss": 1.0255, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.03195412720389135, | |
| "grad_norm": 0.6101971864700317, | |
| "learning_rate": 0.00011928551386682262, | |
| "loss": 0.9697, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.03200313660144333, | |
| "grad_norm": 0.6032626032829285, | |
| "learning_rate": 0.00011907505219104856, | |
| "loss": 0.8307, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.03205214599899531, | |
| "grad_norm": 0.8691760301589966, | |
| "learning_rate": 0.00011886450279700313, | |
| "loss": 1.0574, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.03210115539654729, | |
| "grad_norm": 0.5532340407371521, | |
| "learning_rate": 0.00011865386665291591, | |
| "loss": 0.9853, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.03215016479409927, | |
| "grad_norm": 0.5070647597312927, | |
| "learning_rate": 0.00011844314472741533, | |
| "loss": 0.9257, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.03219917419165125, | |
| "grad_norm": 0.5597984790802002, | |
| "learning_rate": 0.00011823233798952434, | |
| "loss": 0.8883, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.03224818358920323, | |
| "grad_norm": 0.601868212223053, | |
| "learning_rate": 0.00011802144740865589, | |
| "loss": 1.0045, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.03229719298675521, | |
| "grad_norm": 0.5818256139755249, | |
| "learning_rate": 0.00011781047395460847, | |
| "loss": 0.9819, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.03234620238430719, | |
| "grad_norm": 0.5207995772361755, | |
| "learning_rate": 0.00011759941859756172, | |
| "loss": 0.9152, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.032395211781859175, | |
| "grad_norm": 0.5202385187149048, | |
| "learning_rate": 0.00011738828230807184, | |
| "loss": 0.9291, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.032444221179411155, | |
| "grad_norm": 0.5271541476249695, | |
| "learning_rate": 0.00011717706605706735, | |
| "loss": 1.1041, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.032493230576963135, | |
| "grad_norm": 0.5379035472869873, | |
| "learning_rate": 0.00011696577081584426, | |
| "loss": 1.0, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.032542239974515115, | |
| "grad_norm": 0.625950276851654, | |
| "learning_rate": 0.00011675439755606203, | |
| "loss": 0.9559, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.032591249372067095, | |
| "grad_norm": 0.7411119937896729, | |
| "learning_rate": 0.0001165429472497388, | |
| "loss": 0.8572, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.032640258769619075, | |
| "grad_norm": 0.734190046787262, | |
| "learning_rate": 0.000116331420869247, | |
| "loss": 0.8517, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.032689268167171055, | |
| "grad_norm": 0.6342369318008423, | |
| "learning_rate": 0.00011611981938730892, | |
| "loss": 0.9284, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.032738277564723035, | |
| "grad_norm": 0.6163157820701599, | |
| "learning_rate": 0.00011590814377699224, | |
| "loss": 0.9593, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.032787286962275015, | |
| "grad_norm": 0.7204130291938782, | |
| "learning_rate": 0.00011569639501170545, | |
| "loss": 0.8955, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.032836296359826996, | |
| "grad_norm": 0.8771257400512695, | |
| "learning_rate": 0.00011548457406519356, | |
| "loss": 1.0293, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.032885305757378976, | |
| "grad_norm": 0.6680341362953186, | |
| "learning_rate": 0.00011527268191153337, | |
| "loss": 0.9114, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.032934315154930956, | |
| "grad_norm": 0.5671672821044922, | |
| "learning_rate": 0.0001150607195251293, | |
| "loss": 0.9915, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.032983324552482936, | |
| "grad_norm": 0.5537201762199402, | |
| "learning_rate": 0.00011484868788070855, | |
| "loss": 1.1166, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.033032333950034916, | |
| "grad_norm": 0.7294965386390686, | |
| "learning_rate": 0.00011463658795331695, | |
| "loss": 0.8928, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.0330813433475869, | |
| "grad_norm": 0.5197209715843201, | |
| "learning_rate": 0.00011442442071831434, | |
| "loss": 0.8729, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.03313035274513888, | |
| "grad_norm": 0.6862895488739014, | |
| "learning_rate": 0.00011421218715136996, | |
| "loss": 1.2211, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.03317936214269086, | |
| "grad_norm": 0.7276074886322021, | |
| "learning_rate": 0.00011399988822845822, | |
| "loss": 0.925, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.03322837154024284, | |
| "grad_norm": 0.6285182237625122, | |
| "learning_rate": 0.00011378752492585396, | |
| "loss": 0.8794, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.03327738093779482, | |
| "grad_norm": 0.7362743020057678, | |
| "learning_rate": 0.00011357509822012817, | |
| "loss": 0.9594, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.0333263903353468, | |
| "grad_norm": 0.7037959098815918, | |
| "learning_rate": 0.00011336260908814336, | |
| "loss": 0.748, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.03337539973289878, | |
| "grad_norm": 0.6326875686645508, | |
| "learning_rate": 0.00011315005850704907, | |
| "loss": 1.0679, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.03342440913045076, | |
| "grad_norm": 0.5508519411087036, | |
| "learning_rate": 0.0001129374474542775, | |
| "loss": 1.0165, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.033473418528002744, | |
| "grad_norm": 0.8182931542396545, | |
| "learning_rate": 0.00011272477690753893, | |
| "loss": 1.0418, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.033522427925554724, | |
| "grad_norm": 0.5952259302139282, | |
| "learning_rate": 0.00011251204784481712, | |
| "loss": 0.8814, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.033571437323106704, | |
| "grad_norm": 0.8138315677642822, | |
| "learning_rate": 0.00011229926124436505, | |
| "loss": 0.9702, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.033620446720658684, | |
| "grad_norm": 0.6177354454994202, | |
| "learning_rate": 0.00011208641808470024, | |
| "loss": 1.0031, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.033669456118210664, | |
| "grad_norm": 0.696311891078949, | |
| "learning_rate": 0.00011187351934460029, | |
| "loss": 1.0397, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.03371846551576265, | |
| "grad_norm": 0.706696093082428, | |
| "learning_rate": 0.0001116605660030984, | |
| "loss": 1.0239, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.03376747491331463, | |
| "grad_norm": 0.6652610898017883, | |
| "learning_rate": 0.00011144755903947886, | |
| "loss": 0.878, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.03381648431086661, | |
| "grad_norm": 0.7847949862480164, | |
| "learning_rate": 0.00011123449943327256, | |
| "loss": 0.879, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.03386549370841859, | |
| "grad_norm": 0.6994475722312927, | |
| "learning_rate": 0.00011102138816425244, | |
| "loss": 1.0035, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.03391450310597057, | |
| "grad_norm": 0.6607378721237183, | |
| "learning_rate": 0.00011080822621242905, | |
| "loss": 0.9448, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.03396351250352255, | |
| "grad_norm": 0.7712897062301636, | |
| "learning_rate": 0.00011059501455804602, | |
| "loss": 0.9159, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.03401252190107453, | |
| "grad_norm": 0.749193012714386, | |
| "learning_rate": 0.00011038175418157548, | |
| "loss": 1.0213, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.03406153129862651, | |
| "grad_norm": 0.5379979610443115, | |
| "learning_rate": 0.00011016844606371364, | |
| "loss": 0.8125, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.03411054069617849, | |
| "grad_norm": 0.6513563990592957, | |
| "learning_rate": 0.00010995509118537632, | |
| "loss": 0.9148, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.03415955009373047, | |
| "grad_norm": 0.6969139575958252, | |
| "learning_rate": 0.00010974169052769425, | |
| "loss": 0.9821, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.03420855949128245, | |
| "grad_norm": 0.6268694400787354, | |
| "learning_rate": 0.0001095282450720088, | |
| "loss": 1.1762, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.03425756888883443, | |
| "grad_norm": 0.6678013205528259, | |
| "learning_rate": 0.00010931475579986725, | |
| "loss": 0.7744, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.03430657828638641, | |
| "grad_norm": 0.6000432968139648, | |
| "learning_rate": 0.00010910122369301842, | |
| "loss": 0.8299, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03435558768393839, | |
| "grad_norm": 0.8176240921020508, | |
| "learning_rate": 0.00010888764973340815, | |
| "loss": 1.0136, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.03440459708149038, | |
| "grad_norm": 0.5953860282897949, | |
| "learning_rate": 0.00010867403490317465, | |
| "loss": 0.8912, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.03445360647904236, | |
| "grad_norm": 0.677713930606842, | |
| "learning_rate": 0.00010846038018464413, | |
| "loss": 0.9466, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.03450261587659434, | |
| "grad_norm": 0.6491566896438599, | |
| "learning_rate": 0.00010824668656032624, | |
| "loss": 0.752, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.03455162527414632, | |
| "grad_norm": 0.46719634532928467, | |
| "learning_rate": 0.00010803295501290953, | |
| "loss": 0.9382, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.0346006346716983, | |
| "grad_norm": 0.7482130527496338, | |
| "learning_rate": 0.00010781918652525693, | |
| "loss": 1.1858, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.03464964406925028, | |
| "grad_norm": 0.5864776968955994, | |
| "learning_rate": 0.00010760538208040125, | |
| "loss": 0.9708, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.03469865346680226, | |
| "grad_norm": 0.5870556831359863, | |
| "learning_rate": 0.00010739154266154065, | |
| "loss": 0.9644, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.03474766286435424, | |
| "grad_norm": 0.7998099327087402, | |
| "learning_rate": 0.00010717766925203418, | |
| "loss": 0.8556, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.03479667226190622, | |
| "grad_norm": 0.6456443667411804, | |
| "learning_rate": 0.00010696376283539704, | |
| "loss": 0.9475, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.0348456816594582, | |
| "grad_norm": 0.7642804384231567, | |
| "learning_rate": 0.00010674982439529642, | |
| "loss": 0.818, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.03489469105701018, | |
| "grad_norm": 0.763278067111969, | |
| "learning_rate": 0.00010653585491554664, | |
| "loss": 0.9955, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.03494370045456216, | |
| "grad_norm": 0.5111615061759949, | |
| "learning_rate": 0.00010632185538010477, | |
| "loss": 0.8955, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.03499270985211414, | |
| "grad_norm": 0.8736951947212219, | |
| "learning_rate": 0.00010610782677306614, | |
| "loss": 1.1456, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.03504171924966612, | |
| "grad_norm": 0.5861103534698486, | |
| "learning_rate": 0.00010589377007865973, | |
| "loss": 0.8175, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.03509072864721811, | |
| "grad_norm": 0.5677356123924255, | |
| "learning_rate": 0.00010567968628124367, | |
| "loss": 1.0074, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.03513973804477009, | |
| "grad_norm": 0.7105158567428589, | |
| "learning_rate": 0.00010546557636530086, | |
| "loss": 0.8538, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.03518874744232207, | |
| "grad_norm": 0.6045786738395691, | |
| "learning_rate": 0.00010525144131543405, | |
| "loss": 0.9659, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.03523775683987405, | |
| "grad_norm": 0.6696767807006836, | |
| "learning_rate": 0.00010503728211636185, | |
| "loss": 0.9815, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.03528676623742603, | |
| "grad_norm": 0.7084499001502991, | |
| "learning_rate": 0.00010482309975291373, | |
| "loss": 1.1448, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.03533577563497801, | |
| "grad_norm": 0.6141902804374695, | |
| "learning_rate": 0.00010460889521002572, | |
| "loss": 0.9899, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.03538478503252999, | |
| "grad_norm": 0.6070237755775452, | |
| "learning_rate": 0.00010439466947273595, | |
| "loss": 0.9804, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.03543379443008197, | |
| "grad_norm": 0.5481370091438293, | |
| "learning_rate": 0.00010418042352617982, | |
| "loss": 0.8835, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.03548280382763395, | |
| "grad_norm": 0.679847776889801, | |
| "learning_rate": 0.0001039661583555859, | |
| "loss": 1.1018, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.03553181322518593, | |
| "grad_norm": 0.5827143788337708, | |
| "learning_rate": 0.00010375187494627098, | |
| "loss": 0.7655, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.03558082262273791, | |
| "grad_norm": 0.6592413187026978, | |
| "learning_rate": 0.00010353757428363579, | |
| "loss": 0.9435, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.03562983202028989, | |
| "grad_norm": 0.5104247331619263, | |
| "learning_rate": 0.0001033232573531604, | |
| "loss": 0.741, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.03567884141784187, | |
| "grad_norm": 0.6800944805145264, | |
| "learning_rate": 0.00010310892514039967, | |
| "loss": 1.0278, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.035727850815393855, | |
| "grad_norm": 0.6092975735664368, | |
| "learning_rate": 0.00010289457863097875, | |
| "loss": 0.9647, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.035776860212945835, | |
| "grad_norm": 0.7586796879768372, | |
| "learning_rate": 0.00010268021881058858, | |
| "loss": 1.0619, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.035825869610497815, | |
| "grad_norm": 0.7803993821144104, | |
| "learning_rate": 0.0001024658466649812, | |
| "loss": 0.9005, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.035874879008049795, | |
| "grad_norm": 0.6602010726928711, | |
| "learning_rate": 0.00010225146317996546, | |
| "loss": 0.8085, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.035923888405601775, | |
| "grad_norm": 0.8171247839927673, | |
| "learning_rate": 0.00010203706934140225, | |
| "loss": 0.9024, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.035972897803153756, | |
| "grad_norm": 0.6471207737922668, | |
| "learning_rate": 0.00010182266613520013, | |
| "loss": 0.9098, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.036021907200705736, | |
| "grad_norm": 0.8064071536064148, | |
| "learning_rate": 0.00010160825454731071, | |
| "loss": 0.8098, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.036070916598257716, | |
| "grad_norm": 0.5769966840744019, | |
| "learning_rate": 0.00010139383556372418, | |
| "loss": 0.8571, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.036119925995809696, | |
| "grad_norm": 0.6435970664024353, | |
| "learning_rate": 0.00010117941017046467, | |
| "loss": 0.8363, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.036168935393361676, | |
| "grad_norm": 0.9202609062194824, | |
| "learning_rate": 0.00010096497935358584, | |
| "loss": 1.1239, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.036168935393361676, | |
| "eval_loss": null, | |
| "eval_runtime": 183.5809, | |
| "eval_samples_per_second": 46.802, | |
| "eval_steps_per_second": 23.401, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.036217944790913656, | |
| "grad_norm": 0.7503764629364014, | |
| "learning_rate": 0.00010075054409916631, | |
| "loss": 1.0747, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.036266954188465636, | |
| "grad_norm": 0.8134047389030457, | |
| "learning_rate": 0.00010053610539330507, | |
| "loss": 0.9621, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.036315963586017616, | |
| "grad_norm": 0.5960626006126404, | |
| "learning_rate": 0.00010032166422211697, | |
| "loss": 0.9175, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.036364972983569596, | |
| "grad_norm": 0.6606927514076233, | |
| "learning_rate": 0.00010010722157172818, | |
| "loss": 1.087, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.03641398238112158, | |
| "grad_norm": 1.0997828245162964, | |
| "learning_rate": 9.989277842827183e-05, | |
| "loss": 1.2575, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.03646299177867356, | |
| "grad_norm": 0.6103606224060059, | |
| "learning_rate": 9.967833577788308e-05, | |
| "loss": 0.8763, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.03651200117622554, | |
| "grad_norm": 0.9485415816307068, | |
| "learning_rate": 9.946389460669496e-05, | |
| "loss": 1.1258, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.03656101057377752, | |
| "grad_norm": 0.5692161917686462, | |
| "learning_rate": 9.924945590083371e-05, | |
| "loss": 0.853, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.036610019971329504, | |
| "grad_norm": 0.5405739545822144, | |
| "learning_rate": 9.90350206464142e-05, | |
| "loss": 0.8583, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.036659029368881484, | |
| "grad_norm": 0.546116054058075, | |
| "learning_rate": 9.882058982953536e-05, | |
| "loss": 0.9951, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.036708038766433464, | |
| "grad_norm": 0.5827860832214355, | |
| "learning_rate": 9.860616443627586e-05, | |
| "loss": 1.0165, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.036757048163985444, | |
| "grad_norm": 0.5827939510345459, | |
| "learning_rate": 9.839174545268931e-05, | |
| "loss": 0.7871, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.036806057561537424, | |
| "grad_norm": 0.5125460028648376, | |
| "learning_rate": 9.817733386479987e-05, | |
| "loss": 1.0009, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.036855066959089404, | |
| "grad_norm": 0.6548879742622375, | |
| "learning_rate": 9.796293065859776e-05, | |
| "loss": 1.0134, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.036904076356641384, | |
| "grad_norm": 0.5788992643356323, | |
| "learning_rate": 9.774853682003456e-05, | |
| "loss": 1.0251, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.036953085754193364, | |
| "grad_norm": 0.5059471130371094, | |
| "learning_rate": 9.75341533350188e-05, | |
| "loss": 0.9197, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.037002095151745344, | |
| "grad_norm": 1.1150619983673096, | |
| "learning_rate": 9.731978118941142e-05, | |
| "loss": 0.9863, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.03705110454929733, | |
| "grad_norm": 0.8003139495849609, | |
| "learning_rate": 9.710542136902127e-05, | |
| "loss": 0.8551, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.03710011394684931, | |
| "grad_norm": 0.6236535310745239, | |
| "learning_rate": 9.689107485960038e-05, | |
| "loss": 0.8808, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.03714912334440129, | |
| "grad_norm": 0.7307648658752441, | |
| "learning_rate": 9.667674264683962e-05, | |
| "loss": 0.7793, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.03719813274195327, | |
| "grad_norm": 0.6053159236907959, | |
| "learning_rate": 9.646242571636423e-05, | |
| "loss": 0.8144, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.03724714213950525, | |
| "grad_norm": 0.731212854385376, | |
| "learning_rate": 9.624812505372907e-05, | |
| "loss": 1.0786, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.03729615153705723, | |
| "grad_norm": 0.8967050313949585, | |
| "learning_rate": 9.60338416444141e-05, | |
| "loss": 1.0071, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.03734516093460921, | |
| "grad_norm": 0.9089602828025818, | |
| "learning_rate": 9.581957647382019e-05, | |
| "loss": 1.0854, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.03739417033216119, | |
| "grad_norm": 0.6406798958778381, | |
| "learning_rate": 9.56053305272641e-05, | |
| "loss": 0.8949, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.03744317972971317, | |
| "grad_norm": 0.5702521204948425, | |
| "learning_rate": 9.539110478997429e-05, | |
| "loss": 0.9991, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.03749218912726515, | |
| "grad_norm": 0.6696991324424744, | |
| "learning_rate": 9.517690024708628e-05, | |
| "loss": 0.7774, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.03754119852481713, | |
| "grad_norm": 0.7377912998199463, | |
| "learning_rate": 9.496271788363819e-05, | |
| "loss": 1.0607, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.03759020792236911, | |
| "grad_norm": 0.651839554309845, | |
| "learning_rate": 9.474855868456593e-05, | |
| "loss": 0.8064, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.03763921731992109, | |
| "grad_norm": 0.6405967473983765, | |
| "learning_rate": 9.453442363469917e-05, | |
| "loss": 0.8916, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.03768822671747307, | |
| "grad_norm": 0.6190482974052429, | |
| "learning_rate": 9.432031371875634e-05, | |
| "loss": 0.8863, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.03773723611502506, | |
| "grad_norm": 0.7289479970932007, | |
| "learning_rate": 9.410622992134032e-05, | |
| "loss": 0.9322, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.03778624551257704, | |
| "grad_norm": 0.5301181077957153, | |
| "learning_rate": 9.389217322693388e-05, | |
| "loss": 0.8915, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.03783525491012902, | |
| "grad_norm": 0.5292813181877136, | |
| "learning_rate": 9.367814461989526e-05, | |
| "loss": 0.9377, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.037884264307681, | |
| "grad_norm": 0.5793006420135498, | |
| "learning_rate": 9.346414508445341e-05, | |
| "loss": 0.8897, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.03793327370523298, | |
| "grad_norm": 0.6730450987815857, | |
| "learning_rate": 9.325017560470358e-05, | |
| "loss": 0.8906, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.03798228310278496, | |
| "grad_norm": 0.7565414905548096, | |
| "learning_rate": 9.303623716460297e-05, | |
| "loss": 1.0982, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.03803129250033694, | |
| "grad_norm": 0.5824629664421082, | |
| "learning_rate": 9.282233074796587e-05, | |
| "loss": 0.7489, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.03808030189788892, | |
| "grad_norm": 0.6062259078025818, | |
| "learning_rate": 9.260845733845934e-05, | |
| "loss": 1.003, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.0381293112954409, | |
| "grad_norm": 0.6178932785987854, | |
| "learning_rate": 9.239461791959876e-05, | |
| "loss": 1.0034, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.03817832069299288, | |
| "grad_norm": 0.4914613962173462, | |
| "learning_rate": 9.218081347474311e-05, | |
| "loss": 0.9453, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.03822733009054486, | |
| "grad_norm": 0.5196013450622559, | |
| "learning_rate": 9.196704498709049e-05, | |
| "loss": 0.8815, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.03827633948809684, | |
| "grad_norm": 0.636272132396698, | |
| "learning_rate": 9.175331343967377e-05, | |
| "loss": 0.9161, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.03832534888564882, | |
| "grad_norm": 0.5790011286735535, | |
| "learning_rate": 9.153961981535589e-05, | |
| "loss": 0.882, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.0383743582832008, | |
| "grad_norm": 0.5262641310691833, | |
| "learning_rate": 9.13259650968254e-05, | |
| "loss": 0.8483, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.03842336768075279, | |
| "grad_norm": 0.9476785063743591, | |
| "learning_rate": 9.111235026659187e-05, | |
| "loss": 0.9465, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.03847237707830477, | |
| "grad_norm": 0.7593225836753845, | |
| "learning_rate": 9.089877630698158e-05, | |
| "loss": 0.9721, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.03852138647585675, | |
| "grad_norm": 0.7612167000770569, | |
| "learning_rate": 9.068524420013277e-05, | |
| "loss": 0.9404, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.03857039587340873, | |
| "grad_norm": 0.6220802664756775, | |
| "learning_rate": 9.047175492799121e-05, | |
| "loss": 0.8821, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.03861940527096071, | |
| "grad_norm": 0.5717998743057251, | |
| "learning_rate": 9.025830947230578e-05, | |
| "loss": 0.8925, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.03866841466851269, | |
| "grad_norm": 0.5665514469146729, | |
| "learning_rate": 9.00449088146237e-05, | |
| "loss": 0.902, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.03871742406606467, | |
| "grad_norm": 0.658800482749939, | |
| "learning_rate": 8.983155393628635e-05, | |
| "loss": 0.9597, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.03876643346361665, | |
| "grad_norm": 0.5453858375549316, | |
| "learning_rate": 8.961824581842454e-05, | |
| "loss": 0.8837, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.03881544286116863, | |
| "grad_norm": 0.6304888725280762, | |
| "learning_rate": 8.940498544195402e-05, | |
| "loss": 0.9236, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.03886445225872061, | |
| "grad_norm": 0.6249805688858032, | |
| "learning_rate": 8.919177378757096e-05, | |
| "loss": 0.8296, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.03891346165627259, | |
| "grad_norm": 0.5646822452545166, | |
| "learning_rate": 8.897861183574758e-05, | |
| "loss": 0.6453, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.03896247105382457, | |
| "grad_norm": 0.8026358485221863, | |
| "learning_rate": 8.876550056672747e-05, | |
| "loss": 0.925, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.03901148045137655, | |
| "grad_norm": 0.5161921381950378, | |
| "learning_rate": 8.855244096052114e-05, | |
| "loss": 0.9219, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.039060489848928535, | |
| "grad_norm": 0.6423723697662354, | |
| "learning_rate": 8.833943399690163e-05, | |
| "loss": 0.856, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.039109499246480516, | |
| "grad_norm": 0.5908942818641663, | |
| "learning_rate": 8.812648065539973e-05, | |
| "loss": 0.8297, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.039158508644032496, | |
| "grad_norm": 0.7309346199035645, | |
| "learning_rate": 8.791358191529978e-05, | |
| "loss": 1.057, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.039207518041584476, | |
| "grad_norm": 0.7637178301811218, | |
| "learning_rate": 8.770073875563493e-05, | |
| "loss": 1.0936, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.039256527439136456, | |
| "grad_norm": 0.6103758215904236, | |
| "learning_rate": 8.74879521551829e-05, | |
| "loss": 0.8872, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.039305536836688436, | |
| "grad_norm": 0.666135311126709, | |
| "learning_rate": 8.727522309246111e-05, | |
| "loss": 0.7733, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.039354546234240416, | |
| "grad_norm": 0.6191517114639282, | |
| "learning_rate": 8.70625525457225e-05, | |
| "loss": 0.9633, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.039403555631792396, | |
| "grad_norm": 0.6226015090942383, | |
| "learning_rate": 8.684994149295094e-05, | |
| "loss": 0.8724, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.039452565029344376, | |
| "grad_norm": 0.5788629055023193, | |
| "learning_rate": 8.663739091185668e-05, | |
| "loss": 0.9352, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.039501574426896356, | |
| "grad_norm": 0.9742415547370911, | |
| "learning_rate": 8.642490177987183e-05, | |
| "loss": 0.8849, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.039550583824448336, | |
| "grad_norm": 0.5254642367362976, | |
| "learning_rate": 8.621247507414605e-05, | |
| "loss": 0.7238, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.039599593222000316, | |
| "grad_norm": 0.7346206307411194, | |
| "learning_rate": 8.600011177154181e-05, | |
| "loss": 0.871, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.039648602619552296, | |
| "grad_norm": 0.6629201173782349, | |
| "learning_rate": 8.578781284863005e-05, | |
| "loss": 0.8112, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.039697612017104277, | |
| "grad_norm": 0.5637260675430298, | |
| "learning_rate": 8.557557928168568e-05, | |
| "loss": 0.9456, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.039746621414656264, | |
| "grad_norm": 0.8434960842132568, | |
| "learning_rate": 8.536341204668307e-05, | |
| "loss": 1.0455, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.039795630812208244, | |
| "grad_norm": 0.6098150014877319, | |
| "learning_rate": 8.515131211929151e-05, | |
| "loss": 0.9403, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.039844640209760224, | |
| "grad_norm": 0.6109006404876709, | |
| "learning_rate": 8.493928047487074e-05, | |
| "loss": 0.9758, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.039893649607312204, | |
| "grad_norm": 0.8072890043258667, | |
| "learning_rate": 8.472731808846664e-05, | |
| "loss": 1.007, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.039942659004864184, | |
| "grad_norm": 0.9491591453552246, | |
| "learning_rate": 8.45154259348065e-05, | |
| "loss": 0.9375, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.039991668402416164, | |
| "grad_norm": 0.5851994752883911, | |
| "learning_rate": 8.430360498829456e-05, | |
| "loss": 0.9222, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.040040677799968144, | |
| "grad_norm": 0.5684947967529297, | |
| "learning_rate": 8.40918562230078e-05, | |
| "loss": 0.9068, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.040089687197520124, | |
| "grad_norm": 0.540804386138916, | |
| "learning_rate": 8.388018061269112e-05, | |
| "loss": 0.8686, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.040138696595072104, | |
| "grad_norm": 0.5974246859550476, | |
| "learning_rate": 8.366857913075301e-05, | |
| "loss": 0.7919, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.040187705992624084, | |
| "grad_norm": 0.6588082909584045, | |
| "learning_rate": 8.345705275026123e-05, | |
| "loss": 0.9414, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.040236715390176064, | |
| "grad_norm": 0.6050723195075989, | |
| "learning_rate": 8.324560244393799e-05, | |
| "loss": 1.031, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.040285724787728044, | |
| "grad_norm": 0.9468902945518494, | |
| "learning_rate": 8.303422918415575e-05, | |
| "loss": 1.0062, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.040334734185280025, | |
| "grad_norm": 0.5976113677024841, | |
| "learning_rate": 8.282293394293268e-05, | |
| "loss": 0.9076, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.040383743582832005, | |
| "grad_norm": 0.651478111743927, | |
| "learning_rate": 8.261171769192818e-05, | |
| "loss": 1.1146, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.04043275298038399, | |
| "grad_norm": 0.5034336447715759, | |
| "learning_rate": 8.240058140243834e-05, | |
| "loss": 0.7507, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.04048176237793597, | |
| "grad_norm": 0.6404789090156555, | |
| "learning_rate": 8.218952604539157e-05, | |
| "loss": 1.0417, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.04053077177548795, | |
| "grad_norm": 0.5468174815177917, | |
| "learning_rate": 8.197855259134415e-05, | |
| "loss": 0.9414, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.04057978117303993, | |
| "grad_norm": 0.7501478791236877, | |
| "learning_rate": 8.176766201047573e-05, | |
| "loss": 0.8987, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.04062879057059191, | |
| "grad_norm": 0.8284885287284851, | |
| "learning_rate": 8.155685527258469e-05, | |
| "loss": 0.7266, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.04067779996814389, | |
| "grad_norm": 0.6814586520195007, | |
| "learning_rate": 8.134613334708412e-05, | |
| "loss": 0.9165, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.04072680936569587, | |
| "grad_norm": 0.9068307280540466, | |
| "learning_rate": 8.113549720299689e-05, | |
| "loss": 0.9455, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.04077581876324785, | |
| "grad_norm": 0.8567232489585876, | |
| "learning_rate": 8.092494780895144e-05, | |
| "loss": 0.9211, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.04082482816079983, | |
| "grad_norm": 0.5463480949401855, | |
| "learning_rate": 8.071448613317739e-05, | |
| "loss": 0.7966, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.04087383755835181, | |
| "grad_norm": 0.6157633662223816, | |
| "learning_rate": 8.05041131435008e-05, | |
| "loss": 0.8836, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.04092284695590379, | |
| "grad_norm": 0.81013423204422, | |
| "learning_rate": 8.029382980734e-05, | |
| "loss": 1.1235, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.04097185635345577, | |
| "grad_norm": 0.693514883518219, | |
| "learning_rate": 8.00836370917011e-05, | |
| "loss": 0.7711, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.04102086575100775, | |
| "grad_norm": 0.908645510673523, | |
| "learning_rate": 7.987353596317339e-05, | |
| "loss": 1.1738, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.04106987514855974, | |
| "grad_norm": 0.6220524311065674, | |
| "learning_rate": 7.966352738792497e-05, | |
| "loss": 1.0084, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.04111888454611172, | |
| "grad_norm": 0.5720627903938293, | |
| "learning_rate": 7.945361233169841e-05, | |
| "loss": 0.8428, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.0411678939436637, | |
| "grad_norm": 0.5578500628471375, | |
| "learning_rate": 7.924379175980618e-05, | |
| "loss": 0.9334, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.04121690334121568, | |
| "grad_norm": 0.6064552664756775, | |
| "learning_rate": 7.90340666371263e-05, | |
| "loss": 0.8455, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.04126591273876766, | |
| "grad_norm": 0.6311472654342651, | |
| "learning_rate": 7.882443792809772e-05, | |
| "loss": 0.9325, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.04131492213631964, | |
| "grad_norm": 0.5778111815452576, | |
| "learning_rate": 7.861490659671621e-05, | |
| "loss": 0.9305, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.04136393153387162, | |
| "grad_norm": 0.6144542098045349, | |
| "learning_rate": 7.840547360652964e-05, | |
| "loss": 0.7443, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.0414129409314236, | |
| "grad_norm": 0.6736694574356079, | |
| "learning_rate": 7.819613992063361e-05, | |
| "loss": 0.9912, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.04146195032897558, | |
| "grad_norm": 0.6833612322807312, | |
| "learning_rate": 7.798690650166722e-05, | |
| "loss": 0.8983, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.04151095972652756, | |
| "grad_norm": 0.6620497703552246, | |
| "learning_rate": 7.77777743118083e-05, | |
| "loss": 0.9986, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.04155996912407954, | |
| "grad_norm": 0.6063772439956665, | |
| "learning_rate": 7.756874431276924e-05, | |
| "loss": 1.0581, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.04160897852163152, | |
| "grad_norm": 0.6608020663261414, | |
| "learning_rate": 7.735981746579254e-05, | |
| "loss": 0.8809, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.0416579879191835, | |
| "grad_norm": 0.6642486453056335, | |
| "learning_rate": 7.715099473164632e-05, | |
| "loss": 0.9361, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.04170699731673548, | |
| "grad_norm": 0.6425474882125854, | |
| "learning_rate": 7.694227707061989e-05, | |
| "loss": 1.0101, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.04175600671428747, | |
| "grad_norm": 0.5888500809669495, | |
| "learning_rate": 7.673366544251936e-05, | |
| "loss": 0.9401, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.04180501611183945, | |
| "grad_norm": 0.7488614916801453, | |
| "learning_rate": 7.652516080666337e-05, | |
| "loss": 0.921, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.04185402550939143, | |
| "grad_norm": 0.6826761960983276, | |
| "learning_rate": 7.631676412187845e-05, | |
| "loss": 0.9312, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.04190303490694341, | |
| "grad_norm": 0.6342119574546814, | |
| "learning_rate": 7.610847634649458e-05, | |
| "loss": 0.9297, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.04195204430449539, | |
| "grad_norm": 0.6165019869804382, | |
| "learning_rate": 7.590029843834123e-05, | |
| "loss": 0.8475, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.04200105370204737, | |
| "grad_norm": 0.6887275576591492, | |
| "learning_rate": 7.569223135474235e-05, | |
| "loss": 0.8233, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.04205006309959935, | |
| "grad_norm": 0.5744852423667908, | |
| "learning_rate": 7.548427605251234e-05, | |
| "loss": 0.8901, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.04209907249715133, | |
| "grad_norm": 0.9375432133674622, | |
| "learning_rate": 7.527643348795162e-05, | |
| "loss": 1.059, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.04214808189470331, | |
| "grad_norm": 0.6605034470558167, | |
| "learning_rate": 7.506870461684215e-05, | |
| "loss": 0.8993, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.04219709129225529, | |
| "grad_norm": 0.6732087135314941, | |
| "learning_rate": 7.486109039444296e-05, | |
| "loss": 0.7159, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.04224610068980727, | |
| "grad_norm": 0.6579399704933167, | |
| "learning_rate": 7.465359177548605e-05, | |
| "loss": 0.8402, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.04229511008735925, | |
| "grad_norm": 0.756745457649231, | |
| "learning_rate": 7.444620971417163e-05, | |
| "loss": 1.1157, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.04234411948491123, | |
| "grad_norm": 0.6364356279373169, | |
| "learning_rate": 7.423894516416403e-05, | |
| "loss": 0.8478, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.042393128882463216, | |
| "grad_norm": 0.5769418478012085, | |
| "learning_rate": 7.403179907858708e-05, | |
| "loss": 0.8928, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.042442138280015196, | |
| "grad_norm": 0.5486810207366943, | |
| "learning_rate": 7.382477241001995e-05, | |
| "loss": 0.9149, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.042491147677567176, | |
| "grad_norm": 0.7786839604377747, | |
| "learning_rate": 7.361786611049265e-05, | |
| "loss": 0.9168, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.042540157075119156, | |
| "grad_norm": 0.7111212611198425, | |
| "learning_rate": 7.341108113148152e-05, | |
| "loss": 1.0262, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.042589166472671136, | |
| "grad_norm": 0.6772000193595886, | |
| "learning_rate": 7.320441842390522e-05, | |
| "loss": 0.6625, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.042638175870223116, | |
| "grad_norm": 0.5871595740318298, | |
| "learning_rate": 7.299787893811998e-05, | |
| "loss": 0.883, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.042687185267775096, | |
| "grad_norm": 0.7698076963424683, | |
| "learning_rate": 7.279146362391538e-05, | |
| "loss": 1.1053, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.042736194665327076, | |
| "grad_norm": 0.9533458352088928, | |
| "learning_rate": 7.25851734305101e-05, | |
| "loss": 1.1109, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.042785204062879056, | |
| "grad_norm": 0.5938100218772888, | |
| "learning_rate": 7.237900930654735e-05, | |
| "loss": 0.9771, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.042834213460431037, | |
| "grad_norm": 0.6919374465942383, | |
| "learning_rate": 7.217297220009059e-05, | |
| "loss": 0.8795, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.04288322285798302, | |
| "grad_norm": 1.0210683345794678, | |
| "learning_rate": 7.196706305861925e-05, | |
| "loss": 1.0654, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.042932232255535, | |
| "grad_norm": 0.6737746000289917, | |
| "learning_rate": 7.176128282902423e-05, | |
| "loss": 0.8689, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.04298124165308698, | |
| "grad_norm": 0.6844781041145325, | |
| "learning_rate": 7.15556324576037e-05, | |
| "loss": 0.848, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.04303025105063896, | |
| "grad_norm": 0.7788298726081848, | |
| "learning_rate": 7.135011289005853e-05, | |
| "loss": 0.9539, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.043079260448190944, | |
| "grad_norm": 0.6003298759460449, | |
| "learning_rate": 7.114472507148824e-05, | |
| "loss": 0.947, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.043128269845742924, | |
| "grad_norm": 0.6445959806442261, | |
| "learning_rate": 7.093946994638638e-05, | |
| "loss": 1.0555, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.043177279243294904, | |
| "grad_norm": 0.6316936612129211, | |
| "learning_rate": 7.073434845863631e-05, | |
| "loss": 0.8573, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.043226288640846884, | |
| "grad_norm": 0.7546505331993103, | |
| "learning_rate": 7.052936155150694e-05, | |
| "loss": 0.94, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.043275298038398864, | |
| "grad_norm": 0.6299151182174683, | |
| "learning_rate": 7.032451016764817e-05, | |
| "loss": 1.0281, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.043324307435950844, | |
| "grad_norm": 0.546668291091919, | |
| "learning_rate": 7.011979524908674e-05, | |
| "loss": 0.7062, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.043373316833502824, | |
| "grad_norm": 0.6820911765098572, | |
| "learning_rate": 6.991521773722186e-05, | |
| "loss": 0.7952, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.043422326231054804, | |
| "grad_norm": 0.5912885665893555, | |
| "learning_rate": 6.971077857282087e-05, | |
| "loss": 0.927, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.043471335628606785, | |
| "grad_norm": 0.5003869533538818, | |
| "learning_rate": 6.950647869601484e-05, | |
| "loss": 0.9082, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.043520345026158765, | |
| "grad_norm": 0.654216468334198, | |
| "learning_rate": 6.93023190462944e-05, | |
| "loss": 0.8691, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.043569354423710745, | |
| "grad_norm": 0.564075767993927, | |
| "learning_rate": 6.909830056250527e-05, | |
| "loss": 0.9184, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.043618363821262725, | |
| "grad_norm": 0.7271436452865601, | |
| "learning_rate": 6.889442418284402e-05, | |
| "loss": 0.9718, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.043667373218814705, | |
| "grad_norm": 0.628280520439148, | |
| "learning_rate": 6.86906908448538e-05, | |
| "loss": 0.8827, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.043716382616366685, | |
| "grad_norm": 0.5660605430603027, | |
| "learning_rate": 6.848710148541988e-05, | |
| "loss": 0.9325, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.04376539201391867, | |
| "grad_norm": 0.6978943943977356, | |
| "learning_rate": 6.828365704076547e-05, | |
| "loss": 0.8103, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.04381440141147065, | |
| "grad_norm": 0.7359114289283752, | |
| "learning_rate": 6.808035844644738e-05, | |
| "loss": 0.8615, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.04386341080902263, | |
| "grad_norm": 0.6953577399253845, | |
| "learning_rate": 6.787720663735178e-05, | |
| "loss": 0.9831, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.04391242020657461, | |
| "grad_norm": 0.8604477047920227, | |
| "learning_rate": 6.76742025476897e-05, | |
| "loss": 0.9752, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.04396142960412659, | |
| "grad_norm": 0.6907943487167358, | |
| "learning_rate": 6.747134711099292e-05, | |
| "loss": 0.9169, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.04401043900167857, | |
| "grad_norm": 0.644757091999054, | |
| "learning_rate": 6.726864126010973e-05, | |
| "loss": 0.8078, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.04405944839923055, | |
| "grad_norm": 0.566016435623169, | |
| "learning_rate": 6.706608592720043e-05, | |
| "loss": 0.8091, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.04410845779678253, | |
| "grad_norm": 0.7891855835914612, | |
| "learning_rate": 6.68636820437331e-05, | |
| "loss": 1.0295, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04415746719433451, | |
| "grad_norm": 0.8049860596656799, | |
| "learning_rate": 6.666143054047955e-05, | |
| "loss": 0.9562, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.04420647659188649, | |
| "grad_norm": 0.7799884080886841, | |
| "learning_rate": 6.64593323475107e-05, | |
| "loss": 0.927, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.04425548598943847, | |
| "grad_norm": 1.0052690505981445, | |
| "learning_rate": 6.625738839419244e-05, | |
| "loss": 1.2332, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.04430449538699045, | |
| "grad_norm": 0.5868604183197021, | |
| "learning_rate": 6.605559960918155e-05, | |
| "loss": 0.8657, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.04435350478454243, | |
| "grad_norm": 0.8376001119613647, | |
| "learning_rate": 6.585396692042113e-05, | |
| "loss": 1.0565, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.04440251418209442, | |
| "grad_norm": 0.7444510459899902, | |
| "learning_rate": 6.565249125513641e-05, | |
| "loss": 0.8771, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.0444515235796464, | |
| "grad_norm": 1.0465072393417358, | |
| "learning_rate": 6.545117353983064e-05, | |
| "loss": 1.1834, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.04450053297719838, | |
| "grad_norm": 0.5080894231796265, | |
| "learning_rate": 6.525001470028068e-05, | |
| "loss": 0.785, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.04454954237475036, | |
| "grad_norm": 0.7592306137084961, | |
| "learning_rate": 6.504901566153281e-05, | |
| "loss": 0.9745, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.04459855177230234, | |
| "grad_norm": 0.6754018068313599, | |
| "learning_rate": 6.484817734789838e-05, | |
| "loss": 0.8665, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.04464756116985432, | |
| "grad_norm": 0.7137823700904846, | |
| "learning_rate": 6.464750068294974e-05, | |
| "loss": 0.8713, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.0446965705674063, | |
| "grad_norm": 0.6034334897994995, | |
| "learning_rate": 6.44469865895158e-05, | |
| "loss": 0.865, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.04474557996495828, | |
| "grad_norm": 0.9829195141792297, | |
| "learning_rate": 6.424663598967785e-05, | |
| "loss": 1.0658, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.04479458936251026, | |
| "grad_norm": 1.1569057703018188, | |
| "learning_rate": 6.404644980476551e-05, | |
| "loss": 0.6931, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.04484359876006224, | |
| "grad_norm": 0.913396954536438, | |
| "learning_rate": 6.384642895535209e-05, | |
| "loss": 1.1898, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.04489260815761422, | |
| "grad_norm": 0.5226083993911743, | |
| "learning_rate": 6.364657436125073e-05, | |
| "loss": 0.8841, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.0449416175551662, | |
| "grad_norm": 0.7840003371238708, | |
| "learning_rate": 6.344688694151004e-05, | |
| "loss": 0.6906, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.04499062695271818, | |
| "grad_norm": 0.7827737331390381, | |
| "learning_rate": 6.324736761440983e-05, | |
| "loss": 1.0184, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.04503963635027016, | |
| "grad_norm": 0.6751218438148499, | |
| "learning_rate": 6.304801729745688e-05, | |
| "loss": 0.9126, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.04508864574782215, | |
| "grad_norm": 0.6796963214874268, | |
| "learning_rate": 6.28488369073808e-05, | |
| "loss": 0.8393, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.04513765514537413, | |
| "grad_norm": 0.9316746592521667, | |
| "learning_rate": 6.26498273601298e-05, | |
| "loss": 1.1761, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.04518666454292611, | |
| "grad_norm": 0.704622209072113, | |
| "learning_rate": 6.245098957086648e-05, | |
| "loss": 1.0137, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.04523567394047809, | |
| "grad_norm": 0.6289026737213135, | |
| "learning_rate": 6.225232445396345e-05, | |
| "loss": 0.9149, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.04528468333803007, | |
| "grad_norm": 0.6918268203735352, | |
| "learning_rate": 6.205383292299942e-05, | |
| "loss": 1.0257, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.04533369273558205, | |
| "grad_norm": 0.5847110152244568, | |
| "learning_rate": 6.185551589075482e-05, | |
| "loss": 0.9003, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.04538270213313403, | |
| "grad_norm": 0.6945834159851074, | |
| "learning_rate": 6.165737426920757e-05, | |
| "loss": 0.7409, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.04543171153068601, | |
| "grad_norm": 0.6071839928627014, | |
| "learning_rate": 6.145940896952907e-05, | |
| "loss": 0.9796, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.04548072092823799, | |
| "grad_norm": 0.6884227395057678, | |
| "learning_rate": 6.126162090207972e-05, | |
| "loss": 0.9285, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.04552973032578997, | |
| "grad_norm": 0.7866727709770203, | |
| "learning_rate": 6.106401097640502e-05, | |
| "loss": 0.951, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.04557873972334195, | |
| "grad_norm": 0.7089359164237976, | |
| "learning_rate": 6.0866580101231255e-05, | |
| "loss": 1.0993, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.04562774912089393, | |
| "grad_norm": 0.7076542377471924, | |
| "learning_rate": 6.066932918446135e-05, | |
| "loss": 0.9934, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.04567675851844591, | |
| "grad_norm": 0.5357589721679688, | |
| "learning_rate": 6.047225913317058e-05, | |
| "loss": 0.8851, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.04572576791599789, | |
| "grad_norm": 0.5877269506454468, | |
| "learning_rate": 6.0275370853602596e-05, | |
| "loss": 0.9545, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.045774777313549876, | |
| "grad_norm": 0.6116016507148743, | |
| "learning_rate": 6.007866525116511e-05, | |
| "loss": 0.9497, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.045823786711101856, | |
| "grad_norm": 0.6139143705368042, | |
| "learning_rate": 5.988214323042581e-05, | |
| "loss": 0.9749, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.045872796108653836, | |
| "grad_norm": 0.5860751867294312, | |
| "learning_rate": 5.9685805695108085e-05, | |
| "loss": 0.9524, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.045921805506205816, | |
| "grad_norm": 0.817271888256073, | |
| "learning_rate": 5.948965354808709e-05, | |
| "loss": 0.9185, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.045970814903757796, | |
| "grad_norm": 0.6361889243125916, | |
| "learning_rate": 5.929368769138531e-05, | |
| "loss": 0.8063, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.04601982430130978, | |
| "grad_norm": 0.5513597726821899, | |
| "learning_rate": 5.909790902616864e-05, | |
| "loss": 0.974, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.04606883369886176, | |
| "grad_norm": 0.6770530343055725, | |
| "learning_rate": 5.890231845274219e-05, | |
| "loss": 0.8411, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.04611784309641374, | |
| "grad_norm": 0.7206839919090271, | |
| "learning_rate": 5.870691687054602e-05, | |
| "loss": 0.7419, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.04616685249396572, | |
| "grad_norm": 0.5544096231460571, | |
| "learning_rate": 5.8511705178151145e-05, | |
| "loss": 0.9463, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.0462158618915177, | |
| "grad_norm": 0.8671022653579712, | |
| "learning_rate": 5.83166842732554e-05, | |
| "loss": 0.8734, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.04626487128906968, | |
| "grad_norm": 0.7392458915710449, | |
| "learning_rate": 5.812185505267914e-05, | |
| "loss": 0.9899, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.04631388068662166, | |
| "grad_norm": 0.6004726886749268, | |
| "learning_rate": 5.792721841236143e-05, | |
| "loss": 0.9051, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.04636289008417364, | |
| "grad_norm": 0.7594625949859619, | |
| "learning_rate": 5.773277524735553e-05, | |
| "loss": 1.053, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.046411899481725624, | |
| "grad_norm": 0.7624562382698059, | |
| "learning_rate": 5.753852645182518e-05, | |
| "loss": 0.8534, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.046460908879277604, | |
| "grad_norm": 0.7300339341163635, | |
| "learning_rate": 5.7344472919040136e-05, | |
| "loss": 0.8207, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.046509918276829584, | |
| "grad_norm": 0.7849738597869873, | |
| "learning_rate": 5.715061554137232e-05, | |
| "loss": 1.1812, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.046558927674381564, | |
| "grad_norm": 1.06199049949646, | |
| "learning_rate": 5.695695521029163e-05, | |
| "loss": 1.0975, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.046607937071933545, | |
| "grad_norm": 0.7558932304382324, | |
| "learning_rate": 5.676349281636175e-05, | |
| "loss": 0.9038, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.046656946469485525, | |
| "grad_norm": 0.6437797546386719, | |
| "learning_rate": 5.6570229249236126e-05, | |
| "loss": 0.8132, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.046705955867037505, | |
| "grad_norm": 1.0840975046157837, | |
| "learning_rate": 5.637716539765397e-05, | |
| "loss": 1.0105, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.046754965264589485, | |
| "grad_norm": 0.7432597279548645, | |
| "learning_rate": 5.618430214943608e-05, | |
| "loss": 0.8406, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.046803974662141465, | |
| "grad_norm": 0.5832216143608093, | |
| "learning_rate": 5.599164039148066e-05, | |
| "loss": 0.8905, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.046852984059693445, | |
| "grad_norm": 0.8845385909080505, | |
| "learning_rate": 5.5799181009759474e-05, | |
| "loss": 0.9021, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.046901993457245425, | |
| "grad_norm": 0.6067280173301697, | |
| "learning_rate": 5.5606924889313474e-05, | |
| "loss": 0.8903, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.046951002854797405, | |
| "grad_norm": 0.9422512054443359, | |
| "learning_rate": 5.541487291424909e-05, | |
| "loss": 1.0439, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.047000012252349385, | |
| "grad_norm": 0.6058634519577026, | |
| "learning_rate": 5.522302596773383e-05, | |
| "loss": 0.9344, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.047049021649901365, | |
| "grad_norm": 0.5414987206459045, | |
| "learning_rate": 5.503138493199247e-05, | |
| "loss": 0.938, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.04709803104745335, | |
| "grad_norm": 0.648684561252594, | |
| "learning_rate": 5.483995068830272e-05, | |
| "loss": 0.9395, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.04714704044500533, | |
| "grad_norm": 0.6390448212623596, | |
| "learning_rate": 5.4648724116991536e-05, | |
| "loss": 0.8967, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.04719604984255731, | |
| "grad_norm": 0.496037095785141, | |
| "learning_rate": 5.445770609743078e-05, | |
| "loss": 0.8173, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.04724505924010929, | |
| "grad_norm": 0.583728551864624, | |
| "learning_rate": 5.4266897508033264e-05, | |
| "loss": 0.869, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.04729406863766127, | |
| "grad_norm": 0.6022070050239563, | |
| "learning_rate": 5.407629922624866e-05, | |
| "loss": 0.9478, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.04734307803521325, | |
| "grad_norm": 0.6118979454040527, | |
| "learning_rate": 5.3885912128559725e-05, | |
| "loss": 0.9297, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.04739208743276523, | |
| "grad_norm": 0.6469177007675171, | |
| "learning_rate": 5.369573709047786e-05, | |
| "loss": 0.8545, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.04744109683031721, | |
| "grad_norm": 0.9258518815040588, | |
| "learning_rate": 5.350577498653935e-05, | |
| "loss": 0.8147, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.04749010622786919, | |
| "grad_norm": 0.5451820492744446, | |
| "learning_rate": 5.3316026690301305e-05, | |
| "loss": 0.8505, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.04753911562542117, | |
| "grad_norm": 0.5648760199546814, | |
| "learning_rate": 5.312649307433768e-05, | |
| "loss": 0.811, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.04758812502297315, | |
| "grad_norm": 0.7802280783653259, | |
| "learning_rate": 5.2937175010235096e-05, | |
| "loss": 0.7369, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.04763713442052513, | |
| "grad_norm": 0.5710716843605042, | |
| "learning_rate": 5.2748073368588945e-05, | |
| "loss": 0.9393, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.04768614381807711, | |
| "grad_norm": 0.6245925426483154, | |
| "learning_rate": 5.25591890189995e-05, | |
| "loss": 0.9471, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.0477351532156291, | |
| "grad_norm": 0.8834426999092102, | |
| "learning_rate": 5.2370522830067646e-05, | |
| "loss": 1.0204, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.04778416261318108, | |
| "grad_norm": 0.6848143339157104, | |
| "learning_rate": 5.218207566939116e-05, | |
| "loss": 0.9117, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.04783317201073306, | |
| "grad_norm": 0.5751230120658875, | |
| "learning_rate": 5.1993848403560575e-05, | |
| "loss": 0.868, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.04788218140828504, | |
| "grad_norm": 0.6816519498825073, | |
| "learning_rate": 5.180584189815515e-05, | |
| "loss": 0.9373, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.04793119080583702, | |
| "grad_norm": 0.543278157711029, | |
| "learning_rate": 5.161805701773894e-05, | |
| "loss": 0.8105, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.047980200203389, | |
| "grad_norm": 0.7940229177474976, | |
| "learning_rate": 5.143049462585705e-05, | |
| "loss": 0.8127, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.04802920960094098, | |
| "grad_norm": 0.6375604867935181, | |
| "learning_rate": 5.124315558503121e-05, | |
| "loss": 0.9169, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.04807821899849296, | |
| "grad_norm": 0.6112627983093262, | |
| "learning_rate": 5.105604075675614e-05, | |
| "loss": 0.9554, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.04812722839604494, | |
| "grad_norm": 0.8625818490982056, | |
| "learning_rate": 5.0869151001495504e-05, | |
| "loss": 0.9638, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.04817623779359692, | |
| "grad_norm": 0.6942315101623535, | |
| "learning_rate": 5.068248717867801e-05, | |
| "loss": 0.9234, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.0482252471911489, | |
| "grad_norm": 0.6004964709281921, | |
| "learning_rate": 5.0496050146693254e-05, | |
| "loss": 1.0006, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.04827425658870088, | |
| "grad_norm": 0.6091214418411255, | |
| "learning_rate": 5.030984076288805e-05, | |
| "loss": 0.9975, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.04832326598625286, | |
| "grad_norm": 0.657734751701355, | |
| "learning_rate": 5.0123859883562243e-05, | |
| "loss": 1.0277, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.04837227538380484, | |
| "grad_norm": 0.6832321286201477, | |
| "learning_rate": 4.9938108363965006e-05, | |
| "loss": 0.9766, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.04842128478135683, | |
| "grad_norm": 0.6531272530555725, | |
| "learning_rate": 4.9752587058290625e-05, | |
| "loss": 0.8578, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.04847029417890881, | |
| "grad_norm": 0.7007570266723633, | |
| "learning_rate": 4.956729681967489e-05, | |
| "loss": 0.9515, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.04851930357646079, | |
| "grad_norm": 1.039275050163269, | |
| "learning_rate": 4.938223850019087e-05, | |
| "loss": 0.9176, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.04856831297401277, | |
| "grad_norm": 0.802375853061676, | |
| "learning_rate": 4.9197412950845214e-05, | |
| "loss": 1.0036, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.04861732237156475, | |
| "grad_norm": 0.5418473482131958, | |
| "learning_rate": 4.9012821021574183e-05, | |
| "loss": 0.8192, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.04866633176911673, | |
| "grad_norm": 0.7108655571937561, | |
| "learning_rate": 4.882846356123965e-05, | |
| "loss": 0.831, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.04871534116666871, | |
| "grad_norm": 0.676276683807373, | |
| "learning_rate": 4.864434141762521e-05, | |
| "loss": 0.9073, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.04876435056422069, | |
| "grad_norm": 0.6457884311676025, | |
| "learning_rate": 4.846045543743247e-05, | |
| "loss": 0.8802, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.04881335996177267, | |
| "grad_norm": 0.7171979546546936, | |
| "learning_rate": 4.827680646627699e-05, | |
| "loss": 1.1391, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.04886236935932465, | |
| "grad_norm": 0.8592037558555603, | |
| "learning_rate": 4.809339534868432e-05, | |
| "loss": 1.0055, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.04891137875687663, | |
| "grad_norm": 0.8035635948181152, | |
| "learning_rate": 4.791022292808636e-05, | |
| "loss": 0.8176, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.04896038815442861, | |
| "grad_norm": 0.7246670722961426, | |
| "learning_rate": 4.77272900468172e-05, | |
| "loss": 0.899, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.04900939755198059, | |
| "grad_norm": 0.8624154329299927, | |
| "learning_rate": 4.7544597546109514e-05, | |
| "loss": 0.9579, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04905840694953257, | |
| "grad_norm": 0.6191447377204895, | |
| "learning_rate": 4.7362146266090465e-05, | |
| "loss": 0.9171, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.049107416347084556, | |
| "grad_norm": 0.664317786693573, | |
| "learning_rate": 4.717993704577806e-05, | |
| "loss": 0.671, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.04915642574463654, | |
| "grad_norm": 0.5835380554199219, | |
| "learning_rate": 4.6997970723077e-05, | |
| "loss": 0.838, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.04920543514218852, | |
| "grad_norm": 0.7254071831703186, | |
| "learning_rate": 4.681624813477515e-05, | |
| "loss": 0.9072, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.0492544445397405, | |
| "grad_norm": 0.7444500923156738, | |
| "learning_rate": 4.663477011653955e-05, | |
| "loss": 0.932, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.04930345393729248, | |
| "grad_norm": 0.6171773076057434, | |
| "learning_rate": 4.645353750291245e-05, | |
| "loss": 0.9917, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.04935246333484446, | |
| "grad_norm": 0.6146338582038879, | |
| "learning_rate": 4.627255112730761e-05, | |
| "loss": 1.0638, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.04940147273239644, | |
| "grad_norm": 0.5829159617424011, | |
| "learning_rate": 4.6091811822006507e-05, | |
| "loss": 0.7155, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.04945048212994842, | |
| "grad_norm": 0.9201460480690002, | |
| "learning_rate": 4.591132041815445e-05, | |
| "loss": 0.9896, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.0494994915275004, | |
| "grad_norm": 0.6689648032188416, | |
| "learning_rate": 4.5731077745756644e-05, | |
| "loss": 1.0704, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.04954850092505238, | |
| "grad_norm": 0.6462419629096985, | |
| "learning_rate": 4.555108463367463e-05, | |
| "loss": 0.9133, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.04959751032260436, | |
| "grad_norm": 0.7375591993331909, | |
| "learning_rate": 4.537134190962216e-05, | |
| "loss": 0.9788, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.04964651972015634, | |
| "grad_norm": 0.6392641067504883, | |
| "learning_rate": 4.5191850400161715e-05, | |
| "loss": 0.8552, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.04969552911770832, | |
| "grad_norm": 0.7200862169265747, | |
| "learning_rate": 4.5012610930700406e-05, | |
| "loss": 0.9416, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.049744538515260305, | |
| "grad_norm": 0.6843019723892212, | |
| "learning_rate": 4.4833624325486446e-05, | |
| "loss": 0.7569, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.049793547912812285, | |
| "grad_norm": 0.7355234026908875, | |
| "learning_rate": 4.4654891407605096e-05, | |
| "loss": 0.778, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.049842557310364265, | |
| "grad_norm": 0.8321521878242493, | |
| "learning_rate": 4.4476412998975106e-05, | |
| "loss": 0.7831, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.049891566707916245, | |
| "grad_norm": 0.8588862419128418, | |
| "learning_rate": 4.429818992034487e-05, | |
| "loss": 0.9741, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.049940576105468225, | |
| "grad_norm": 0.7462893128395081, | |
| "learning_rate": 4.412022299128853e-05, | |
| "loss": 0.8281, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.049989585503020205, | |
| "grad_norm": 0.6284858584403992, | |
| "learning_rate": 4.3942513030202305e-05, | |
| "loss": 0.905, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.050038594900572185, | |
| "grad_norm": 0.7673025131225586, | |
| "learning_rate": 4.376506085430081e-05, | |
| "loss": 0.9064, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.050087604298124165, | |
| "grad_norm": 0.718449056148529, | |
| "learning_rate": 4.3587867279613206e-05, | |
| "loss": 0.9743, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.050136613695676145, | |
| "grad_norm": 0.7094829678535461, | |
| "learning_rate": 4.341093312097932e-05, | |
| "loss": 0.996, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.050185623093228125, | |
| "grad_norm": 0.8285147547721863, | |
| "learning_rate": 4.3234259192046244e-05, | |
| "loss": 1.1247, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.050234632490780105, | |
| "grad_norm": 0.8108729124069214, | |
| "learning_rate": 4.305784630526416e-05, | |
| "loss": 0.8781, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.050283641888332085, | |
| "grad_norm": 0.6478090882301331, | |
| "learning_rate": 4.288169527188301e-05, | |
| "loss": 0.7416, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.050332651285884066, | |
| "grad_norm": 0.7296517491340637, | |
| "learning_rate": 4.270580690194844e-05, | |
| "loss": 1.0396, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.050381660683436046, | |
| "grad_norm": 0.7231200933456421, | |
| "learning_rate": 4.253018200429834e-05, | |
| "loss": 0.9782, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.05043067008098803, | |
| "grad_norm": 0.9415457844734192, | |
| "learning_rate": 4.2354821386558855e-05, | |
| "loss": 1.1343, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.05047967947854001, | |
| "grad_norm": 0.776109516620636, | |
| "learning_rate": 4.217972585514095e-05, | |
| "loss": 0.8988, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.05052868887609199, | |
| "grad_norm": 0.6780912280082703, | |
| "learning_rate": 4.2004896215236544e-05, | |
| "loss": 0.9418, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.05057769827364397, | |
| "grad_norm": 0.6523477435112, | |
| "learning_rate": 4.183033327081476e-05, | |
| "loss": 0.7962, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.05062670767119595, | |
| "grad_norm": 0.7628219723701477, | |
| "learning_rate": 4.1656037824618325e-05, | |
| "loss": 1.0017, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.05067571706874793, | |
| "grad_norm": 0.8618097305297852, | |
| "learning_rate": 4.148201067815989e-05, | |
| "loss": 0.8404, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.05072472646629991, | |
| "grad_norm": 0.6040647029876709, | |
| "learning_rate": 4.1308252631718325e-05, | |
| "loss": 0.9687, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.05077373586385189, | |
| "grad_norm": 0.7941866517066956, | |
| "learning_rate": 4.113476448433491e-05, | |
| "loss": 1.0444, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.05082274526140387, | |
| "grad_norm": 0.6268519163131714, | |
| "learning_rate": 4.09615470338099e-05, | |
| "loss": 1.0379, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.05087175465895585, | |
| "grad_norm": 0.7264936566352844, | |
| "learning_rate": 4.078860107669862e-05, | |
| "loss": 0.8959, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.05092076405650783, | |
| "grad_norm": 0.6215760707855225, | |
| "learning_rate": 4.061592740830801e-05, | |
| "loss": 0.9007, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.050969773454059814, | |
| "grad_norm": 0.5822103023529053, | |
| "learning_rate": 4.0443526822692755e-05, | |
| "loss": 0.8713, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.051018782851611794, | |
| "grad_norm": 0.7218736410140991, | |
| "learning_rate": 4.027140011265187e-05, | |
| "loss": 0.9172, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.051067792249163774, | |
| "grad_norm": 0.678709089756012, | |
| "learning_rate": 4.00995480697248e-05, | |
| "loss": 1.0434, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.05111680164671576, | |
| "grad_norm": 0.7455527782440186, | |
| "learning_rate": 3.9927971484187995e-05, | |
| "loss": 0.8981, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.05116581104426774, | |
| "grad_norm": 0.6330903768539429, | |
| "learning_rate": 3.975667114505123e-05, | |
| "loss": 0.8602, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.05121482044181972, | |
| "grad_norm": 0.7926675081253052, | |
| "learning_rate": 3.958564784005382e-05, | |
| "loss": 0.8785, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.0512638298393717, | |
| "grad_norm": 0.7453003525733948, | |
| "learning_rate": 3.9414902355661145e-05, | |
| "loss": 0.8781, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.05131283923692368, | |
| "grad_norm": 0.5748602151870728, | |
| "learning_rate": 3.924443547706106e-05, | |
| "loss": 0.8991, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.05136184863447566, | |
| "grad_norm": 0.787139356136322, | |
| "learning_rate": 3.907424798816023e-05, | |
| "loss": 0.8958, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.05141085803202764, | |
| "grad_norm": 0.6263467669487, | |
| "learning_rate": 3.890434067158043e-05, | |
| "loss": 0.8452, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.05145986742957962, | |
| "grad_norm": 0.7788869738578796, | |
| "learning_rate": 3.873471430865515e-05, | |
| "loss": 0.8231, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.0515088768271316, | |
| "grad_norm": 1.0092405080795288, | |
| "learning_rate": 3.856536967942579e-05, | |
| "loss": 1.084, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.05155788622468358, | |
| "grad_norm": 0.6644636392593384, | |
| "learning_rate": 3.839630756263828e-05, | |
| "loss": 1.0511, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.05160689562223556, | |
| "grad_norm": 0.611219584941864, | |
| "learning_rate": 3.822752873573926e-05, | |
| "loss": 0.877, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.05165590501978754, | |
| "grad_norm": 0.7147572636604309, | |
| "learning_rate": 3.8059033974872784e-05, | |
| "loss": 1.0783, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.05170491441733952, | |
| "grad_norm": 0.7852771878242493, | |
| "learning_rate": 3.789082405487645e-05, | |
| "loss": 0.8292, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.05175392381489151, | |
| "grad_norm": 0.5585947036743164, | |
| "learning_rate": 3.772289974927813e-05, | |
| "loss": 0.8234, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.05180293321244349, | |
| "grad_norm": 0.6067047119140625, | |
| "learning_rate": 3.755526183029223e-05, | |
| "loss": 0.9943, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.05185194260999547, | |
| "grad_norm": 0.6506572961807251, | |
| "learning_rate": 3.738791106881614e-05, | |
| "loss": 0.9735, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.05190095200754745, | |
| "grad_norm": 0.7339874505996704, | |
| "learning_rate": 3.722084823442669e-05, | |
| "loss": 0.9753, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.05194996140509943, | |
| "grad_norm": 0.6757487654685974, | |
| "learning_rate": 3.705407409537684e-05, | |
| "loss": 0.9961, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.05199897080265141, | |
| "grad_norm": 0.6002933382987976, | |
| "learning_rate": 3.68875894185918e-05, | |
| "loss": 0.747, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.05204798020020339, | |
| "grad_norm": 0.7781504392623901, | |
| "learning_rate": 3.672139496966566e-05, | |
| "loss": 0.9216, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.05209698959775537, | |
| "grad_norm": 0.7153550386428833, | |
| "learning_rate": 3.655549151285794e-05, | |
| "loss": 0.9178, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.05214599899530735, | |
| "grad_norm": 0.7319151759147644, | |
| "learning_rate": 3.638987981109003e-05, | |
| "loss": 0.9615, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.05219500839285933, | |
| "grad_norm": 0.8046290278434753, | |
| "learning_rate": 3.622456062594154e-05, | |
| "loss": 0.9093, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.05224401779041131, | |
| "grad_norm": 0.6472988128662109, | |
| "learning_rate": 3.605953471764705e-05, | |
| "loss": 0.7422, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.05229302718796329, | |
| "grad_norm": 1.0517202615737915, | |
| "learning_rate": 3.5894802845092354e-05, | |
| "loss": 1.0699, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.05234203658551527, | |
| "grad_norm": 0.6424945592880249, | |
| "learning_rate": 3.573036576581126e-05, | |
| "loss": 0.8699, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.05239104598306725, | |
| "grad_norm": 0.684185802936554, | |
| "learning_rate": 3.5566224235981737e-05, | |
| "loss": 1.0873, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.05244005538061924, | |
| "grad_norm": 0.680806040763855, | |
| "learning_rate": 3.540237901042285e-05, | |
| "loss": 0.9018, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.05248906477817122, | |
| "grad_norm": 0.6284951567649841, | |
| "learning_rate": 3.5238830842590945e-05, | |
| "loss": 0.8599, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.0525380741757232, | |
| "grad_norm": 0.7522977590560913, | |
| "learning_rate": 3.50755804845763e-05, | |
| "loss": 0.9505, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.05258708357327518, | |
| "grad_norm": 0.6616835594177246, | |
| "learning_rate": 3.491262868709989e-05, | |
| "loss": 1.0088, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.05263609297082716, | |
| "grad_norm": 0.8060148358345032, | |
| "learning_rate": 3.474997619950955e-05, | |
| "loss": 0.9374, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.05268510236837914, | |
| "grad_norm": 0.8048257231712341, | |
| "learning_rate": 3.458762376977669e-05, | |
| "loss": 0.8275, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.05273411176593112, | |
| "grad_norm": 0.7710302472114563, | |
| "learning_rate": 3.4425572144493014e-05, | |
| "loss": 0.8884, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.0527831211634831, | |
| "grad_norm": 0.6914852857589722, | |
| "learning_rate": 3.4263822068866905e-05, | |
| "loss": 1.1101, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.05283213056103508, | |
| "grad_norm": 0.874279260635376, | |
| "learning_rate": 3.410237428671995e-05, | |
| "loss": 0.9308, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.05288113995858706, | |
| "grad_norm": 0.637709379196167, | |
| "learning_rate": 3.3941229540483774e-05, | |
| "loss": 0.9252, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.05293014935613904, | |
| "grad_norm": 0.6194972395896912, | |
| "learning_rate": 3.378038857119632e-05, | |
| "loss": 0.8933, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.05297915875369102, | |
| "grad_norm": 0.7647547125816345, | |
| "learning_rate": 3.3619852118498685e-05, | |
| "loss": 0.8601, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.053028168151243, | |
| "grad_norm": 0.5452641248703003, | |
| "learning_rate": 3.345962092063153e-05, | |
| "loss": 0.8647, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.053077177548794985, | |
| "grad_norm": 0.5692489147186279, | |
| "learning_rate": 3.3299695714431886e-05, | |
| "loss": 0.771, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.053126186946346965, | |
| "grad_norm": 0.5245976448059082, | |
| "learning_rate": 3.314007723532954e-05, | |
| "loss": 0.9189, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.053175196343898945, | |
| "grad_norm": 1.0263336896896362, | |
| "learning_rate": 3.298076621734385e-05, | |
| "loss": 0.7727, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.053224205741450925, | |
| "grad_norm": 0.5948834419250488, | |
| "learning_rate": 3.282176339308029e-05, | |
| "loss": 0.9622, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.053273215139002905, | |
| "grad_norm": 0.6458703875541687, | |
| "learning_rate": 3.266306949372704e-05, | |
| "loss": 0.9235, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.053322224536554885, | |
| "grad_norm": 0.6071894764900208, | |
| "learning_rate": 3.2504685249051606e-05, | |
| "loss": 0.8635, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.053371233934106865, | |
| "grad_norm": 0.6334041357040405, | |
| "learning_rate": 3.234661138739764e-05, | |
| "loss": 0.8896, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.053420243331658845, | |
| "grad_norm": 0.6917246580123901, | |
| "learning_rate": 3.2188848635681446e-05, | |
| "loss": 0.9068, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.053469252729210826, | |
| "grad_norm": 0.7009928226470947, | |
| "learning_rate": 3.2031397719388556e-05, | |
| "loss": 0.9179, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.053518262126762806, | |
| "grad_norm": 0.5563487410545349, | |
| "learning_rate": 3.1874259362570666e-05, | |
| "loss": 0.9428, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.053567271524314786, | |
| "grad_norm": 0.814445972442627, | |
| "learning_rate": 3.171743428784198e-05, | |
| "loss": 0.9235, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.053616280921866766, | |
| "grad_norm": 0.6743494272232056, | |
| "learning_rate": 3.156092321637616e-05, | |
| "loss": 0.821, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.053665290319418746, | |
| "grad_norm": 0.6412997245788574, | |
| "learning_rate": 3.1404726867902815e-05, | |
| "loss": 0.9212, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.053714299716970726, | |
| "grad_norm": 0.8848609924316406, | |
| "learning_rate": 3.124884596070438e-05, | |
| "loss": 1.057, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.05376330911452271, | |
| "grad_norm": 0.7107406258583069, | |
| "learning_rate": 3.109328121161256e-05, | |
| "loss": 0.9214, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.05381231851207469, | |
| "grad_norm": 0.8470025658607483, | |
| "learning_rate": 3.09380333360053e-05, | |
| "loss": 0.9982, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.05386132790962667, | |
| "grad_norm": 0.574743390083313, | |
| "learning_rate": 3.078310304780336e-05, | |
| "loss": 0.7368, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.05391033730717865, | |
| "grad_norm": 0.5940549373626709, | |
| "learning_rate": 3.0628491059467014e-05, | |
| "loss": 0.8097, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.05395934670473063, | |
| "grad_norm": 0.8564255237579346, | |
| "learning_rate": 3.0474198081992754e-05, | |
| "loss": 0.968, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.05400835610228261, | |
| "grad_norm": 0.6052840352058411, | |
| "learning_rate": 3.0320224824910182e-05, | |
| "loss": 0.999, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.05405736549983459, | |
| "grad_norm": 0.7785791158676147, | |
| "learning_rate": 3.0166571996278615e-05, | |
| "loss": 0.8447, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.054106374897386574, | |
| "grad_norm": 0.7929477691650391, | |
| "learning_rate": 3.0013240302683766e-05, | |
| "loss": 0.9153, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.054155384294938554, | |
| "grad_norm": 0.7250458002090454, | |
| "learning_rate": 2.9860230449234706e-05, | |
| "loss": 0.9096, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.054204393692490534, | |
| "grad_norm": 0.8561221957206726, | |
| "learning_rate": 2.9707543139560358e-05, | |
| "loss": 0.8301, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.054253403090042514, | |
| "grad_norm": 0.9120008945465088, | |
| "learning_rate": 2.955517907580656e-05, | |
| "loss": 1.0136, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.054253403090042514, | |
| "eval_loss": null, | |
| "eval_runtime": 185.5324, | |
| "eval_samples_per_second": 46.31, | |
| "eval_steps_per_second": 23.155, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.054302412487594494, | |
| "grad_norm": 0.6959465146064758, | |
| "learning_rate": 2.9403138958632503e-05, | |
| "loss": 0.7701, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.054351421885146474, | |
| "grad_norm": 0.5777866840362549, | |
| "learning_rate": 2.925142348720784e-05, | |
| "loss": 0.8953, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.054400431282698454, | |
| "grad_norm": 0.703305184841156, | |
| "learning_rate": 2.910003335920918e-05, | |
| "loss": 1.0123, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.05444944068025044, | |
| "grad_norm": 0.8165416717529297, | |
| "learning_rate": 2.8948969270817096e-05, | |
| "loss": 0.8779, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.05449845007780242, | |
| "grad_norm": 0.6612878441810608, | |
| "learning_rate": 2.879823191671286e-05, | |
| "loss": 1.0614, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.0545474594753544, | |
| "grad_norm": 0.6103464365005493, | |
| "learning_rate": 2.8647821990075153e-05, | |
| "loss": 0.7585, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.05459646887290638, | |
| "grad_norm": 0.7475747466087341, | |
| "learning_rate": 2.8497740182576948e-05, | |
| "loss": 0.8482, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.05464547827045836, | |
| "grad_norm": 0.6378394365310669, | |
| "learning_rate": 2.8347987184382398e-05, | |
| "loss": 0.9145, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.05469448766801034, | |
| "grad_norm": 0.563686728477478, | |
| "learning_rate": 2.819856368414361e-05, | |
| "loss": 0.8355, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.05474349706556232, | |
| "grad_norm": 0.6585412621498108, | |
| "learning_rate": 2.8049470368997355e-05, | |
| "loss": 0.7453, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.0547925064631143, | |
| "grad_norm": 0.5961164236068726, | |
| "learning_rate": 2.7900707924562166e-05, | |
| "loss": 0.9076, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.05484151586066628, | |
| "grad_norm": 0.5402793884277344, | |
| "learning_rate": 2.7752277034934894e-05, | |
| "loss": 0.7223, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.05489052525821826, | |
| "grad_norm": 0.7519727945327759, | |
| "learning_rate": 2.760417838268784e-05, | |
| "loss": 1.0406, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.05493953465577024, | |
| "grad_norm": 0.6112848520278931, | |
| "learning_rate": 2.745641264886536e-05, | |
| "loss": 0.7626, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.05498854405332222, | |
| "grad_norm": 0.9848921895027161, | |
| "learning_rate": 2.7308980512980965e-05, | |
| "loss": 1.0251, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.0550375534508742, | |
| "grad_norm": 0.5984755158424377, | |
| "learning_rate": 2.716188265301398e-05, | |
| "loss": 0.838, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.05508656284842619, | |
| "grad_norm": 0.9945166707038879, | |
| "learning_rate": 2.7015119745406636e-05, | |
| "loss": 1.0268, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.05513557224597817, | |
| "grad_norm": 0.8502665758132935, | |
| "learning_rate": 2.6868692465060828e-05, | |
| "loss": 0.9499, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.05518458164353015, | |
| "grad_norm": 0.6849496960639954, | |
| "learning_rate": 2.6722601485334998e-05, | |
| "loss": 0.7584, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.05523359104108213, | |
| "grad_norm": 0.6284264326095581, | |
| "learning_rate": 2.6576847478041067e-05, | |
| "loss": 0.9142, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.05528260043863411, | |
| "grad_norm": 0.644045352935791, | |
| "learning_rate": 2.643143111344144e-05, | |
| "loss": 1.0832, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.05533160983618609, | |
| "grad_norm": 0.690579891204834, | |
| "learning_rate": 2.6286353060245826e-05, | |
| "loss": 0.7999, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.05538061923373807, | |
| "grad_norm": 0.6707479953765869, | |
| "learning_rate": 2.6141613985608093e-05, | |
| "loss": 1.1036, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.05542962863129005, | |
| "grad_norm": 0.5971590280532837, | |
| "learning_rate": 2.5997214555123416e-05, | |
| "loss": 1.0718, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.05547863802884203, | |
| "grad_norm": 0.616248369216919, | |
| "learning_rate": 2.585315543282496e-05, | |
| "loss": 0.9957, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.05552764742639401, | |
| "grad_norm": 0.7989575266838074, | |
| "learning_rate": 2.570943728118106e-05, | |
| "loss": 0.8987, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.05557665682394599, | |
| "grad_norm": 0.553438663482666, | |
| "learning_rate": 2.556606076109198e-05, | |
| "loss": 0.8544, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.05562566622149797, | |
| "grad_norm": 0.6931489109992981, | |
| "learning_rate": 2.542302653188704e-05, | |
| "loss": 0.9196, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.05567467561904995, | |
| "grad_norm": 0.851711630821228, | |
| "learning_rate": 2.528033525132144e-05, | |
| "loss": 1.0254, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.05572368501660193, | |
| "grad_norm": 0.6611626148223877, | |
| "learning_rate": 2.513798757557333e-05, | |
| "loss": 1.0014, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.05577269441415392, | |
| "grad_norm": 1.1130104064941406, | |
| "learning_rate": 2.4995984159240814e-05, | |
| "loss": 0.8845, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.0558217038117059, | |
| "grad_norm": 0.6933576464653015, | |
| "learning_rate": 2.4854325655338805e-05, | |
| "loss": 1.0355, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.05587071320925788, | |
| "grad_norm": 0.6513526439666748, | |
| "learning_rate": 2.4713012715296113e-05, | |
| "loss": 0.8088, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.05591972260680986, | |
| "grad_norm": 0.8315907716751099, | |
| "learning_rate": 2.4572045988952495e-05, | |
| "loss": 0.973, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.05596873200436184, | |
| "grad_norm": 0.5299463272094727, | |
| "learning_rate": 2.4431426124555625e-05, | |
| "loss": 0.8138, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.05601774140191382, | |
| "grad_norm": 0.6596815586090088, | |
| "learning_rate": 2.429115376875799e-05, | |
| "loss": 0.7319, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.0560667507994658, | |
| "grad_norm": 0.6985355019569397, | |
| "learning_rate": 2.41512295666142e-05, | |
| "loss": 0.9264, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.05611576019701778, | |
| "grad_norm": 0.8074831366539001, | |
| "learning_rate": 2.4011654161577667e-05, | |
| "loss": 0.8036, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.05616476959456976, | |
| "grad_norm": 0.6924799084663391, | |
| "learning_rate": 2.3872428195497998e-05, | |
| "loss": 1.2314, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.05621377899212174, | |
| "grad_norm": 0.7358079552650452, | |
| "learning_rate": 2.3733552308617736e-05, | |
| "loss": 1.1863, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.05626278838967372, | |
| "grad_norm": 0.6338258385658264, | |
| "learning_rate": 2.3595027139569658e-05, | |
| "loss": 0.9938, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.0563117977872257, | |
| "grad_norm": 0.7516512870788574, | |
| "learning_rate": 2.345685332537364e-05, | |
| "loss": 0.8846, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.05636080718477768, | |
| "grad_norm": 0.849188506603241, | |
| "learning_rate": 2.331903150143391e-05, | |
| "loss": 0.9154, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.056409816582329665, | |
| "grad_norm": 0.7366519570350647, | |
| "learning_rate": 2.318156230153603e-05, | |
| "loss": 1.0099, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.056458825979881645, | |
| "grad_norm": 0.8387792110443115, | |
| "learning_rate": 2.304444635784393e-05, | |
| "loss": 0.9518, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.056507835377433625, | |
| "grad_norm": 0.6383851170539856, | |
| "learning_rate": 2.2907684300897027e-05, | |
| "loss": 1.0355, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.056556844774985605, | |
| "grad_norm": 0.7643039226531982, | |
| "learning_rate": 2.2771276759607564e-05, | |
| "loss": 0.9693, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.056605854172537586, | |
| "grad_norm": 0.6342633962631226, | |
| "learning_rate": 2.263522436125729e-05, | |
| "loss": 0.8413, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.056654863570089566, | |
| "grad_norm": 0.7279918193817139, | |
| "learning_rate": 2.2499527731494886e-05, | |
| "loss": 1.1611, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.056703872967641546, | |
| "grad_norm": 0.6141911149024963, | |
| "learning_rate": 2.2364187494333e-05, | |
| "loss": 0.6722, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.056752882365193526, | |
| "grad_norm": 0.5212133526802063, | |
| "learning_rate": 2.222920427214541e-05, | |
| "loss": 0.8071, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.056801891762745506, | |
| "grad_norm": 0.6113694906234741, | |
| "learning_rate": 2.2094578685664047e-05, | |
| "loss": 0.8748, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.056850901160297486, | |
| "grad_norm": 0.718829333782196, | |
| "learning_rate": 2.1960311353976316e-05, | |
| "loss": 0.9405, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.056899910557849466, | |
| "grad_norm": 0.8659762144088745, | |
| "learning_rate": 2.182640289452207e-05, | |
| "loss": 0.8237, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.056948919955401446, | |
| "grad_norm": 0.6210221648216248, | |
| "learning_rate": 2.169285392309095e-05, | |
| "loss": 0.7797, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.056997929352953426, | |
| "grad_norm": 0.6985928416252136, | |
| "learning_rate": 2.1559665053819366e-05, | |
| "loss": 0.8632, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.057046938750505406, | |
| "grad_norm": 0.8535677790641785, | |
| "learning_rate": 2.142683689918784e-05, | |
| "loss": 0.987, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.05709594814805739, | |
| "grad_norm": 0.6734438538551331, | |
| "learning_rate": 2.1294370070018076e-05, | |
| "loss": 0.9561, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.05714495754560937, | |
| "grad_norm": 0.6575422883033752, | |
| "learning_rate": 2.1162265175470153e-05, | |
| "loss": 0.9701, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.05719396694316135, | |
| "grad_norm": 1.0730395317077637, | |
| "learning_rate": 2.103052282303992e-05, | |
| "loss": 0.7416, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.057242976340713334, | |
| "grad_norm": 0.5347709059715271, | |
| "learning_rate": 2.089914361855588e-05, | |
| "loss": 0.7869, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.057291985738265314, | |
| "grad_norm": 1.0355056524276733, | |
| "learning_rate": 2.0768128166176604e-05, | |
| "loss": 0.954, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.057340995135817294, | |
| "grad_norm": 0.6163832545280457, | |
| "learning_rate": 2.0637477068387957e-05, | |
| "loss": 1.0077, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.057390004533369274, | |
| "grad_norm": 0.6667943596839905, | |
| "learning_rate": 2.050719092600031e-05, | |
| "loss": 0.8894, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.057439013930921254, | |
| "grad_norm": 0.6550483107566833, | |
| "learning_rate": 2.037727033814565e-05, | |
| "loss": 0.9482, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.057488023328473234, | |
| "grad_norm": 0.8213583827018738, | |
| "learning_rate": 2.0247715902275068e-05, | |
| "loss": 0.7194, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.057537032726025214, | |
| "grad_norm": 0.6851637363433838, | |
| "learning_rate": 2.011852821415573e-05, | |
| "loss": 0.7352, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.057586042123577194, | |
| "grad_norm": 0.682693600654602, | |
| "learning_rate": 1.9989707867868425e-05, | |
| "loss": 1.135, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.057635051521129174, | |
| "grad_norm": 0.7706078290939331, | |
| "learning_rate": 1.986125545580455e-05, | |
| "loss": 0.7256, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.057684060918681154, | |
| "grad_norm": 0.6869995594024658, | |
| "learning_rate": 1.9733171568663643e-05, | |
| "loss": 0.9948, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.057733070316233134, | |
| "grad_norm": 0.8122284412384033, | |
| "learning_rate": 1.960545679545045e-05, | |
| "loss": 0.7801, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.05778207971378512, | |
| "grad_norm": 0.8644903898239136, | |
| "learning_rate": 1.947811172347239e-05, | |
| "loss": 0.9036, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.0578310891113371, | |
| "grad_norm": 0.9533315896987915, | |
| "learning_rate": 1.9351136938336777e-05, | |
| "loss": 1.0587, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.05788009850888908, | |
| "grad_norm": 0.7774580717086792, | |
| "learning_rate": 1.9224533023948077e-05, | |
| "loss": 1.0339, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.05792910790644106, | |
| "grad_norm": 0.6420020461082458, | |
| "learning_rate": 1.9098300562505266e-05, | |
| "loss": 0.884, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.05797811730399304, | |
| "grad_norm": 0.6784622669219971, | |
| "learning_rate": 1.8972440134499224e-05, | |
| "loss": 0.7252, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.05802712670154502, | |
| "grad_norm": 0.981188952922821, | |
| "learning_rate": 1.884695231871001e-05, | |
| "loss": 1.0207, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.058076136099097, | |
| "grad_norm": 0.6668174862861633, | |
| "learning_rate": 1.8721837692204115e-05, | |
| "loss": 0.8547, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.05812514549664898, | |
| "grad_norm": 0.6276751756668091, | |
| "learning_rate": 1.859709683033195e-05, | |
| "loss": 0.8833, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.05817415489420096, | |
| "grad_norm": 0.6397258043289185, | |
| "learning_rate": 1.8472730306725107e-05, | |
| "loss": 0.8333, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.05822316429175294, | |
| "grad_norm": 0.6282427310943604, | |
| "learning_rate": 1.8348738693293777e-05, | |
| "loss": 0.9277, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.05827217368930492, | |
| "grad_norm": 0.742828905582428, | |
| "learning_rate": 1.822512256022405e-05, | |
| "loss": 0.9255, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.0583211830868569, | |
| "grad_norm": 0.6836369037628174, | |
| "learning_rate": 1.8101882475975417e-05, | |
| "loss": 1.0912, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.05837019248440888, | |
| "grad_norm": 0.6010401844978333, | |
| "learning_rate": 1.7979019007277975e-05, | |
| "loss": 0.9613, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.05841920188196087, | |
| "grad_norm": 0.724628210067749, | |
| "learning_rate": 1.785653271912999e-05, | |
| "loss": 0.8902, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.05846821127951285, | |
| "grad_norm": 0.6914888024330139, | |
| "learning_rate": 1.773442417479525e-05, | |
| "loss": 0.9536, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.05851722067706483, | |
| "grad_norm": 0.65279620885849, | |
| "learning_rate": 1.7612693935800373e-05, | |
| "loss": 0.8499, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.05856623007461681, | |
| "grad_norm": 0.617305338382721, | |
| "learning_rate": 1.7491342561932355e-05, | |
| "loss": 1.0133, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.05861523947216879, | |
| "grad_norm": 0.7648184299468994, | |
| "learning_rate": 1.7370370611235963e-05, | |
| "loss": 0.994, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.05866424886972077, | |
| "grad_norm": 0.7441121935844421, | |
| "learning_rate": 1.7249778640011148e-05, | |
| "loss": 0.8789, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.05871325826727275, | |
| "grad_norm": 0.9436798691749573, | |
| "learning_rate": 1.7129567202810448e-05, | |
| "loss": 1.0732, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.05876226766482473, | |
| "grad_norm": 0.7364475727081299, | |
| "learning_rate": 1.7009736852436563e-05, | |
| "loss": 0.7994, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.05881127706237671, | |
| "grad_norm": 0.7213372588157654, | |
| "learning_rate": 1.6890288139939625e-05, | |
| "loss": 0.9831, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.05886028645992869, | |
| "grad_norm": 0.7466797828674316, | |
| "learning_rate": 1.6771221614614906e-05, | |
| "loss": 0.938, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.05890929585748067, | |
| "grad_norm": 0.613419771194458, | |
| "learning_rate": 1.6652537824000015e-05, | |
| "loss": 0.939, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.05895830525503265, | |
| "grad_norm": 0.6488571763038635, | |
| "learning_rate": 1.6534237313872636e-05, | |
| "loss": 1.0457, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.05900731465258463, | |
| "grad_norm": 0.6202888488769531, | |
| "learning_rate": 1.641632062824783e-05, | |
| "loss": 1.0043, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.05905632405013661, | |
| "grad_norm": 0.6015529036521912, | |
| "learning_rate": 1.6298788309375644e-05, | |
| "loss": 0.9645, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.0591053334476886, | |
| "grad_norm": 0.5945176482200623, | |
| "learning_rate": 1.6181640897738603e-05, | |
| "loss": 0.9764, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.05915434284524058, | |
| "grad_norm": 0.657114565372467, | |
| "learning_rate": 1.6064878932049166e-05, | |
| "loss": 1.0262, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.05920335224279256, | |
| "grad_norm": 0.5701066255569458, | |
| "learning_rate": 1.5948502949247246e-05, | |
| "loss": 0.8868, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.05925236164034454, | |
| "grad_norm": 1.2002894878387451, | |
| "learning_rate": 1.583251348449788e-05, | |
| "loss": 1.1998, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.05930137103789652, | |
| "grad_norm": 0.7580971121788025, | |
| "learning_rate": 1.571691107118861e-05, | |
| "loss": 0.7739, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.0593503804354485, | |
| "grad_norm": 0.8998621106147766, | |
| "learning_rate": 1.5601696240927076e-05, | |
| "loss": 0.8264, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.05939938983300048, | |
| "grad_norm": 0.7180477380752563, | |
| "learning_rate": 1.5486869523538638e-05, | |
| "loss": 1.1046, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.05944839923055246, | |
| "grad_norm": 0.7890926599502563, | |
| "learning_rate": 1.5372431447063807e-05, | |
| "loss": 0.8471, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.05949740862810444, | |
| "grad_norm": 0.879736065864563, | |
| "learning_rate": 1.5258382537755989e-05, | |
| "loss": 1.2106, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.05954641802565642, | |
| "grad_norm": 0.7328320145606995, | |
| "learning_rate": 1.5144723320078869e-05, | |
| "loss": 0.7876, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.0595954274232084, | |
| "grad_norm": 0.5559690594673157, | |
| "learning_rate": 1.5031454316704207e-05, | |
| "loss": 0.8941, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.05964443682076038, | |
| "grad_norm": 0.9444091320037842, | |
| "learning_rate": 1.4918576048509225e-05, | |
| "loss": 0.9342, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.05969344621831236, | |
| "grad_norm": 0.654117226600647, | |
| "learning_rate": 1.48060890345744e-05, | |
| "loss": 0.7734, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.05974245561586434, | |
| "grad_norm": 0.7489770650863647, | |
| "learning_rate": 1.469399379218096e-05, | |
| "loss": 0.9926, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.059791465013416326, | |
| "grad_norm": 0.6979839205741882, | |
| "learning_rate": 1.4582290836808543e-05, | |
| "loss": 0.9055, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.059840474410968306, | |
| "grad_norm": 0.5855501294136047, | |
| "learning_rate": 1.4470980682132762e-05, | |
| "loss": 0.8791, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.059889483808520286, | |
| "grad_norm": 0.8113967180252075, | |
| "learning_rate": 1.4360063840023008e-05, | |
| "loss": 0.9865, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.059938493206072266, | |
| "grad_norm": 0.7481881380081177, | |
| "learning_rate": 1.4249540820539941e-05, | |
| "loss": 1.0255, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.059987502603624246, | |
| "grad_norm": 0.636202335357666, | |
| "learning_rate": 1.413941213193316e-05, | |
| "loss": 0.8995, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.060036512001176226, | |
| "grad_norm": 0.7428346872329712, | |
| "learning_rate": 1.402967828063897e-05, | |
| "loss": 1.106, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.060085521398728206, | |
| "grad_norm": 1.2551703453063965, | |
| "learning_rate": 1.3920339771277891e-05, | |
| "loss": 0.8506, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.060134530796280186, | |
| "grad_norm": 0.6625940203666687, | |
| "learning_rate": 1.3811397106652524e-05, | |
| "loss": 0.7749, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.060183540193832166, | |
| "grad_norm": 0.8333196640014648, | |
| "learning_rate": 1.370285078774507e-05, | |
| "loss": 1.0665, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.060232549591384146, | |
| "grad_norm": 0.6439178586006165, | |
| "learning_rate": 1.3594701313715152e-05, | |
| "loss": 0.95, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.060281558988936126, | |
| "grad_norm": 0.7931140065193176, | |
| "learning_rate": 1.3486949181897434e-05, | |
| "loss": 1.0114, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.060330568386488106, | |
| "grad_norm": 0.8823480606079102, | |
| "learning_rate": 1.3379594887799384e-05, | |
| "loss": 1.0644, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.06037957778404009, | |
| "grad_norm": 0.9899863004684448, | |
| "learning_rate": 1.327263892509899e-05, | |
| "loss": 1.0687, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.060428587181592074, | |
| "grad_norm": 0.6067537069320679, | |
| "learning_rate": 1.316608178564246e-05, | |
| "loss": 0.9695, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.060477596579144054, | |
| "grad_norm": 0.5756759643554688, | |
| "learning_rate": 1.3059923959441944e-05, | |
| "loss": 0.9244, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.060526605976696034, | |
| "grad_norm": 0.5859988927841187, | |
| "learning_rate": 1.295416593467338e-05, | |
| "loss": 0.7473, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.060575615374248014, | |
| "grad_norm": 0.6280271410942078, | |
| "learning_rate": 1.284880819767419e-05, | |
| "loss": 0.8095, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.060624624771799994, | |
| "grad_norm": 0.6644018292427063, | |
| "learning_rate": 1.2743851232940951e-05, | |
| "loss": 0.8925, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.060673634169351974, | |
| "grad_norm": 0.6548195481300354, | |
| "learning_rate": 1.263929552312737e-05, | |
| "loss": 0.9308, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.060722643566903954, | |
| "grad_norm": 0.594926118850708, | |
| "learning_rate": 1.2535141549041829e-05, | |
| "loss": 0.9031, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.060771652964455934, | |
| "grad_norm": 0.7365604639053345, | |
| "learning_rate": 1.2431389789645399e-05, | |
| "loss": 0.9495, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.060820662362007914, | |
| "grad_norm": 0.7257662415504456, | |
| "learning_rate": 1.232804072204945e-05, | |
| "loss": 0.755, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.060869671759559894, | |
| "grad_norm": 0.7510412335395813, | |
| "learning_rate": 1.2225094821513616e-05, | |
| "loss": 0.8904, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.060918681157111874, | |
| "grad_norm": 0.7142137885093689, | |
| "learning_rate": 1.2122552561443456e-05, | |
| "loss": 1.0054, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.060967690554663855, | |
| "grad_norm": 1.0007776021957397, | |
| "learning_rate": 1.2020414413388403e-05, | |
| "loss": 1.0645, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.061016699952215835, | |
| "grad_norm": 0.6386979222297668, | |
| "learning_rate": 1.1918680847039554e-05, | |
| "loss": 0.857, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.061065709349767815, | |
| "grad_norm": 0.5809711813926697, | |
| "learning_rate": 1.181735233022746e-05, | |
| "loss": 0.817, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.0611147187473198, | |
| "grad_norm": 0.7974873781204224, | |
| "learning_rate": 1.1716429328919998e-05, | |
| "loss": 0.9746, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.06116372814487178, | |
| "grad_norm": 0.6156415939331055, | |
| "learning_rate": 1.1615912307220378e-05, | |
| "loss": 0.8998, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.06121273754242376, | |
| "grad_norm": 1.158423900604248, | |
| "learning_rate": 1.1515801727364727e-05, | |
| "loss": 0.9229, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.06126174693997574, | |
| "grad_norm": 0.5545446872711182, | |
| "learning_rate": 1.141609804972017e-05, | |
| "loss": 0.8738, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.06131075633752772, | |
| "grad_norm": 0.7248711585998535, | |
| "learning_rate": 1.1316801732782667e-05, | |
| "loss": 1.0821, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.0613597657350797, | |
| "grad_norm": 0.6983115077018738, | |
| "learning_rate": 1.1217913233174915e-05, | |
| "loss": 0.9231, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.06140877513263168, | |
| "grad_norm": 0.5736908316612244, | |
| "learning_rate": 1.1119433005644176e-05, | |
| "loss": 0.8463, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.06145778453018366, | |
| "grad_norm": 0.9075560569763184, | |
| "learning_rate": 1.102136150306028e-05, | |
| "loss": 0.7854, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.06150679392773564, | |
| "grad_norm": 0.6925746202468872, | |
| "learning_rate": 1.0923699176413448e-05, | |
| "loss": 1.0055, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.06155580332528762, | |
| "grad_norm": 0.6659478545188904, | |
| "learning_rate": 1.0826446474812368e-05, | |
| "loss": 0.8888, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.0616048127228396, | |
| "grad_norm": 2.326383352279663, | |
| "learning_rate": 1.0729603845481894e-05, | |
| "loss": 1.1274, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.06165382212039158, | |
| "grad_norm": 0.6801324486732483, | |
| "learning_rate": 1.0633171733761272e-05, | |
| "loss": 0.9899, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.06170283151794356, | |
| "grad_norm": 0.8603371381759644, | |
| "learning_rate": 1.0537150583101817e-05, | |
| "loss": 1.0472, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.06175184091549555, | |
| "grad_norm": 0.7354358434677124, | |
| "learning_rate": 1.04415408350651e-05, | |
| "loss": 1.0156, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.06180085031304753, | |
| "grad_norm": 0.593607485294342, | |
| "learning_rate": 1.0346342929320796e-05, | |
| "loss": 0.8159, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.06184985971059951, | |
| "grad_norm": 0.7112290263175964, | |
| "learning_rate": 1.0251557303644665e-05, | |
| "loss": 0.8867, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.06189886910815149, | |
| "grad_norm": 0.7560910582542419, | |
| "learning_rate": 1.0157184393916563e-05, | |
| "loss": 0.8092, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.06194787850570347, | |
| "grad_norm": 0.6165046095848083, | |
| "learning_rate": 1.006322463411845e-05, | |
| "loss": 0.9094, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.06199688790325545, | |
| "grad_norm": 1.0106430053710938, | |
| "learning_rate": 9.969678456332409e-06, | |
| "loss": 0.8011, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.06204589730080743, | |
| "grad_norm": 0.7880820035934448, | |
| "learning_rate": 9.876546290738564e-06, | |
| "loss": 1.0987, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.06209490669835941, | |
| "grad_norm": 0.6300559043884277, | |
| "learning_rate": 9.78382856561323e-06, | |
| "loss": 0.7252, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.06214391609591139, | |
| "grad_norm": 0.59274822473526, | |
| "learning_rate": 9.691525707326832e-06, | |
| "loss": 0.8166, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.06219292549346337, | |
| "grad_norm": 0.6262571811676025, | |
| "learning_rate": 9.599638140342049e-06, | |
| "loss": 0.9126, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.06224193489101535, | |
| "grad_norm": 0.6474040746688843, | |
| "learning_rate": 9.508166287211739e-06, | |
| "loss": 0.8498, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.06229094428856733, | |
| "grad_norm": 0.769927978515625, | |
| "learning_rate": 9.417110568577136e-06, | |
| "loss": 0.9494, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.06233995368611931, | |
| "grad_norm": 0.8656934499740601, | |
| "learning_rate": 9.326471403165782e-06, | |
| "loss": 0.9175, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.06238896308367129, | |
| "grad_norm": 0.5913616418838501, | |
| "learning_rate": 9.236249207789705e-06, | |
| "loss": 0.7902, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.06243797248122328, | |
| "grad_norm": 0.7363753318786621, | |
| "learning_rate": 9.146444397343457e-06, | |
| "loss": 0.9849, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.06248698187877526, | |
| "grad_norm": 0.6947307586669922, | |
| "learning_rate": 9.057057384802181e-06, | |
| "loss": 0.8358, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.06253599127632724, | |
| "grad_norm": 0.7444347739219666, | |
| "learning_rate": 8.968088581219746e-06, | |
| "loss": 0.9795, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.06258500067387922, | |
| "grad_norm": 0.6162583827972412, | |
| "learning_rate": 8.879538395726884e-06, | |
| "loss": 0.8446, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.0626340100714312, | |
| "grad_norm": 0.7605543732643127, | |
| "learning_rate": 8.791407235529247e-06, | |
| "loss": 0.9335, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.06268301946898318, | |
| "grad_norm": 0.6320784091949463, | |
| "learning_rate": 8.703695505905573e-06, | |
| "loss": 0.999, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.06273202886653516, | |
| "grad_norm": 1.867570400238037, | |
| "learning_rate": 8.616403610205814e-06, | |
| "loss": 1.066, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.06278103826408714, | |
| "grad_norm": 0.744623601436615, | |
| "learning_rate": 8.529531949849245e-06, | |
| "loss": 0.8478, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.06283004766163912, | |
| "grad_norm": 0.6711301207542419, | |
| "learning_rate": 8.443080924322733e-06, | |
| "loss": 0.7788, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.0628790570591911, | |
| "grad_norm": 0.6933310031890869, | |
| "learning_rate": 8.357050931178723e-06, | |
| "loss": 0.8071, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.06292806645674308, | |
| "grad_norm": 0.7776893973350525, | |
| "learning_rate": 8.271442366033577e-06, | |
| "loss": 1.0827, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.06297707585429506, | |
| "grad_norm": 0.5825070738792419, | |
| "learning_rate": 8.186255622565642e-06, | |
| "loss": 0.9195, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.06302608525184704, | |
| "grad_norm": 0.5672100782394409, | |
| "learning_rate": 8.101491092513513e-06, | |
| "loss": 0.8868, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.06307509464939902, | |
| "grad_norm": 0.8170425891876221, | |
| "learning_rate": 8.017149165674199e-06, | |
| "loss": 0.9684, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.063124104046951, | |
| "grad_norm": 0.6922415494918823, | |
| "learning_rate": 7.9332302299013e-06, | |
| "loss": 0.7434, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.06317311344450298, | |
| "grad_norm": 0.7274526357650757, | |
| "learning_rate": 7.849734671103259e-06, | |
| "loss": 0.9571, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.06322212284205496, | |
| "grad_norm": 0.7111207246780396, | |
| "learning_rate": 7.766662873241614e-06, | |
| "loss": 0.8063, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.06327113223960694, | |
| "grad_norm": 0.8042239546775818, | |
| "learning_rate": 7.684015218329221e-06, | |
| "loss": 1.1872, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.06332014163715892, | |
| "grad_norm": 0.6595145463943481, | |
| "learning_rate": 7.601792086428383e-06, | |
| "loss": 0.9201, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.0633691510347109, | |
| "grad_norm": 1.050703763961792, | |
| "learning_rate": 7.5199938556492984e-06, | |
| "loss": 1.2614, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.0634181604322629, | |
| "grad_norm": 0.5375563502311707, | |
| "learning_rate": 7.438620902148163e-06, | |
| "loss": 0.7987, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.06346716982981487, | |
| "grad_norm": 0.7361612319946289, | |
| "learning_rate": 7.357673600125525e-06, | |
| "loss": 1.0086, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.06351617922736685, | |
| "grad_norm": 0.854240357875824, | |
| "learning_rate": 7.277152321824521e-06, | |
| "loss": 0.9092, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.06356518862491883, | |
| "grad_norm": 0.5932867527008057, | |
| "learning_rate": 7.197057437529209e-06, | |
| "loss": 0.6281, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.06361419802247081, | |
| "grad_norm": 0.7344982624053955, | |
| "learning_rate": 7.117389315562772e-06, | |
| "loss": 0.7349, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.0636632074200228, | |
| "grad_norm": 0.6009116172790527, | |
| "learning_rate": 7.0381483222859754e-06, | |
| "loss": 0.7879, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.06371221681757477, | |
| "grad_norm": 0.7015933394432068, | |
| "learning_rate": 6.959334822095354e-06, | |
| "loss": 0.9432, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.06376122621512675, | |
| "grad_norm": 0.6471702456474304, | |
| "learning_rate": 6.88094917742157e-06, | |
| "loss": 0.9026, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.06381023561267873, | |
| "grad_norm": 0.7404037117958069, | |
| "learning_rate": 6.80299174872775e-06, | |
| "loss": 0.8975, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.06385924501023071, | |
| "grad_norm": 0.7222086787223816, | |
| "learning_rate": 6.725462894507861e-06, | |
| "loss": 0.9482, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.0639082544077827, | |
| "grad_norm": 0.8654917478561401, | |
| "learning_rate": 6.648362971285038e-06, | |
| "loss": 0.9204, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.06395726380533467, | |
| "grad_norm": 0.6940625309944153, | |
| "learning_rate": 6.571692333609891e-06, | |
| "loss": 0.8663, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.06400627320288665, | |
| "grad_norm": 0.6776160001754761, | |
| "learning_rate": 6.495451334058989e-06, | |
| "loss": 0.8543, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.06405528260043863, | |
| "grad_norm": 1.0531834363937378, | |
| "learning_rate": 6.4196403232331e-06, | |
| "loss": 0.8712, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.06410429199799061, | |
| "grad_norm": 0.7188198566436768, | |
| "learning_rate": 6.344259649755724e-06, | |
| "loss": 1.0192, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.0641533013955426, | |
| "grad_norm": 0.6964803338050842, | |
| "learning_rate": 6.269309660271361e-06, | |
| "loss": 0.8468, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.06420231079309457, | |
| "grad_norm": 0.7297788262367249, | |
| "learning_rate": 6.1947906994440195e-06, | |
| "loss": 0.8149, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.06425132019064655, | |
| "grad_norm": 0.8535167574882507, | |
| "learning_rate": 6.1207031099555276e-06, | |
| "loss": 0.9986, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.06430032958819853, | |
| "grad_norm": 0.6777470111846924, | |
| "learning_rate": 6.047047232504077e-06, | |
| "loss": 0.8337, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.06434933898575051, | |
| "grad_norm": 0.6274713277816772, | |
| "learning_rate": 5.973823405802581e-06, | |
| "loss": 0.8513, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.0643983483833025, | |
| "grad_norm": 0.8152085542678833, | |
| "learning_rate": 5.901031966577097e-06, | |
| "loss": 0.8394, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.06444735778085448, | |
| "grad_norm": 0.7193106412887573, | |
| "learning_rate": 5.8286732495653196e-06, | |
| "loss": 0.8796, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.06449636717840646, | |
| "grad_norm": 0.7336921095848083, | |
| "learning_rate": 5.756747587515055e-06, | |
| "loss": 0.8721, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.06454537657595844, | |
| "grad_norm": 0.6193203926086426, | |
| "learning_rate": 5.685255311182669e-06, | |
| "loss": 1.0327, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.06459438597351042, | |
| "grad_norm": 0.8311421275138855, | |
| "learning_rate": 5.614196749331546e-06, | |
| "loss": 1.0421, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.0646433953710624, | |
| "grad_norm": 0.5709735751152039, | |
| "learning_rate": 5.54357222873062e-06, | |
| "loss": 0.8931, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.06469240476861438, | |
| "grad_norm": 0.684586763381958, | |
| "learning_rate": 5.4733820741528e-06, | |
| "loss": 0.9502, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.06474141416616636, | |
| "grad_norm": 0.7916139364242554, | |
| "learning_rate": 5.403626608373602e-06, | |
| "loss": 0.8714, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.06479042356371835, | |
| "grad_norm": 0.6645509004592896, | |
| "learning_rate": 5.334306152169521e-06, | |
| "loss": 0.9467, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.06483943296127033, | |
| "grad_norm": 0.6344390511512756, | |
| "learning_rate": 5.265421024316664e-06, | |
| "loss": 0.6801, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.06488844235882231, | |
| "grad_norm": 0.7653583288192749, | |
| "learning_rate": 5.196971541589213e-06, | |
| "loss": 0.8782, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.06493745175637429, | |
| "grad_norm": 0.6185832619667053, | |
| "learning_rate": 5.128958018758012e-06, | |
| "loss": 0.8937, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.06498646115392627, | |
| "grad_norm": 0.5814158916473389, | |
| "learning_rate": 5.06138076858913e-06, | |
| "loss": 0.7603, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.06503547055147825, | |
| "grad_norm": 0.649247944355011, | |
| "learning_rate": 4.9942401018423625e-06, | |
| "loss": 0.9353, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.06508447994903023, | |
| "grad_norm": 0.6215224862098694, | |
| "learning_rate": 4.9275363272698215e-06, | |
| "loss": 0.8805, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.06513348934658221, | |
| "grad_norm": 0.8012757301330566, | |
| "learning_rate": 4.861269751614628e-06, | |
| "loss": 0.8529, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.06518249874413419, | |
| "grad_norm": 0.621527373790741, | |
| "learning_rate": 4.795440679609298e-06, | |
| "loss": 0.8713, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.06523150814168617, | |
| "grad_norm": 0.7184974551200867, | |
| "learning_rate": 4.73004941397448e-06, | |
| "loss": 0.9106, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.06528051753923815, | |
| "grad_norm": 0.7722237706184387, | |
| "learning_rate": 4.665096255417578e-06, | |
| "loss": 0.9184, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.06532952693679013, | |
| "grad_norm": 0.9010992050170898, | |
| "learning_rate": 4.600581502631263e-06, | |
| "loss": 0.9899, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.06537853633434211, | |
| "grad_norm": 0.762018620967865, | |
| "learning_rate": 4.536505452292206e-06, | |
| "loss": 0.8958, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.06542754573189409, | |
| "grad_norm": 0.6331391930580139, | |
| "learning_rate": 4.472868399059626e-06, | |
| "loss": 0.8438, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.06547655512944607, | |
| "grad_norm": 0.828467845916748, | |
| "learning_rate": 4.4096706355740145e-06, | |
| "loss": 0.8307, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.06552556452699805, | |
| "grad_norm": 0.6935224533081055, | |
| "learning_rate": 4.34691245245572e-06, | |
| "loss": 0.9423, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.06557457392455003, | |
| "grad_norm": 0.6905186176300049, | |
| "learning_rate": 4.284594138303655e-06, | |
| "loss": 0.9069, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.06562358332210201, | |
| "grad_norm": 1.1875556707382202, | |
| "learning_rate": 4.22271597969397e-06, | |
| "loss": 0.906, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.06567259271965399, | |
| "grad_norm": 0.5741354823112488, | |
| "learning_rate": 4.161278261178714e-06, | |
| "loss": 0.8514, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.06572160211720597, | |
| "grad_norm": 0.6324502825737, | |
| "learning_rate": 4.1002812652845e-06, | |
| "loss": 1.0126, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.06577061151475795, | |
| "grad_norm": 0.6402712464332581, | |
| "learning_rate": 4.039725272511308e-06, | |
| "loss": 0.7793, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.06581962091230993, | |
| "grad_norm": 0.6293900012969971, | |
| "learning_rate": 3.979610561331071e-06, | |
| "loss": 0.8921, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.06586863030986191, | |
| "grad_norm": 0.9233715534210205, | |
| "learning_rate": 3.919937408186447e-06, | |
| "loss": 0.9569, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.06591763970741389, | |
| "grad_norm": 0.6657326221466064, | |
| "learning_rate": 3.860706087489607e-06, | |
| "loss": 0.9189, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.06596664910496587, | |
| "grad_norm": 0.6525406837463379, | |
| "learning_rate": 3.801916871620881e-06, | |
| "loss": 0.884, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.06601565850251785, | |
| "grad_norm": 0.8163220882415771, | |
| "learning_rate": 3.7435700309275345e-06, | |
| "loss": 1.1409, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.06606466790006983, | |
| "grad_norm": 0.8402869701385498, | |
| "learning_rate": 3.6856658337225405e-06, | |
| "loss": 1.0371, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.06611367729762183, | |
| "grad_norm": 0.8356976509094238, | |
| "learning_rate": 3.6282045462833427e-06, | |
| "loss": 0.7902, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.0661626866951738, | |
| "grad_norm": 0.6106687188148499, | |
| "learning_rate": 3.571186432850626e-06, | |
| "loss": 0.8288, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.06621169609272579, | |
| "grad_norm": 0.7321956157684326, | |
| "learning_rate": 3.514611755627084e-06, | |
| "loss": 1.0251, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.06626070549027777, | |
| "grad_norm": 0.7708463072776794, | |
| "learning_rate": 3.458480774776274e-06, | |
| "loss": 1.0283, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.06630971488782975, | |
| "grad_norm": 0.7667643427848816, | |
| "learning_rate": 3.402793748421318e-06, | |
| "loss": 0.9402, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.06635872428538173, | |
| "grad_norm": 0.7186606526374817, | |
| "learning_rate": 3.3475509326438283e-06, | |
| "loss": 0.9398, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.0664077336829337, | |
| "grad_norm": 0.9855289459228516, | |
| "learning_rate": 3.29275258148265e-06, | |
| "loss": 1.1716, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.06645674308048569, | |
| "grad_norm": 0.7203282117843628, | |
| "learning_rate": 3.238398946932719e-06, | |
| "loss": 0.9221, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.06650575247803767, | |
| "grad_norm": 0.6085039377212524, | |
| "learning_rate": 3.184490278943897e-06, | |
| "loss": 0.8337, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.06655476187558965, | |
| "grad_norm": 0.8711732625961304, | |
| "learning_rate": 3.131026825419858e-06, | |
| "loss": 1.0989, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.06660377127314163, | |
| "grad_norm": 0.5561559200286865, | |
| "learning_rate": 3.078008832216894e-06, | |
| "loss": 0.7172, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.0666527806706936, | |
| "grad_norm": 1.0216760635375977, | |
| "learning_rate": 3.0254365431428013e-06, | |
| "loss": 1.0683, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.06670179006824559, | |
| "grad_norm": 0.6487326622009277, | |
| "learning_rate": 2.9733101999558142e-06, | |
| "loss": 0.9112, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.06675079946579757, | |
| "grad_norm": 0.643084704875946, | |
| "learning_rate": 2.9216300423633767e-06, | |
| "loss": 0.9172, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.06679980886334955, | |
| "grad_norm": 0.8597632646560669, | |
| "learning_rate": 2.8703963080211837e-06, | |
| "loss": 1.1109, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.06684881826090153, | |
| "grad_norm": 0.6921860575675964, | |
| "learning_rate": 2.819609232531939e-06, | |
| "loss": 0.8207, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.06689782765845351, | |
| "grad_norm": 0.8340615034103394, | |
| "learning_rate": 2.7692690494444227e-06, | |
| "loss": 0.7822, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.06694683705600549, | |
| "grad_norm": 0.8636243939399719, | |
| "learning_rate": 2.719375990252282e-06, | |
| "loss": 0.9578, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.06699584645355747, | |
| "grad_norm": 0.8949651122093201, | |
| "learning_rate": 2.669930284393052e-06, | |
| "loss": 1.0168, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.06704485585110945, | |
| "grad_norm": 0.6321136951446533, | |
| "learning_rate": 2.6209321592470804e-06, | |
| "loss": 0.7191, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.06709386524866143, | |
| "grad_norm": 0.5103330612182617, | |
| "learning_rate": 2.572381840136462e-06, | |
| "loss": 0.728, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.06714287464621341, | |
| "grad_norm": 0.6573939323425293, | |
| "learning_rate": 2.524279550324027e-06, | |
| "loss": 0.85, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.06719188404376539, | |
| "grad_norm": 0.6426239609718323, | |
| "learning_rate": 2.476625511012287e-06, | |
| "loss": 0.8577, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.06724089344131737, | |
| "grad_norm": 0.6823493838310242, | |
| "learning_rate": 2.42941994134247e-06, | |
| "loss": 0.9067, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.06728990283886935, | |
| "grad_norm": 0.5893593430519104, | |
| "learning_rate": 2.382663058393442e-06, | |
| "loss": 0.657, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.06733891223642133, | |
| "grad_norm": 0.7312384843826294, | |
| "learning_rate": 2.336355077180774e-06, | |
| "loss": 0.9672, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.06738792163397331, | |
| "grad_norm": 0.7566931247711182, | |
| "learning_rate": 2.2904962106556793e-06, | |
| "loss": 1.0775, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.0674369310315253, | |
| "grad_norm": 0.8246945738792419, | |
| "learning_rate": 2.245086669704144e-06, | |
| "loss": 0.9501, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.06748594042907728, | |
| "grad_norm": 1.02047598361969, | |
| "learning_rate": 2.2001266631458186e-06, | |
| "loss": 1.0639, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.06753494982662926, | |
| "grad_norm": 0.6248430013656616, | |
| "learning_rate": 2.1556163977331958e-06, | |
| "loss": 0.9738, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.06758395922418124, | |
| "grad_norm": 0.5300363898277283, | |
| "learning_rate": 2.1115560781505562e-06, | |
| "loss": 0.8054, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.06763296862173322, | |
| "grad_norm": 0.6251785755157471, | |
| "learning_rate": 2.067945907013069e-06, | |
| "loss": 0.9462, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.0676819780192852, | |
| "grad_norm": 0.6517499089241028, | |
| "learning_rate": 2.0247860848658815e-06, | |
| "loss": 0.8846, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.06773098741683718, | |
| "grad_norm": 0.703231930732727, | |
| "learning_rate": 1.982076810183153e-06, | |
| "loss": 0.8759, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.06777999681438916, | |
| "grad_norm": 0.6347089409828186, | |
| "learning_rate": 1.9398182793671447e-06, | |
| "loss": 0.8239, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.06782900621194114, | |
| "grad_norm": 0.7737944722175598, | |
| "learning_rate": 1.8980106867473536e-06, | |
| "loss": 0.9956, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.06787801560949312, | |
| "grad_norm": 0.6915851831436157, | |
| "learning_rate": 1.8566542245796347e-06, | |
| "loss": 0.8786, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.0679270250070451, | |
| "grad_norm": 0.6760699152946472, | |
| "learning_rate": 1.815749083045193e-06, | |
| "loss": 0.8288, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.06797603440459708, | |
| "grad_norm": 0.5723273158073425, | |
| "learning_rate": 1.775295450249892e-06, | |
| "loss": 0.9204, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.06802504380214906, | |
| "grad_norm": 0.880534291267395, | |
| "learning_rate": 1.7352935122232128e-06, | |
| "loss": 0.7802, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.06807405319970104, | |
| "grad_norm": 0.8262280821800232, | |
| "learning_rate": 1.6957434529175309e-06, | |
| "loss": 0.8427, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.06812306259725302, | |
| "grad_norm": 0.5815930366516113, | |
| "learning_rate": 1.6566454542071951e-06, | |
| "loss": 0.8462, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.068172071994805, | |
| "grad_norm": 0.768941342830658, | |
| "learning_rate": 1.6179996958877397e-06, | |
| "loss": 0.9763, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.06822108139235698, | |
| "grad_norm": 0.5803397297859192, | |
| "learning_rate": 1.5798063556749954e-06, | |
| "loss": 0.8994, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.06827009078990896, | |
| "grad_norm": 0.745637834072113, | |
| "learning_rate": 1.5420656092043352e-06, | |
| "loss": 0.8625, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.06831910018746094, | |
| "grad_norm": 1.1419793367385864, | |
| "learning_rate": 1.5047776300298411e-06, | |
| "loss": 0.9221, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.06836810958501292, | |
| "grad_norm": 0.8074661493301392, | |
| "learning_rate": 1.4679425896234833e-06, | |
| "loss": 1.1161, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.0684171189825649, | |
| "grad_norm": 0.5724306106567383, | |
| "learning_rate": 1.4315606573743755e-06, | |
| "loss": 0.9047, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.06846612838011688, | |
| "grad_norm": 1.6764434576034546, | |
| "learning_rate": 1.3956320005879765e-06, | |
| "loss": 0.9669, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.06851513777766886, | |
| "grad_norm": 0.7271180748939514, | |
| "learning_rate": 1.3601567844853114e-06, | |
| "loss": 0.9313, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.06856414717522084, | |
| "grad_norm": 0.6413068175315857, | |
| "learning_rate": 1.3251351722021964e-06, | |
| "loss": 0.8178, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.06861315657277282, | |
| "grad_norm": 0.6867620348930359, | |
| "learning_rate": 1.2905673247885718e-06, | |
| "loss": 0.9873, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.0686621659703248, | |
| "grad_norm": 0.6367160677909851, | |
| "learning_rate": 1.2564534012076245e-06, | |
| "loss": 0.9447, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.06871117536787678, | |
| "grad_norm": 0.6995184421539307, | |
| "learning_rate": 1.222793558335189e-06, | |
| "loss": 0.9478, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.06876018476542878, | |
| "grad_norm": 0.6374393701553345, | |
| "learning_rate": 1.1895879509589592e-06, | |
| "loss": 0.8458, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.06880919416298076, | |
| "grad_norm": 0.8276384472846985, | |
| "learning_rate": 1.1568367317777662e-06, | |
| "loss": 0.8084, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.06885820356053274, | |
| "grad_norm": 1.0978977680206299, | |
| "learning_rate": 1.1245400514009351e-06, | |
| "loss": 1.1747, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.06890721295808472, | |
| "grad_norm": 0.7997610569000244, | |
| "learning_rate": 1.0926980583475076e-06, | |
| "loss": 0.7353, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.0689562223556367, | |
| "grad_norm": 0.902769923210144, | |
| "learning_rate": 1.0613108990456643e-06, | |
| "loss": 0.7245, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.06900523175318868, | |
| "grad_norm": 0.5651756525039673, | |
| "learning_rate": 1.0303787178319368e-06, | |
| "loss": 0.9087, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.06905424115074066, | |
| "grad_norm": 0.6706457138061523, | |
| "learning_rate": 9.999016569506304e-07, | |
| "loss": 1.0007, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.06910325054829264, | |
| "grad_norm": 0.6027235984802246, | |
| "learning_rate": 9.698798565531464e-07, | |
| "loss": 0.8687, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.06915225994584462, | |
| "grad_norm": 0.7728469371795654, | |
| "learning_rate": 9.403134546973058e-07, | |
| "loss": 0.6449, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.0692012693433966, | |
| "grad_norm": 0.6937982439994812, | |
| "learning_rate": 9.112025873467711e-07, | |
| "loss": 0.8034, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.06925027874094858, | |
| "grad_norm": 0.7616922855377197, | |
| "learning_rate": 8.825473883703695e-07, | |
| "loss": 1.0196, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.06929928813850056, | |
| "grad_norm": 0.6613472104072571, | |
| "learning_rate": 8.543479895415041e-07, | |
| "loss": 0.9019, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.06934829753605254, | |
| "grad_norm": 0.751873791217804, | |
| "learning_rate": 8.266045205375328e-07, | |
| "loss": 0.9623, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.06939730693360452, | |
| "grad_norm": 0.6417289972305298, | |
| "learning_rate": 7.993171089391905e-07, | |
| "loss": 1.0231, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.0694463163311565, | |
| "grad_norm": 0.7124348878860474, | |
| "learning_rate": 7.724858802300006e-07, | |
| "loss": 0.9813, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.06949532572870848, | |
| "grad_norm": 0.7025142908096313, | |
| "learning_rate": 7.461109577956648e-07, | |
| "loss": 0.9227, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.06954433512626046, | |
| "grad_norm": 0.9039905071258545, | |
| "learning_rate": 7.201924629235524e-07, | |
| "loss": 1.0741, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.06959334452381244, | |
| "grad_norm": 0.6852249503135681, | |
| "learning_rate": 6.947305148020889e-07, | |
| "loss": 0.985, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.06964235392136442, | |
| "grad_norm": 0.8314114809036255, | |
| "learning_rate": 6.697252305202461e-07, | |
| "loss": 0.8991, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.0696913633189164, | |
| "grad_norm": 0.6999631524085999, | |
| "learning_rate": 6.451767250669538e-07, | |
| "loss": 0.9926, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.06974037271646838, | |
| "grad_norm": 0.6860081553459167, | |
| "learning_rate": 6.210851113306548e-07, | |
| "loss": 0.9839, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.06978938211402036, | |
| "grad_norm": 0.6958408355712891, | |
| "learning_rate": 5.974505000987062e-07, | |
| "loss": 0.9177, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.06983839151157234, | |
| "grad_norm": 0.9045180082321167, | |
| "learning_rate": 5.742730000568908e-07, | |
| "loss": 1.0029, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.06988740090912432, | |
| "grad_norm": 0.6793363690376282, | |
| "learning_rate": 5.515527177889501e-07, | |
| "loss": 0.9631, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.0699364103066763, | |
| "grad_norm": 0.7411555647850037, | |
| "learning_rate": 5.292897577760747e-07, | |
| "loss": 1.0294, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.06998541970422828, | |
| "grad_norm": 0.7518298625946045, | |
| "learning_rate": 5.074842223963816e-07, | |
| "loss": 0.8648, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.07003442910178026, | |
| "grad_norm": 0.8642678260803223, | |
| "learning_rate": 4.861362119245039e-07, | |
| "loss": 1.0929, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.07008343849933224, | |
| "grad_norm": 0.7154537439346313, | |
| "learning_rate": 4.652458245311242e-07, | |
| "loss": 0.8986, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.07013244789688423, | |
| "grad_norm": 0.6140034794807434, | |
| "learning_rate": 4.448131562824864e-07, | |
| "loss": 1.0537, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.07018145729443621, | |
| "grad_norm": 0.6018050909042358, | |
| "learning_rate": 4.248383011399626e-07, | |
| "loss": 0.8818, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.0702304666919882, | |
| "grad_norm": 0.5931532979011536, | |
| "learning_rate": 4.053213509596532e-07, | |
| "loss": 0.8915, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.07027947608954017, | |
| "grad_norm": 0.5891003012657166, | |
| "learning_rate": 3.862623954919431e-07, | |
| "loss": 0.9231, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.07032848548709215, | |
| "grad_norm": 0.7145758867263794, | |
| "learning_rate": 3.6766152238106865e-07, | |
| "loss": 0.8431, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.07037749488464413, | |
| "grad_norm": 0.7791634798049927, | |
| "learning_rate": 3.495188171647512e-07, | |
| "loss": 0.8727, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.07042650428219611, | |
| "grad_norm": 0.6540659070014954, | |
| "learning_rate": 3.3183436327379744e-07, | |
| "loss": 0.8934, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.0704755136797481, | |
| "grad_norm": 0.677821934223175, | |
| "learning_rate": 3.146082420316776e-07, | |
| "loss": 1.0389, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.07052452307730007, | |
| "grad_norm": 0.5481241941452026, | |
| "learning_rate": 2.978405326541922e-07, | |
| "loss": 0.8784, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.07057353247485205, | |
| "grad_norm": 0.6913087368011475, | |
| "learning_rate": 2.81531312249117e-07, | |
| "loss": 0.8306, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.07062254187240403, | |
| "grad_norm": 0.7169864177703857, | |
| "learning_rate": 2.6568065581579207e-07, | |
| "loss": 0.6832, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.07067155126995601, | |
| "grad_norm": 0.6380186080932617, | |
| "learning_rate": 2.5028863624482204e-07, | |
| "loss": 0.9776, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.070720560667508, | |
| "grad_norm": 0.7633563280105591, | |
| "learning_rate": 2.353553243177542e-07, | |
| "loss": 0.9383, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.07076957006505998, | |
| "grad_norm": 0.6005712747573853, | |
| "learning_rate": 2.2088078870668994e-07, | |
| "loss": 0.8413, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.07081857946261196, | |
| "grad_norm": 1.0024440288543701, | |
| "learning_rate": 2.0686509597404037e-07, | |
| "loss": 0.8163, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.07086758886016394, | |
| "grad_norm": 0.8160889148712158, | |
| "learning_rate": 1.9330831057218223e-07, | |
| "loss": 0.9675, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.07091659825771592, | |
| "grad_norm": 0.77981036901474, | |
| "learning_rate": 1.8021049484314712e-07, | |
| "loss": 0.9325, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.0709656076552679, | |
| "grad_norm": 0.6795889735221863, | |
| "learning_rate": 1.6757170901837703e-07, | |
| "loss": 0.9341, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.07101461705281988, | |
| "grad_norm": 0.7234419584274292, | |
| "learning_rate": 1.5539201121841373e-07, | |
| "loss": 0.7137, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.07106362645037186, | |
| "grad_norm": 0.6544097661972046, | |
| "learning_rate": 1.436714574526543e-07, | |
| "loss": 0.8777, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.07111263584792384, | |
| "grad_norm": 0.7138416171073914, | |
| "learning_rate": 1.3241010161907375e-07, | |
| "loss": 0.8288, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.07116164524547582, | |
| "grad_norm": 0.6424012184143066, | |
| "learning_rate": 1.216079955039806e-07, | |
| "loss": 1.0348, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.0712106546430278, | |
| "grad_norm": 0.6267834901809692, | |
| "learning_rate": 1.1126518878179504e-07, | |
| "loss": 0.9566, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 0.07125966404057978, | |
| "grad_norm": 0.846564531326294, | |
| "learning_rate": 1.0138172901480447e-07, | |
| "loss": 0.9682, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.07130867343813176, | |
| "grad_norm": 1.1881537437438965, | |
| "learning_rate": 9.195766165295272e-08, | |
| "loss": 1.1735, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.07135768283568374, | |
| "grad_norm": 0.5580865144729614, | |
| "learning_rate": 8.299303003361791e-08, | |
| "loss": 0.7716, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.07140669223323572, | |
| "grad_norm": 0.7786732912063599, | |
| "learning_rate": 7.448787538144597e-08, | |
| "loss": 0.7723, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.07145570163078771, | |
| "grad_norm": 0.7427374720573425, | |
| "learning_rate": 6.644223680810635e-08, | |
| "loss": 0.8426, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.07150471102833969, | |
| "grad_norm": 0.7211986184120178, | |
| "learning_rate": 5.885615131216993e-08, | |
| "loss": 0.8432, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.07155372042589167, | |
| "grad_norm": 0.7002434730529785, | |
| "learning_rate": 5.172965377890915e-08, | |
| "loss": 0.9946, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.07160272982344365, | |
| "grad_norm": 0.5924804210662842, | |
| "learning_rate": 4.50627769801315e-08, | |
| "loss": 0.9195, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.07165173922099563, | |
| "grad_norm": 0.7211676239967346, | |
| "learning_rate": 3.88555515740463e-08, | |
| "loss": 0.7864, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.07170074861854761, | |
| "grad_norm": 0.5475707650184631, | |
| "learning_rate": 3.310800610510922e-08, | |
| "loss": 0.9374, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.07174975801609959, | |
| "grad_norm": 0.8015599846839905, | |
| "learning_rate": 2.7820167003911324e-08, | |
| "loss": 0.8218, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.07179876741365157, | |
| "grad_norm": 0.7530079483985901, | |
| "learning_rate": 2.2992058587023578e-08, | |
| "loss": 0.7691, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.07184777681120355, | |
| "grad_norm": 0.6978147625923157, | |
| "learning_rate": 1.862370305694139e-08, | |
| "loss": 0.7911, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.07189678620875553, | |
| "grad_norm": 1.2115317583084106, | |
| "learning_rate": 1.4715120501895829e-08, | |
| "loss": 0.9976, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 0.07194579560630751, | |
| "grad_norm": 0.6246762275695801, | |
| "learning_rate": 1.1266328895864764e-08, | |
| "loss": 1.0398, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.07199480500385949, | |
| "grad_norm": 0.86592036485672, | |
| "learning_rate": 8.277344098406303e-09, | |
| "loss": 1.0155, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 0.07204381440141147, | |
| "grad_norm": 0.6934499740600586, | |
| "learning_rate": 5.7481798546144e-09, | |
| "loss": 0.8013, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.07209282379896345, | |
| "grad_norm": 0.7887607216835022, | |
| "learning_rate": 3.678847795085538e-09, | |
| "loss": 0.9494, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 0.07214183319651543, | |
| "grad_norm": 0.7494127154350281, | |
| "learning_rate": 2.069357435796615e-09, | |
| "loss": 0.8994, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.07219084259406741, | |
| "grad_norm": 0.6598345637321472, | |
| "learning_rate": 9.197161781604458e-10, | |
| "loss": 0.8317, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 0.07223985199161939, | |
| "grad_norm": 0.6405826807022095, | |
| "learning_rate": 2.2992930888143804e-10, | |
| "loss": 0.8109, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.07228886138917137, | |
| "grad_norm": 0.7148447632789612, | |
| "learning_rate": 0.0, | |
| "loss": 0.9846, | |
| "step": 1475 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1475, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 369, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.063103052008653e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |