diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,50282 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 7177, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00013934369121438027, + "grad_norm": 62.34126663208008, + "learning_rate": 0.0, + "loss": 0.7340087890625, + "step": 1 + }, + { + "epoch": 0.00027868738242876054, + "grad_norm": 76.04585266113281, + "learning_rate": 7.8125e-08, + "loss": 0.74371337890625, + "step": 2 + }, + { + "epoch": 0.0004180310736431408, + "grad_norm": 74.93118286132812, + "learning_rate": 1.5625e-07, + "loss": 0.74176025390625, + "step": 3 + }, + { + "epoch": 0.0005573747648575211, + "grad_norm": 76.86418914794922, + "learning_rate": 2.3437500000000003e-07, + "loss": 0.74249267578125, + "step": 4 + }, + { + "epoch": 0.0006967184560719013, + "grad_norm": 80.23001098632812, + "learning_rate": 3.125e-07, + "loss": 0.74761962890625, + "step": 5 + }, + { + "epoch": 0.0008360621472862816, + "grad_norm": 74.72649383544922, + "learning_rate": 3.90625e-07, + "loss": 0.7421875, + "step": 6 + }, + { + "epoch": 0.0009754058385006619, + "grad_norm": 81.1721420288086, + "learning_rate": 4.6875000000000006e-07, + "loss": 0.74530029296875, + "step": 7 + }, + { + "epoch": 0.0011147495297150422, + "grad_norm": 78.1029052734375, + "learning_rate": 5.468750000000001e-07, + "loss": 0.74542236328125, + "step": 8 + }, + { + "epoch": 0.0012540932209294225, + "grad_norm": 77.72759246826172, + "learning_rate": 6.25e-07, + "loss": 0.7427978515625, + "step": 9 + }, + { + "epoch": 0.0013934369121438026, + "grad_norm": 79.11392211914062, + "learning_rate": 7.03125e-07, + "loss": 0.74542236328125, + "step": 10 + }, + { + "epoch": 0.001532780603358183, + "grad_norm": 77.76611328125, + "learning_rate": 7.8125e-07, + "loss": 0.74395751953125, + "step": 11 + }, + { + "epoch": 0.0016721242945725633, + "grad_norm": 75.68639373779297, + "learning_rate": 8.59375e-07, + "loss": 0.7366943359375, + "step": 12 + }, + { + "epoch": 0.0018114679857869436, + "grad_norm": 74.71607971191406, + "learning_rate": 9.375000000000001e-07, + "loss": 0.73565673828125, + "step": 13 + }, + { + "epoch": 0.0019508116770013237, + "grad_norm": 76.45935821533203, + "learning_rate": 1.0156250000000001e-06, + "loss": 0.73614501953125, + "step": 14 + }, + { + "epoch": 0.0020901553682157042, + "grad_norm": 74.24329376220703, + "learning_rate": 1.0937500000000001e-06, + "loss": 0.7337646484375, + "step": 15 + }, + { + "epoch": 0.0022294990594300844, + "grad_norm": 72.44110870361328, + "learning_rate": 1.1718750000000001e-06, + "loss": 0.7198486328125, + "step": 16 + }, + { + "epoch": 0.0023688427506444645, + "grad_norm": 70.95374298095703, + "learning_rate": 1.25e-06, + "loss": 0.72052001953125, + "step": 17 + }, + { + "epoch": 0.002508186441858845, + "grad_norm": 74.66172790527344, + "learning_rate": 1.328125e-06, + "loss": 0.72174072265625, + "step": 18 + }, + { + "epoch": 0.002647530133073225, + "grad_norm": 77.27931213378906, + "learning_rate": 1.40625e-06, + "loss": 0.71795654296875, + "step": 19 + }, + { + "epoch": 0.0027868738242876052, + "grad_norm": 68.34194946289062, + "learning_rate": 1.484375e-06, + "loss": 0.71368408203125, + "step": 20 + }, + { + "epoch": 0.0029262175155019858, + "grad_norm": 70.93338012695312, + "learning_rate": 1.5625e-06, + "loss": 0.71026611328125, + "step": 21 + }, + { + "epoch": 0.003065561206716366, + "grad_norm": 66.80960083007812, + "learning_rate": 1.640625e-06, + "loss": 0.66912841796875, + "step": 22 + }, + { + "epoch": 0.003204904897930746, + "grad_norm": 68.910888671875, + "learning_rate": 1.71875e-06, + "loss": 0.66455078125, + "step": 23 + }, + { + "epoch": 0.0033442485891451265, + "grad_norm": 64.29890441894531, + "learning_rate": 1.796875e-06, + "loss": 0.665283203125, + "step": 24 + }, + { + "epoch": 0.0034835922803595066, + "grad_norm": 62.51341247558594, + "learning_rate": 1.8750000000000003e-06, + "loss": 0.665771484375, + "step": 25 + }, + { + "epoch": 0.003622935971573887, + "grad_norm": 62.852195739746094, + "learning_rate": 1.953125e-06, + "loss": 0.650634765625, + "step": 26 + }, + { + "epoch": 0.0037622796627882673, + "grad_norm": 65.60198974609375, + "learning_rate": 2.0312500000000002e-06, + "loss": 0.64501953125, + "step": 27 + }, + { + "epoch": 0.0039016233540026474, + "grad_norm": 68.4287338256836, + "learning_rate": 2.109375e-06, + "loss": 0.640380859375, + "step": 28 + }, + { + "epoch": 0.0040409670452170275, + "grad_norm": 64.33171844482422, + "learning_rate": 2.1875000000000002e-06, + "loss": 0.63739013671875, + "step": 29 + }, + { + "epoch": 0.0041803107364314085, + "grad_norm": 54.46392822265625, + "learning_rate": 2.265625e-06, + "loss": 0.556640625, + "step": 30 + }, + { + "epoch": 0.004319654427645789, + "grad_norm": 51.8431396484375, + "learning_rate": 2.3437500000000002e-06, + "loss": 0.552490234375, + "step": 31 + }, + { + "epoch": 0.004458998118860169, + "grad_norm": 54.7401123046875, + "learning_rate": 2.421875e-06, + "loss": 0.54986572265625, + "step": 32 + }, + { + "epoch": 0.004598341810074549, + "grad_norm": 51.9644775390625, + "learning_rate": 2.5e-06, + "loss": 0.536529541015625, + "step": 33 + }, + { + "epoch": 0.004737685501288929, + "grad_norm": 42.461185455322266, + "learning_rate": 2.5781250000000004e-06, + "loss": 0.55792236328125, + "step": 34 + }, + { + "epoch": 0.004877029192503309, + "grad_norm": 46.63920211791992, + "learning_rate": 2.65625e-06, + "loss": 0.53765869140625, + "step": 35 + }, + { + "epoch": 0.00501637288371769, + "grad_norm": 45.58441162109375, + "learning_rate": 2.7343750000000004e-06, + "loss": 0.5164794921875, + "step": 36 + }, + { + "epoch": 0.00515571657493207, + "grad_norm": 38.332000732421875, + "learning_rate": 2.8125e-06, + "loss": 0.52667236328125, + "step": 37 + }, + { + "epoch": 0.00529506026614645, + "grad_norm": 39.08782958984375, + "learning_rate": 2.8906250000000004e-06, + "loss": 0.51177978515625, + "step": 38 + }, + { + "epoch": 0.00543440395736083, + "grad_norm": 44.8343620300293, + "learning_rate": 2.96875e-06, + "loss": 0.4725341796875, + "step": 39 + }, + { + "epoch": 0.0055737476485752105, + "grad_norm": 38.693748474121094, + "learning_rate": 3.0468750000000004e-06, + "loss": 0.4581298828125, + "step": 40 + }, + { + "epoch": 0.005713091339789591, + "grad_norm": 25.418514251708984, + "learning_rate": 3.125e-06, + "loss": 0.457122802734375, + "step": 41 + }, + { + "epoch": 0.0058524350310039715, + "grad_norm": 24.029638290405273, + "learning_rate": 3.2031250000000004e-06, + "loss": 0.43280029296875, + "step": 42 + }, + { + "epoch": 0.005991778722218352, + "grad_norm": 16.478883743286133, + "learning_rate": 3.28125e-06, + "loss": 0.46063232421875, + "step": 43 + }, + { + "epoch": 0.006131122413432732, + "grad_norm": 22.53028678894043, + "learning_rate": 3.3593750000000003e-06, + "loss": 0.411407470703125, + "step": 44 + }, + { + "epoch": 0.006270466104647112, + "grad_norm": 12.247846603393555, + "learning_rate": 3.4375e-06, + "loss": 0.466552734375, + "step": 45 + }, + { + "epoch": 0.006409809795861492, + "grad_norm": 20.37785530090332, + "learning_rate": 3.5156250000000003e-06, + "loss": 0.40386962890625, + "step": 46 + }, + { + "epoch": 0.006549153487075873, + "grad_norm": 8.093513488769531, + "learning_rate": 3.59375e-06, + "loss": 0.474212646484375, + "step": 47 + }, + { + "epoch": 0.006688497178290253, + "grad_norm": 13.000631332397461, + "learning_rate": 3.6718750000000003e-06, + "loss": 0.42449951171875, + "step": 48 + }, + { + "epoch": 0.006827840869504633, + "grad_norm": 17.221866607666016, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.386322021484375, + "step": 49 + }, + { + "epoch": 0.006967184560719013, + "grad_norm": 7.733644485473633, + "learning_rate": 3.828125000000001e-06, + "loss": 0.436676025390625, + "step": 50 + }, + { + "epoch": 0.007106528251933393, + "grad_norm": 11.239167213439941, + "learning_rate": 3.90625e-06, + "loss": 0.4019317626953125, + "step": 51 + }, + { + "epoch": 0.007245871943147774, + "grad_norm": 4.6299285888671875, + "learning_rate": 3.984375e-06, + "loss": 0.436676025390625, + "step": 52 + }, + { + "epoch": 0.0073852156343621545, + "grad_norm": 10.800525665283203, + "learning_rate": 4.0625000000000005e-06, + "loss": 0.3664703369140625, + "step": 53 + }, + { + "epoch": 0.007524559325576535, + "grad_norm": 4.318990230560303, + "learning_rate": 4.140625000000001e-06, + "loss": 0.4152679443359375, + "step": 54 + }, + { + "epoch": 0.007663903016790915, + "grad_norm": 6.919592380523682, + "learning_rate": 4.21875e-06, + "loss": 0.376708984375, + "step": 55 + }, + { + "epoch": 0.007803246708005295, + "grad_norm": 5.507626056671143, + "learning_rate": 4.296875e-06, + "loss": 0.4770355224609375, + "step": 56 + }, + { + "epoch": 0.007942590399219676, + "grad_norm": 1.4001505374908447, + "learning_rate": 4.3750000000000005e-06, + "loss": 0.4373016357421875, + "step": 57 + }, + { + "epoch": 0.008081934090434055, + "grad_norm": 3.9668099880218506, + "learning_rate": 4.453125000000001e-06, + "loss": 0.38494873046875, + "step": 58 + }, + { + "epoch": 0.008221277781648436, + "grad_norm": 4.9355902671813965, + "learning_rate": 4.53125e-06, + "loss": 0.3683013916015625, + "step": 59 + }, + { + "epoch": 0.008360621472862817, + "grad_norm": 1.801935076713562, + "learning_rate": 4.609375e-06, + "loss": 0.40863037109375, + "step": 60 + }, + { + "epoch": 0.008499965164077196, + "grad_norm": 2.3567073345184326, + "learning_rate": 4.6875000000000004e-06, + "loss": 0.413116455078125, + "step": 61 + }, + { + "epoch": 0.008639308855291577, + "grad_norm": 3.354320526123047, + "learning_rate": 4.765625000000001e-06, + "loss": 0.3906707763671875, + "step": 62 + }, + { + "epoch": 0.008778652546505956, + "grad_norm": 2.8657212257385254, + "learning_rate": 4.84375e-06, + "loss": 0.436553955078125, + "step": 63 + }, + { + "epoch": 0.008917996237720337, + "grad_norm": 4.132837295532227, + "learning_rate": 4.921875e-06, + "loss": 0.3879547119140625, + "step": 64 + }, + { + "epoch": 0.009057339928934717, + "grad_norm": 3.973177671432495, + "learning_rate": 5e-06, + "loss": 0.3724822998046875, + "step": 65 + }, + { + "epoch": 0.009196683620149098, + "grad_norm": 5.895086288452148, + "learning_rate": 5.078125000000001e-06, + "loss": 0.333343505859375, + "step": 66 + }, + { + "epoch": 0.009336027311363479, + "grad_norm": 7.861949443817139, + "learning_rate": 5.156250000000001e-06, + "loss": 0.30877685546875, + "step": 67 + }, + { + "epoch": 0.009475371002577858, + "grad_norm": 6.9557719230651855, + "learning_rate": 5.234375e-06, + "loss": 0.294158935546875, + "step": 68 + }, + { + "epoch": 0.009614714693792239, + "grad_norm": 8.759953498840332, + "learning_rate": 5.3125e-06, + "loss": 0.3767242431640625, + "step": 69 + }, + { + "epoch": 0.009754058385006618, + "grad_norm": 12.213696479797363, + "learning_rate": 5.390625000000001e-06, + "loss": 0.3021240234375, + "step": 70 + }, + { + "epoch": 0.009893402076220999, + "grad_norm": 9.209831237792969, + "learning_rate": 5.468750000000001e-06, + "loss": 0.37567138671875, + "step": 71 + }, + { + "epoch": 0.01003274576743538, + "grad_norm": 18.2492733001709, + "learning_rate": 5.546875e-06, + "loss": 0.3045501708984375, + "step": 72 + }, + { + "epoch": 0.01017208945864976, + "grad_norm": 10.091484069824219, + "learning_rate": 5.625e-06, + "loss": 0.3849945068359375, + "step": 73 + }, + { + "epoch": 0.01031143314986414, + "grad_norm": 27.06923484802246, + "learning_rate": 5.7031250000000006e-06, + "loss": 0.2933197021484375, + "step": 74 + }, + { + "epoch": 0.01045077684107852, + "grad_norm": 33.39762878417969, + "learning_rate": 5.781250000000001e-06, + "loss": 0.28076171875, + "step": 75 + }, + { + "epoch": 0.0105901205322929, + "grad_norm": 6.225371837615967, + "learning_rate": 5.859375e-06, + "loss": 0.3713531494140625, + "step": 76 + }, + { + "epoch": 0.010729464223507281, + "grad_norm": 17.93594741821289, + "learning_rate": 5.9375e-06, + "loss": 0.36346435546875, + "step": 77 + }, + { + "epoch": 0.01086880791472166, + "grad_norm": 12.738279342651367, + "learning_rate": 6.0156250000000005e-06, + "loss": 0.2913360595703125, + "step": 78 + }, + { + "epoch": 0.011008151605936042, + "grad_norm": 26.582416534423828, + "learning_rate": 6.093750000000001e-06, + "loss": 0.329833984375, + "step": 79 + }, + { + "epoch": 0.011147495297150421, + "grad_norm": 27.38011360168457, + "learning_rate": 6.171875e-06, + "loss": 0.343017578125, + "step": 80 + }, + { + "epoch": 0.011286838988364802, + "grad_norm": 6.471011161804199, + "learning_rate": 6.25e-06, + "loss": 0.363861083984375, + "step": 81 + }, + { + "epoch": 0.011426182679579183, + "grad_norm": 26.024539947509766, + "learning_rate": 6.3281250000000005e-06, + "loss": 0.3722381591796875, + "step": 82 + }, + { + "epoch": 0.011565526370793562, + "grad_norm": 16.425132751464844, + "learning_rate": 6.406250000000001e-06, + "loss": 0.37847137451171875, + "step": 83 + }, + { + "epoch": 0.011704870062007943, + "grad_norm": 6.577551364898682, + "learning_rate": 6.484375000000001e-06, + "loss": 0.3098297119140625, + "step": 84 + }, + { + "epoch": 0.011844213753222322, + "grad_norm": 21.977825164794922, + "learning_rate": 6.5625e-06, + "loss": 0.3199920654296875, + "step": 85 + }, + { + "epoch": 0.011983557444436703, + "grad_norm": 27.84907341003418, + "learning_rate": 6.6406250000000005e-06, + "loss": 0.3336334228515625, + "step": 86 + }, + { + "epoch": 0.012122901135651083, + "grad_norm": 25.033903121948242, + "learning_rate": 6.718750000000001e-06, + "loss": 0.2919769287109375, + "step": 87 + }, + { + "epoch": 0.012262244826865464, + "grad_norm": 21.01466941833496, + "learning_rate": 6.796875000000001e-06, + "loss": 0.3365325927734375, + "step": 88 + }, + { + "epoch": 0.012401588518079844, + "grad_norm": 20.404619216918945, + "learning_rate": 6.875e-06, + "loss": 0.3393402099609375, + "step": 89 + }, + { + "epoch": 0.012540932209294224, + "grad_norm": 22.600536346435547, + "learning_rate": 6.9531250000000004e-06, + "loss": 0.3045806884765625, + "step": 90 + }, + { + "epoch": 0.012680275900508605, + "grad_norm": 20.3632755279541, + "learning_rate": 7.031250000000001e-06, + "loss": 0.3455047607421875, + "step": 91 + }, + { + "epoch": 0.012819619591722984, + "grad_norm": 10.40807056427002, + "learning_rate": 7.109375000000001e-06, + "loss": 0.276885986328125, + "step": 92 + }, + { + "epoch": 0.012958963282937365, + "grad_norm": 11.00056266784668, + "learning_rate": 7.1875e-06, + "loss": 0.37176513671875, + "step": 93 + }, + { + "epoch": 0.013098306974151746, + "grad_norm": 10.657183647155762, + "learning_rate": 7.265625e-06, + "loss": 0.3013916015625, + "step": 94 + }, + { + "epoch": 0.013237650665366125, + "grad_norm": 6.7510833740234375, + "learning_rate": 7.343750000000001e-06, + "loss": 0.278228759765625, + "step": 95 + }, + { + "epoch": 0.013376994356580506, + "grad_norm": 9.210660934448242, + "learning_rate": 7.421875000000001e-06, + "loss": 0.27777099609375, + "step": 96 + }, + { + "epoch": 0.013516338047794885, + "grad_norm": 11.156201362609863, + "learning_rate": 7.500000000000001e-06, + "loss": 0.2919921875, + "step": 97 + }, + { + "epoch": 0.013655681739009266, + "grad_norm": 7.530241012573242, + "learning_rate": 7.578125e-06, + "loss": 0.2903594970703125, + "step": 98 + }, + { + "epoch": 0.013795025430223647, + "grad_norm": 5.452517986297607, + "learning_rate": 7.656250000000001e-06, + "loss": 0.3092498779296875, + "step": 99 + }, + { + "epoch": 0.013934369121438027, + "grad_norm": 6.121318817138672, + "learning_rate": 7.734375e-06, + "loss": 0.29443359375, + "step": 100 + }, + { + "epoch": 0.014073712812652408, + "grad_norm": 12.478734016418457, + "learning_rate": 7.8125e-06, + "loss": 0.2588653564453125, + "step": 101 + }, + { + "epoch": 0.014213056503866787, + "grad_norm": 4.904646396636963, + "learning_rate": 7.890625e-06, + "loss": 0.3369140625, + "step": 102 + }, + { + "epoch": 0.014352400195081168, + "grad_norm": 18.539011001586914, + "learning_rate": 7.96875e-06, + "loss": 0.3047332763671875, + "step": 103 + }, + { + "epoch": 0.014491743886295549, + "grad_norm": 22.8604679107666, + "learning_rate": 8.046875e-06, + "loss": 0.31790924072265625, + "step": 104 + }, + { + "epoch": 0.014631087577509928, + "grad_norm": 13.456306457519531, + "learning_rate": 8.125000000000001e-06, + "loss": 0.2469940185546875, + "step": 105 + }, + { + "epoch": 0.014770431268724309, + "grad_norm": 3.3287200927734375, + "learning_rate": 8.203125000000001e-06, + "loss": 0.3122711181640625, + "step": 106 + }, + { + "epoch": 0.014909774959938688, + "grad_norm": 4.818490028381348, + "learning_rate": 8.281250000000001e-06, + "loss": 0.26700592041015625, + "step": 107 + }, + { + "epoch": 0.01504911865115307, + "grad_norm": 12.691768646240234, + "learning_rate": 8.359375e-06, + "loss": 0.20655059814453125, + "step": 108 + }, + { + "epoch": 0.01518846234236745, + "grad_norm": 5.591887474060059, + "learning_rate": 8.4375e-06, + "loss": 0.27243804931640625, + "step": 109 + }, + { + "epoch": 0.01532780603358183, + "grad_norm": 3.402745246887207, + "learning_rate": 8.515625e-06, + "loss": 0.26631927490234375, + "step": 110 + }, + { + "epoch": 0.01546714972479621, + "grad_norm": 9.289361953735352, + "learning_rate": 8.59375e-06, + "loss": 0.25142669677734375, + "step": 111 + }, + { + "epoch": 0.01560649341601059, + "grad_norm": 7.318595886230469, + "learning_rate": 8.671875e-06, + "loss": 0.28067779541015625, + "step": 112 + }, + { + "epoch": 0.01574583710722497, + "grad_norm": 9.700470924377441, + "learning_rate": 8.750000000000001e-06, + "loss": 0.3552093505859375, + "step": 113 + }, + { + "epoch": 0.01588518079843935, + "grad_norm": 12.45793628692627, + "learning_rate": 8.828125000000001e-06, + "loss": 0.2666168212890625, + "step": 114 + }, + { + "epoch": 0.016024524489653733, + "grad_norm": 5.740286350250244, + "learning_rate": 8.906250000000001e-06, + "loss": 0.223175048828125, + "step": 115 + }, + { + "epoch": 0.01616386818086811, + "grad_norm": 25.221811294555664, + "learning_rate": 8.984375000000002e-06, + "loss": 0.31703948974609375, + "step": 116 + }, + { + "epoch": 0.01630321187208249, + "grad_norm": 34.88270568847656, + "learning_rate": 9.0625e-06, + "loss": 0.2826690673828125, + "step": 117 + }, + { + "epoch": 0.016442555563296872, + "grad_norm": 5.641410827636719, + "learning_rate": 9.140625e-06, + "loss": 0.22713470458984375, + "step": 118 + }, + { + "epoch": 0.016581899254511253, + "grad_norm": 10.959922790527344, + "learning_rate": 9.21875e-06, + "loss": 0.30878448486328125, + "step": 119 + }, + { + "epoch": 0.016721242945725634, + "grad_norm": 4.234172344207764, + "learning_rate": 9.296875e-06, + "loss": 0.29724884033203125, + "step": 120 + }, + { + "epoch": 0.01686058663694001, + "grad_norm": 6.197820663452148, + "learning_rate": 9.375000000000001e-06, + "loss": 0.33477020263671875, + "step": 121 + }, + { + "epoch": 0.016999930328154392, + "grad_norm": 6.155436038970947, + "learning_rate": 9.453125000000001e-06, + "loss": 0.2446746826171875, + "step": 122 + }, + { + "epoch": 0.017139274019368773, + "grad_norm": 5.535401344299316, + "learning_rate": 9.531250000000001e-06, + "loss": 0.244659423828125, + "step": 123 + }, + { + "epoch": 0.017278617710583154, + "grad_norm": 4.212581634521484, + "learning_rate": 9.609375000000001e-06, + "loss": 0.298675537109375, + "step": 124 + }, + { + "epoch": 0.017417961401797532, + "grad_norm": 15.57044792175293, + "learning_rate": 9.6875e-06, + "loss": 0.22971343994140625, + "step": 125 + }, + { + "epoch": 0.017557305093011913, + "grad_norm": 11.872027397155762, + "learning_rate": 9.765625e-06, + "loss": 0.23000335693359375, + "step": 126 + }, + { + "epoch": 0.017696648784226294, + "grad_norm": 21.840192794799805, + "learning_rate": 9.84375e-06, + "loss": 0.31493377685546875, + "step": 127 + }, + { + "epoch": 0.017835992475440675, + "grad_norm": 24.7724609375, + "learning_rate": 9.921875e-06, + "loss": 0.295166015625, + "step": 128 + }, + { + "epoch": 0.017975336166655056, + "grad_norm": 6.634998321533203, + "learning_rate": 1e-05, + "loss": 0.24636077880859375, + "step": 129 + }, + { + "epoch": 0.018114679857869433, + "grad_norm": 6.213815212249756, + "learning_rate": 1.0078125000000001e-05, + "loss": 0.196380615234375, + "step": 130 + }, + { + "epoch": 0.018254023549083814, + "grad_norm": 7.242341995239258, + "learning_rate": 1.0156250000000001e-05, + "loss": 0.2684783935546875, + "step": 131 + }, + { + "epoch": 0.018393367240298195, + "grad_norm": 7.278617858886719, + "learning_rate": 1.0234375000000001e-05, + "loss": 0.31252288818359375, + "step": 132 + }, + { + "epoch": 0.018532710931512576, + "grad_norm": 4.0710906982421875, + "learning_rate": 1.0312500000000002e-05, + "loss": 0.181396484375, + "step": 133 + }, + { + "epoch": 0.018672054622726957, + "grad_norm": 7.896337985992432, + "learning_rate": 1.0390625e-05, + "loss": 0.207427978515625, + "step": 134 + }, + { + "epoch": 0.018811398313941335, + "grad_norm": 22.568559646606445, + "learning_rate": 1.046875e-05, + "loss": 0.23416900634765625, + "step": 135 + }, + { + "epoch": 0.018950742005155716, + "grad_norm": 13.51980972290039, + "learning_rate": 1.0546875e-05, + "loss": 0.30426788330078125, + "step": 136 + }, + { + "epoch": 0.019090085696370097, + "grad_norm": 9.42369556427002, + "learning_rate": 1.0625e-05, + "loss": 0.2425079345703125, + "step": 137 + }, + { + "epoch": 0.019229429387584478, + "grad_norm": 13.862987518310547, + "learning_rate": 1.0703125000000001e-05, + "loss": 0.256500244140625, + "step": 138 + }, + { + "epoch": 0.01936877307879886, + "grad_norm": 10.453695297241211, + "learning_rate": 1.0781250000000001e-05, + "loss": 0.2783355712890625, + "step": 139 + }, + { + "epoch": 0.019508116770013236, + "grad_norm": 9.497563362121582, + "learning_rate": 1.0859375000000001e-05, + "loss": 0.22650909423828125, + "step": 140 + }, + { + "epoch": 0.019647460461227617, + "grad_norm": 13.539013862609863, + "learning_rate": 1.0937500000000002e-05, + "loss": 0.31668853759765625, + "step": 141 + }, + { + "epoch": 0.019786804152441998, + "grad_norm": 6.241253852844238, + "learning_rate": 1.1015625e-05, + "loss": 0.24674224853515625, + "step": 142 + }, + { + "epoch": 0.01992614784365638, + "grad_norm": 17.11236000061035, + "learning_rate": 1.109375e-05, + "loss": 0.28078460693359375, + "step": 143 + }, + { + "epoch": 0.02006549153487076, + "grad_norm": 8.290376663208008, + "learning_rate": 1.1171875e-05, + "loss": 0.2430267333984375, + "step": 144 + }, + { + "epoch": 0.020204835226085138, + "grad_norm": 10.518574714660645, + "learning_rate": 1.125e-05, + "loss": 0.22647857666015625, + "step": 145 + }, + { + "epoch": 0.02034417891729952, + "grad_norm": 7.108603000640869, + "learning_rate": 1.1328125000000001e-05, + "loss": 0.20803070068359375, + "step": 146 + }, + { + "epoch": 0.0204835226085139, + "grad_norm": 9.853582382202148, + "learning_rate": 1.1406250000000001e-05, + "loss": 0.230072021484375, + "step": 147 + }, + { + "epoch": 0.02062286629972828, + "grad_norm": 15.838859558105469, + "learning_rate": 1.1484375000000001e-05, + "loss": 0.220458984375, + "step": 148 + }, + { + "epoch": 0.02076220999094266, + "grad_norm": 5.105030536651611, + "learning_rate": 1.1562500000000002e-05, + "loss": 0.18965911865234375, + "step": 149 + }, + { + "epoch": 0.02090155368215704, + "grad_norm": 12.321122169494629, + "learning_rate": 1.1640625000000002e-05, + "loss": 0.20442962646484375, + "step": 150 + }, + { + "epoch": 0.02104089737337142, + "grad_norm": 9.529899597167969, + "learning_rate": 1.171875e-05, + "loss": 0.24550628662109375, + "step": 151 + }, + { + "epoch": 0.0211802410645858, + "grad_norm": 15.119621276855469, + "learning_rate": 1.1796875e-05, + "loss": 0.24440765380859375, + "step": 152 + }, + { + "epoch": 0.021319584755800182, + "grad_norm": 23.26072883605957, + "learning_rate": 1.1875e-05, + "loss": 0.1783294677734375, + "step": 153 + }, + { + "epoch": 0.021458928447014563, + "grad_norm": 11.65670394897461, + "learning_rate": 1.1953125000000001e-05, + "loss": 0.20865631103515625, + "step": 154 + }, + { + "epoch": 0.02159827213822894, + "grad_norm": 8.901400566101074, + "learning_rate": 1.2031250000000001e-05, + "loss": 0.20742034912109375, + "step": 155 + }, + { + "epoch": 0.02173761582944332, + "grad_norm": 11.471776008605957, + "learning_rate": 1.2109375000000001e-05, + "loss": 0.202850341796875, + "step": 156 + }, + { + "epoch": 0.021876959520657702, + "grad_norm": 29.51309585571289, + "learning_rate": 1.2187500000000001e-05, + "loss": 0.22093963623046875, + "step": 157 + }, + { + "epoch": 0.022016303211872083, + "grad_norm": 19.724519729614258, + "learning_rate": 1.2265625000000002e-05, + "loss": 0.24786376953125, + "step": 158 + }, + { + "epoch": 0.022155646903086464, + "grad_norm": 37.50714874267578, + "learning_rate": 1.234375e-05, + "loss": 0.24099349975585938, + "step": 159 + }, + { + "epoch": 0.022294990594300842, + "grad_norm": 21.124534606933594, + "learning_rate": 1.2421875e-05, + "loss": 0.24327850341796875, + "step": 160 + }, + { + "epoch": 0.022434334285515223, + "grad_norm": 11.881414413452148, + "learning_rate": 1.25e-05, + "loss": 0.24684906005859375, + "step": 161 + }, + { + "epoch": 0.022573677976729604, + "grad_norm": 13.590072631835938, + "learning_rate": 1.2578125e-05, + "loss": 0.18216705322265625, + "step": 162 + }, + { + "epoch": 0.022713021667943985, + "grad_norm": 39.54505920410156, + "learning_rate": 1.2656250000000001e-05, + "loss": 0.23303985595703125, + "step": 163 + }, + { + "epoch": 0.022852365359158366, + "grad_norm": 11.864312171936035, + "learning_rate": 1.2734375000000001e-05, + "loss": 0.16033172607421875, + "step": 164 + }, + { + "epoch": 0.022991709050372743, + "grad_norm": 31.244199752807617, + "learning_rate": 1.2812500000000001e-05, + "loss": 0.21959686279296875, + "step": 165 + }, + { + "epoch": 0.023131052741587124, + "grad_norm": 39.424442291259766, + "learning_rate": 1.2890625000000002e-05, + "loss": 0.2705230712890625, + "step": 166 + }, + { + "epoch": 0.023270396432801505, + "grad_norm": 33.785240173339844, + "learning_rate": 1.2968750000000002e-05, + "loss": 0.2431793212890625, + "step": 167 + }, + { + "epoch": 0.023409740124015886, + "grad_norm": 7.322317600250244, + "learning_rate": 1.3046875e-05, + "loss": 0.15431976318359375, + "step": 168 + }, + { + "epoch": 0.023549083815230267, + "grad_norm": 23.51140785217285, + "learning_rate": 1.3125e-05, + "loss": 0.1785430908203125, + "step": 169 + }, + { + "epoch": 0.023688427506444645, + "grad_norm": 3.697800874710083, + "learning_rate": 1.3203125e-05, + "loss": 0.15843963623046875, + "step": 170 + }, + { + "epoch": 0.023827771197659026, + "grad_norm": 5.891458034515381, + "learning_rate": 1.3281250000000001e-05, + "loss": 0.21923828125, + "step": 171 + }, + { + "epoch": 0.023967114888873407, + "grad_norm": 16.665109634399414, + "learning_rate": 1.3359375000000001e-05, + "loss": 0.19211578369140625, + "step": 172 + }, + { + "epoch": 0.024106458580087788, + "grad_norm": 5.376789093017578, + "learning_rate": 1.3437500000000001e-05, + "loss": 0.198089599609375, + "step": 173 + }, + { + "epoch": 0.024245802271302165, + "grad_norm": 15.600678443908691, + "learning_rate": 1.3515625000000002e-05, + "loss": 0.19437408447265625, + "step": 174 + }, + { + "epoch": 0.024385145962516546, + "grad_norm": 7.356106281280518, + "learning_rate": 1.3593750000000002e-05, + "loss": 0.2178802490234375, + "step": 175 + }, + { + "epoch": 0.024524489653730927, + "grad_norm": 4.382321834564209, + "learning_rate": 1.3671875e-05, + "loss": 0.180419921875, + "step": 176 + }, + { + "epoch": 0.024663833344945308, + "grad_norm": 15.27723217010498, + "learning_rate": 1.375e-05, + "loss": 0.15828704833984375, + "step": 177 + }, + { + "epoch": 0.02480317703615969, + "grad_norm": 7.633199214935303, + "learning_rate": 1.3828125e-05, + "loss": 0.1562652587890625, + "step": 178 + }, + { + "epoch": 0.024942520727374067, + "grad_norm": 32.21489715576172, + "learning_rate": 1.3906250000000001e-05, + "loss": 0.177001953125, + "step": 179 + }, + { + "epoch": 0.025081864418588447, + "grad_norm": 28.462493896484375, + "learning_rate": 1.3984375000000001e-05, + "loss": 0.15540313720703125, + "step": 180 + }, + { + "epoch": 0.02522120810980283, + "grad_norm": 23.451152801513672, + "learning_rate": 1.4062500000000001e-05, + "loss": 0.178375244140625, + "step": 181 + }, + { + "epoch": 0.02536055180101721, + "grad_norm": 10.595033645629883, + "learning_rate": 1.4140625000000002e-05, + "loss": 0.19839096069335938, + "step": 182 + }, + { + "epoch": 0.02549989549223159, + "grad_norm": 16.64572525024414, + "learning_rate": 1.4218750000000002e-05, + "loss": 0.239837646484375, + "step": 183 + }, + { + "epoch": 0.025639239183445968, + "grad_norm": 13.344659805297852, + "learning_rate": 1.4296875000000002e-05, + "loss": 0.21224212646484375, + "step": 184 + }, + { + "epoch": 0.02577858287466035, + "grad_norm": 16.271451950073242, + "learning_rate": 1.4375e-05, + "loss": 0.18000030517578125, + "step": 185 + }, + { + "epoch": 0.02591792656587473, + "grad_norm": 29.737112045288086, + "learning_rate": 1.4453125e-05, + "loss": 0.232452392578125, + "step": 186 + }, + { + "epoch": 0.02605727025708911, + "grad_norm": 14.649554252624512, + "learning_rate": 1.453125e-05, + "loss": 0.20813751220703125, + "step": 187 + }, + { + "epoch": 0.026196613948303492, + "grad_norm": 11.020486831665039, + "learning_rate": 1.4609375000000001e-05, + "loss": 0.20354461669921875, + "step": 188 + }, + { + "epoch": 0.02633595763951787, + "grad_norm": 14.113497734069824, + "learning_rate": 1.4687500000000001e-05, + "loss": 0.19490814208984375, + "step": 189 + }, + { + "epoch": 0.02647530133073225, + "grad_norm": 7.872939109802246, + "learning_rate": 1.4765625000000001e-05, + "loss": 0.20699310302734375, + "step": 190 + }, + { + "epoch": 0.02661464502194663, + "grad_norm": 7.132872581481934, + "learning_rate": 1.4843750000000002e-05, + "loss": 0.17124176025390625, + "step": 191 + }, + { + "epoch": 0.026753988713161012, + "grad_norm": 12.31869888305664, + "learning_rate": 1.4921875000000002e-05, + "loss": 0.2261505126953125, + "step": 192 + }, + { + "epoch": 0.026893332404375393, + "grad_norm": 7.6175537109375, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.2263031005859375, + "step": 193 + }, + { + "epoch": 0.02703267609558977, + "grad_norm": 9.104379653930664, + "learning_rate": 1.5078125e-05, + "loss": 0.1780548095703125, + "step": 194 + }, + { + "epoch": 0.02717201978680415, + "grad_norm": 6.897692680358887, + "learning_rate": 1.515625e-05, + "loss": 0.189544677734375, + "step": 195 + }, + { + "epoch": 0.027311363478018533, + "grad_norm": 12.192371368408203, + "learning_rate": 1.5234375000000001e-05, + "loss": 0.16973495483398438, + "step": 196 + }, + { + "epoch": 0.027450707169232914, + "grad_norm": 4.066462993621826, + "learning_rate": 1.5312500000000003e-05, + "loss": 0.21714019775390625, + "step": 197 + }, + { + "epoch": 0.027590050860447295, + "grad_norm": 7.649482727050781, + "learning_rate": 1.5390625e-05, + "loss": 0.16823196411132812, + "step": 198 + }, + { + "epoch": 0.027729394551661672, + "grad_norm": 3.1419341564178467, + "learning_rate": 1.546875e-05, + "loss": 0.1820220947265625, + "step": 199 + }, + { + "epoch": 0.027868738242876053, + "grad_norm": 9.499337196350098, + "learning_rate": 1.5546875e-05, + "loss": 0.18516159057617188, + "step": 200 + }, + { + "epoch": 0.028008081934090434, + "grad_norm": 4.072226047515869, + "learning_rate": 1.5625e-05, + "loss": 0.16107940673828125, + "step": 201 + }, + { + "epoch": 0.028147425625304815, + "grad_norm": 9.89794921875, + "learning_rate": 1.5703125e-05, + "loss": 0.12831878662109375, + "step": 202 + }, + { + "epoch": 0.028286769316519196, + "grad_norm": 7.544033050537109, + "learning_rate": 1.578125e-05, + "loss": 0.1446990966796875, + "step": 203 + }, + { + "epoch": 0.028426113007733574, + "grad_norm": 6.846496105194092, + "learning_rate": 1.5859375e-05, + "loss": 0.1604461669921875, + "step": 204 + }, + { + "epoch": 0.028565456698947955, + "grad_norm": 2.4870805740356445, + "learning_rate": 1.59375e-05, + "loss": 0.14762115478515625, + "step": 205 + }, + { + "epoch": 0.028704800390162336, + "grad_norm": 6.904366970062256, + "learning_rate": 1.6015625e-05, + "loss": 0.15484619140625, + "step": 206 + }, + { + "epoch": 0.028844144081376717, + "grad_norm": 5.631534576416016, + "learning_rate": 1.609375e-05, + "loss": 0.164276123046875, + "step": 207 + }, + { + "epoch": 0.028983487772591097, + "grad_norm": 5.7419281005859375, + "learning_rate": 1.6171875000000002e-05, + "loss": 0.1723785400390625, + "step": 208 + }, + { + "epoch": 0.029122831463805475, + "grad_norm": 21.210580825805664, + "learning_rate": 1.6250000000000002e-05, + "loss": 0.1407012939453125, + "step": 209 + }, + { + "epoch": 0.029262175155019856, + "grad_norm": 11.999072074890137, + "learning_rate": 1.6328125000000002e-05, + "loss": 0.19600677490234375, + "step": 210 + }, + { + "epoch": 0.029401518846234237, + "grad_norm": 12.69387149810791, + "learning_rate": 1.6406250000000002e-05, + "loss": 0.159698486328125, + "step": 211 + }, + { + "epoch": 0.029540862537448618, + "grad_norm": 10.724947929382324, + "learning_rate": 1.6484375000000003e-05, + "loss": 0.17156982421875, + "step": 212 + }, + { + "epoch": 0.029680206228663, + "grad_norm": 8.453863143920898, + "learning_rate": 1.6562500000000003e-05, + "loss": 0.1804046630859375, + "step": 213 + }, + { + "epoch": 0.029819549919877376, + "grad_norm": 4.5546698570251465, + "learning_rate": 1.6640625000000003e-05, + "loss": 0.1972503662109375, + "step": 214 + }, + { + "epoch": 0.029958893611091757, + "grad_norm": 22.82716178894043, + "learning_rate": 1.671875e-05, + "loss": 0.22902679443359375, + "step": 215 + }, + { + "epoch": 0.03009823730230614, + "grad_norm": 10.317280769348145, + "learning_rate": 1.6796875e-05, + "loss": 0.22607421875, + "step": 216 + }, + { + "epoch": 0.03023758099352052, + "grad_norm": 26.870990753173828, + "learning_rate": 1.6875e-05, + "loss": 0.178497314453125, + "step": 217 + }, + { + "epoch": 0.0303769246847349, + "grad_norm": 12.715118408203125, + "learning_rate": 1.6953125e-05, + "loss": 0.1957244873046875, + "step": 218 + }, + { + "epoch": 0.030516268375949278, + "grad_norm": 28.58599281311035, + "learning_rate": 1.703125e-05, + "loss": 0.1845245361328125, + "step": 219 + }, + { + "epoch": 0.03065561206716366, + "grad_norm": 24.998645782470703, + "learning_rate": 1.7109375e-05, + "loss": 0.18060302734375, + "step": 220 + }, + { + "epoch": 0.03079495575837804, + "grad_norm": 6.701875686645508, + "learning_rate": 1.71875e-05, + "loss": 0.168853759765625, + "step": 221 + }, + { + "epoch": 0.03093429944959242, + "grad_norm": 25.65248680114746, + "learning_rate": 1.7265625e-05, + "loss": 0.22797393798828125, + "step": 222 + }, + { + "epoch": 0.031073643140806798, + "grad_norm": 11.242730140686035, + "learning_rate": 1.734375e-05, + "loss": 0.217010498046875, + "step": 223 + }, + { + "epoch": 0.03121298683202118, + "grad_norm": 3.523857355117798, + "learning_rate": 1.7421875e-05, + "loss": 0.17070770263671875, + "step": 224 + }, + { + "epoch": 0.031352330523235564, + "grad_norm": 6.597501754760742, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.16447067260742188, + "step": 225 + }, + { + "epoch": 0.03149167421444994, + "grad_norm": 5.415345191955566, + "learning_rate": 1.7578125000000002e-05, + "loss": 0.16066360473632812, + "step": 226 + }, + { + "epoch": 0.03163101790566432, + "grad_norm": 10.379007339477539, + "learning_rate": 1.7656250000000002e-05, + "loss": 0.16402816772460938, + "step": 227 + }, + { + "epoch": 0.0317703615968787, + "grad_norm": 8.428119659423828, + "learning_rate": 1.7734375000000002e-05, + "loss": 0.1723175048828125, + "step": 228 + }, + { + "epoch": 0.03190970528809308, + "grad_norm": 7.824216842651367, + "learning_rate": 1.7812500000000003e-05, + "loss": 0.16522216796875, + "step": 229 + }, + { + "epoch": 0.032049048979307465, + "grad_norm": 10.98877239227295, + "learning_rate": 1.7890625000000003e-05, + "loss": 0.20270156860351562, + "step": 230 + }, + { + "epoch": 0.03218839267052184, + "grad_norm": 3.7734591960906982, + "learning_rate": 1.7968750000000003e-05, + "loss": 0.15793228149414062, + "step": 231 + }, + { + "epoch": 0.03232773636173622, + "grad_norm": 2.78013014793396, + "learning_rate": 1.8046875e-05, + "loss": 0.17957305908203125, + "step": 232 + }, + { + "epoch": 0.032467080052950605, + "grad_norm": 4.939957141876221, + "learning_rate": 1.8125e-05, + "loss": 0.14278411865234375, + "step": 233 + }, + { + "epoch": 0.03260642374416498, + "grad_norm": 23.295881271362305, + "learning_rate": 1.8203125e-05, + "loss": 0.20469284057617188, + "step": 234 + }, + { + "epoch": 0.032745767435379367, + "grad_norm": 3.9423696994781494, + "learning_rate": 1.828125e-05, + "loss": 0.14154815673828125, + "step": 235 + }, + { + "epoch": 0.032885111126593744, + "grad_norm": 7.224527835845947, + "learning_rate": 1.8359375e-05, + "loss": 0.13896942138671875, + "step": 236 + }, + { + "epoch": 0.03302445481780812, + "grad_norm": 4.426887512207031, + "learning_rate": 1.84375e-05, + "loss": 0.1268157958984375, + "step": 237 + }, + { + "epoch": 0.033163798509022506, + "grad_norm": 3.92598819732666, + "learning_rate": 1.8515625e-05, + "loss": 0.13848114013671875, + "step": 238 + }, + { + "epoch": 0.033303142200236883, + "grad_norm": 8.23882007598877, + "learning_rate": 1.859375e-05, + "loss": 0.11963272094726562, + "step": 239 + }, + { + "epoch": 0.03344248589145127, + "grad_norm": 5.870124816894531, + "learning_rate": 1.8671875e-05, + "loss": 0.16698455810546875, + "step": 240 + }, + { + "epoch": 0.033581829582665645, + "grad_norm": 9.310465812683105, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.13766098022460938, + "step": 241 + }, + { + "epoch": 0.03372117327388002, + "grad_norm": 15.924727439880371, + "learning_rate": 1.8828125000000002e-05, + "loss": 0.15920639038085938, + "step": 242 + }, + { + "epoch": 0.03386051696509441, + "grad_norm": 5.291313648223877, + "learning_rate": 1.8906250000000002e-05, + "loss": 0.17458343505859375, + "step": 243 + }, + { + "epoch": 0.033999860656308785, + "grad_norm": 19.371849060058594, + "learning_rate": 1.8984375000000002e-05, + "loss": 0.18967056274414062, + "step": 244 + }, + { + "epoch": 0.03413920434752317, + "grad_norm": 22.01350975036621, + "learning_rate": 1.9062500000000003e-05, + "loss": 0.1800079345703125, + "step": 245 + }, + { + "epoch": 0.03427854803873755, + "grad_norm": 3.4605321884155273, + "learning_rate": 1.9140625000000003e-05, + "loss": 0.10721588134765625, + "step": 246 + }, + { + "epoch": 0.034417891729951924, + "grad_norm": 11.952702522277832, + "learning_rate": 1.9218750000000003e-05, + "loss": 0.14481353759765625, + "step": 247 + }, + { + "epoch": 0.03455723542116631, + "grad_norm": 15.554340362548828, + "learning_rate": 1.9296875000000003e-05, + "loss": 0.16975784301757812, + "step": 248 + }, + { + "epoch": 0.034696579112380686, + "grad_norm": 6.216955661773682, + "learning_rate": 1.9375e-05, + "loss": 0.16730880737304688, + "step": 249 + }, + { + "epoch": 0.034835922803595064, + "grad_norm": 7.059545993804932, + "learning_rate": 1.9453125e-05, + "loss": 0.13675689697265625, + "step": 250 + }, + { + "epoch": 0.03497526649480945, + "grad_norm": 10.996842384338379, + "learning_rate": 1.953125e-05, + "loss": 0.17966461181640625, + "step": 251 + }, + { + "epoch": 0.035114610186023826, + "grad_norm": 8.27406120300293, + "learning_rate": 1.9609375e-05, + "loss": 0.203399658203125, + "step": 252 + }, + { + "epoch": 0.03525395387723821, + "grad_norm": 12.968804359436035, + "learning_rate": 1.96875e-05, + "loss": 0.19990158081054688, + "step": 253 + }, + { + "epoch": 0.03539329756845259, + "grad_norm": 15.24231243133545, + "learning_rate": 1.9765625e-05, + "loss": 0.17612457275390625, + "step": 254 + }, + { + "epoch": 0.035532641259666965, + "grad_norm": 10.577461242675781, + "learning_rate": 1.984375e-05, + "loss": 0.16556167602539062, + "step": 255 + }, + { + "epoch": 0.03567198495088135, + "grad_norm": 4.81093692779541, + "learning_rate": 1.9921875e-05, + "loss": 0.15096664428710938, + "step": 256 + }, + { + "epoch": 0.03581132864209573, + "grad_norm": 12.774297714233398, + "learning_rate": 2e-05, + "loss": 0.17815399169921875, + "step": 257 + }, + { + "epoch": 0.03595067233331011, + "grad_norm": 2.8266847133636475, + "learning_rate": 2.0078125000000002e-05, + "loss": 0.13339614868164062, + "step": 258 + }, + { + "epoch": 0.03609001602452449, + "grad_norm": 20.113176345825195, + "learning_rate": 2.0156250000000002e-05, + "loss": 0.18735504150390625, + "step": 259 + }, + { + "epoch": 0.03622935971573887, + "grad_norm": 10.461869239807129, + "learning_rate": 2.0234375000000002e-05, + "loss": 0.14319992065429688, + "step": 260 + }, + { + "epoch": 0.03636870340695325, + "grad_norm": 11.342765808105469, + "learning_rate": 2.0312500000000002e-05, + "loss": 0.16077804565429688, + "step": 261 + }, + { + "epoch": 0.03650804709816763, + "grad_norm": 12.149606704711914, + "learning_rate": 2.0390625000000003e-05, + "loss": 0.17730712890625, + "step": 262 + }, + { + "epoch": 0.03664739078938201, + "grad_norm": 21.526506423950195, + "learning_rate": 2.0468750000000003e-05, + "loss": 0.17575836181640625, + "step": 263 + }, + { + "epoch": 0.03678673448059639, + "grad_norm": 12.917749404907227, + "learning_rate": 2.0546875000000003e-05, + "loss": 0.20618438720703125, + "step": 264 + }, + { + "epoch": 0.03692607817181077, + "grad_norm": 8.688334465026855, + "learning_rate": 2.0625000000000003e-05, + "loss": 0.17574310302734375, + "step": 265 + }, + { + "epoch": 0.03706542186302515, + "grad_norm": 9.381072998046875, + "learning_rate": 2.0703125e-05, + "loss": 0.15082168579101562, + "step": 266 + }, + { + "epoch": 0.03720476555423953, + "grad_norm": 5.075772285461426, + "learning_rate": 2.078125e-05, + "loss": 0.14011383056640625, + "step": 267 + }, + { + "epoch": 0.037344109245453914, + "grad_norm": 20.617652893066406, + "learning_rate": 2.0859375e-05, + "loss": 0.18639373779296875, + "step": 268 + }, + { + "epoch": 0.03748345293666829, + "grad_norm": 17.21690559387207, + "learning_rate": 2.09375e-05, + "loss": 0.167388916015625, + "step": 269 + }, + { + "epoch": 0.03762279662788267, + "grad_norm": 7.061341285705566, + "learning_rate": 2.1015625e-05, + "loss": 0.15079498291015625, + "step": 270 + }, + { + "epoch": 0.037762140319097054, + "grad_norm": 6.114952564239502, + "learning_rate": 2.109375e-05, + "loss": 0.1452789306640625, + "step": 271 + }, + { + "epoch": 0.03790148401031143, + "grad_norm": 11.467823028564453, + "learning_rate": 2.1171875e-05, + "loss": 0.1541614532470703, + "step": 272 + }, + { + "epoch": 0.038040827701525816, + "grad_norm": 10.475001335144043, + "learning_rate": 2.125e-05, + "loss": 0.1524658203125, + "step": 273 + }, + { + "epoch": 0.03818017139274019, + "grad_norm": 3.58603572845459, + "learning_rate": 2.1328125000000002e-05, + "loss": 0.1180267333984375, + "step": 274 + }, + { + "epoch": 0.03831951508395457, + "grad_norm": 6.524663925170898, + "learning_rate": 2.1406250000000002e-05, + "loss": 0.14548873901367188, + "step": 275 + }, + { + "epoch": 0.038458858775168955, + "grad_norm": 2.20135498046875, + "learning_rate": 2.1484375000000002e-05, + "loss": 0.11176300048828125, + "step": 276 + }, + { + "epoch": 0.03859820246638333, + "grad_norm": 2.243330240249634, + "learning_rate": 2.1562500000000002e-05, + "loss": 0.15746688842773438, + "step": 277 + }, + { + "epoch": 0.03873754615759772, + "grad_norm": 5.031515598297119, + "learning_rate": 2.1640625000000003e-05, + "loss": 0.14963912963867188, + "step": 278 + }, + { + "epoch": 0.038876889848812095, + "grad_norm": 3.3487393856048584, + "learning_rate": 2.1718750000000003e-05, + "loss": 0.1318817138671875, + "step": 279 + }, + { + "epoch": 0.03901623354002647, + "grad_norm": 10.993124961853027, + "learning_rate": 2.1796875000000003e-05, + "loss": 0.15652084350585938, + "step": 280 + }, + { + "epoch": 0.03915557723124086, + "grad_norm": 16.914213180541992, + "learning_rate": 2.1875000000000003e-05, + "loss": 0.18676376342773438, + "step": 281 + }, + { + "epoch": 0.039294920922455234, + "grad_norm": 3.316898822784424, + "learning_rate": 2.1953125000000003e-05, + "loss": 0.1385955810546875, + "step": 282 + }, + { + "epoch": 0.03943426461366962, + "grad_norm": 11.58234977722168, + "learning_rate": 2.203125e-05, + "loss": 0.1541290283203125, + "step": 283 + }, + { + "epoch": 0.039573608304883996, + "grad_norm": 17.89069366455078, + "learning_rate": 2.2109375e-05, + "loss": 0.20914459228515625, + "step": 284 + }, + { + "epoch": 0.039712951996098374, + "grad_norm": 12.752640724182129, + "learning_rate": 2.21875e-05, + "loss": 0.15747833251953125, + "step": 285 + }, + { + "epoch": 0.03985229568731276, + "grad_norm": 5.277968883514404, + "learning_rate": 2.2265625e-05, + "loss": 0.14974594116210938, + "step": 286 + }, + { + "epoch": 0.039991639378527136, + "grad_norm": 7.034120559692383, + "learning_rate": 2.234375e-05, + "loss": 0.14376068115234375, + "step": 287 + }, + { + "epoch": 0.04013098306974152, + "grad_norm": 1.818902611732483, + "learning_rate": 2.2421875e-05, + "loss": 0.11521148681640625, + "step": 288 + }, + { + "epoch": 0.0402703267609559, + "grad_norm": 5.902029514312744, + "learning_rate": 2.25e-05, + "loss": 0.14117431640625, + "step": 289 + }, + { + "epoch": 0.040409670452170275, + "grad_norm": 2.3946919441223145, + "learning_rate": 2.2578125e-05, + "loss": 0.12673187255859375, + "step": 290 + }, + { + "epoch": 0.04054901414338466, + "grad_norm": 10.03725814819336, + "learning_rate": 2.2656250000000002e-05, + "loss": 0.15714645385742188, + "step": 291 + }, + { + "epoch": 0.04068835783459904, + "grad_norm": 4.221160411834717, + "learning_rate": 2.2734375000000002e-05, + "loss": 0.16352081298828125, + "step": 292 + }, + { + "epoch": 0.04082770152581342, + "grad_norm": 2.4171268939971924, + "learning_rate": 2.2812500000000002e-05, + "loss": 0.17289352416992188, + "step": 293 + }, + { + "epoch": 0.0409670452170278, + "grad_norm": 2.8405871391296387, + "learning_rate": 2.2890625000000002e-05, + "loss": 0.13330841064453125, + "step": 294 + }, + { + "epoch": 0.04110638890824218, + "grad_norm": 1.9959015846252441, + "learning_rate": 2.2968750000000003e-05, + "loss": 0.12543487548828125, + "step": 295 + }, + { + "epoch": 0.04124573259945656, + "grad_norm": 3.393723249435425, + "learning_rate": 2.3046875000000003e-05, + "loss": 0.153900146484375, + "step": 296 + }, + { + "epoch": 0.04138507629067094, + "grad_norm": 2.285865068435669, + "learning_rate": 2.3125000000000003e-05, + "loss": 0.12562179565429688, + "step": 297 + }, + { + "epoch": 0.04152441998188532, + "grad_norm": 7.105337619781494, + "learning_rate": 2.3203125000000003e-05, + "loss": 0.12676239013671875, + "step": 298 + }, + { + "epoch": 0.0416637636730997, + "grad_norm": 6.265249729156494, + "learning_rate": 2.3281250000000003e-05, + "loss": 0.118927001953125, + "step": 299 + }, + { + "epoch": 0.04180310736431408, + "grad_norm": 2.807976484298706, + "learning_rate": 2.3359375e-05, + "loss": 0.1427154541015625, + "step": 300 + }, + { + "epoch": 0.04194245105552846, + "grad_norm": 11.650022506713867, + "learning_rate": 2.34375e-05, + "loss": 0.16907501220703125, + "step": 301 + }, + { + "epoch": 0.04208179474674284, + "grad_norm": 7.4744391441345215, + "learning_rate": 2.3515625e-05, + "loss": 0.14818954467773438, + "step": 302 + }, + { + "epoch": 0.042221138437957224, + "grad_norm": 7.770161151885986, + "learning_rate": 2.359375e-05, + "loss": 0.12781524658203125, + "step": 303 + }, + { + "epoch": 0.0423604821291716, + "grad_norm": 16.090797424316406, + "learning_rate": 2.3671875e-05, + "loss": 0.22275543212890625, + "step": 304 + }, + { + "epoch": 0.04249982582038598, + "grad_norm": 2.917875051498413, + "learning_rate": 2.375e-05, + "loss": 0.13833236694335938, + "step": 305 + }, + { + "epoch": 0.042639169511600364, + "grad_norm": 9.697542190551758, + "learning_rate": 2.3828125e-05, + "loss": 0.19214630126953125, + "step": 306 + }, + { + "epoch": 0.04277851320281474, + "grad_norm": 12.708673477172852, + "learning_rate": 2.3906250000000002e-05, + "loss": 0.17413330078125, + "step": 307 + }, + { + "epoch": 0.042917856894029126, + "grad_norm": 5.884083271026611, + "learning_rate": 2.3984375000000002e-05, + "loss": 0.14495849609375, + "step": 308 + }, + { + "epoch": 0.0430572005852435, + "grad_norm": 1.7070584297180176, + "learning_rate": 2.4062500000000002e-05, + "loss": 0.09543609619140625, + "step": 309 + }, + { + "epoch": 0.04319654427645788, + "grad_norm": 16.183382034301758, + "learning_rate": 2.4140625000000002e-05, + "loss": 0.19139480590820312, + "step": 310 + }, + { + "epoch": 0.043335887967672265, + "grad_norm": 3.3002467155456543, + "learning_rate": 2.4218750000000003e-05, + "loss": 0.13222122192382812, + "step": 311 + }, + { + "epoch": 0.04347523165888664, + "grad_norm": 5.430037498474121, + "learning_rate": 2.4296875000000003e-05, + "loss": 0.16632080078125, + "step": 312 + }, + { + "epoch": 0.04361457535010103, + "grad_norm": 11.327288627624512, + "learning_rate": 2.4375000000000003e-05, + "loss": 0.14043807983398438, + "step": 313 + }, + { + "epoch": 0.043753919041315405, + "grad_norm": 1.7214336395263672, + "learning_rate": 2.4453125000000003e-05, + "loss": 0.13000106811523438, + "step": 314 + }, + { + "epoch": 0.04389326273252978, + "grad_norm": 3.7365095615386963, + "learning_rate": 2.4531250000000003e-05, + "loss": 0.13901138305664062, + "step": 315 + }, + { + "epoch": 0.04403260642374417, + "grad_norm": 4.003068447113037, + "learning_rate": 2.4609375000000004e-05, + "loss": 0.13152694702148438, + "step": 316 + }, + { + "epoch": 0.044171950114958544, + "grad_norm": 8.028225898742676, + "learning_rate": 2.46875e-05, + "loss": 0.144561767578125, + "step": 317 + }, + { + "epoch": 0.04431129380617293, + "grad_norm": 2.3922972679138184, + "learning_rate": 2.4765625e-05, + "loss": 0.17046356201171875, + "step": 318 + }, + { + "epoch": 0.044450637497387306, + "grad_norm": 3.501657724380493, + "learning_rate": 2.484375e-05, + "loss": 0.1304779052734375, + "step": 319 + }, + { + "epoch": 0.044589981188601684, + "grad_norm": 2.379070997238159, + "learning_rate": 2.4921875e-05, + "loss": 0.11922454833984375, + "step": 320 + }, + { + "epoch": 0.04472932487981607, + "grad_norm": 9.816899299621582, + "learning_rate": 2.5e-05, + "loss": 0.14653396606445312, + "step": 321 + }, + { + "epoch": 0.044868668571030446, + "grad_norm": 10.88025951385498, + "learning_rate": 2.5078125e-05, + "loss": 0.1541595458984375, + "step": 322 + }, + { + "epoch": 0.04500801226224483, + "grad_norm": 5.904546737670898, + "learning_rate": 2.515625e-05, + "loss": 0.14318084716796875, + "step": 323 + }, + { + "epoch": 0.04514735595345921, + "grad_norm": 8.23379135131836, + "learning_rate": 2.5234375000000002e-05, + "loss": 0.11903762817382812, + "step": 324 + }, + { + "epoch": 0.045286699644673585, + "grad_norm": 14.856677055358887, + "learning_rate": 2.5312500000000002e-05, + "loss": 0.18276214599609375, + "step": 325 + }, + { + "epoch": 0.04542604333588797, + "grad_norm": 9.612829208374023, + "learning_rate": 2.5390625000000002e-05, + "loss": 0.156280517578125, + "step": 326 + }, + { + "epoch": 0.04556538702710235, + "grad_norm": 1.3654321432113647, + "learning_rate": 2.5468750000000002e-05, + "loss": 0.10762786865234375, + "step": 327 + }, + { + "epoch": 0.04570473071831673, + "grad_norm": 6.5090813636779785, + "learning_rate": 2.5546875000000003e-05, + "loss": 0.141815185546875, + "step": 328 + }, + { + "epoch": 0.04584407440953111, + "grad_norm": 7.9262003898620605, + "learning_rate": 2.5625000000000003e-05, + "loss": 0.16822433471679688, + "step": 329 + }, + { + "epoch": 0.045983418100745486, + "grad_norm": 4.166744232177734, + "learning_rate": 2.5703125000000003e-05, + "loss": 0.16649246215820312, + "step": 330 + }, + { + "epoch": 0.04612276179195987, + "grad_norm": 6.227190017700195, + "learning_rate": 2.5781250000000003e-05, + "loss": 0.13357925415039062, + "step": 331 + }, + { + "epoch": 0.04626210548317425, + "grad_norm": 2.677619218826294, + "learning_rate": 2.5859375000000003e-05, + "loss": 0.135498046875, + "step": 332 + }, + { + "epoch": 0.04640144917438863, + "grad_norm": 2.2948086261749268, + "learning_rate": 2.5937500000000004e-05, + "loss": 0.11031341552734375, + "step": 333 + }, + { + "epoch": 0.04654079286560301, + "grad_norm": 7.669824600219727, + "learning_rate": 2.6015625e-05, + "loss": 0.136932373046875, + "step": 334 + }, + { + "epoch": 0.04668013655681739, + "grad_norm": 9.659308433532715, + "learning_rate": 2.609375e-05, + "loss": 0.17742156982421875, + "step": 335 + }, + { + "epoch": 0.04681948024803177, + "grad_norm": 3.2917637825012207, + "learning_rate": 2.6171875e-05, + "loss": 0.132476806640625, + "step": 336 + }, + { + "epoch": 0.04695882393924615, + "grad_norm": 3.9384686946868896, + "learning_rate": 2.625e-05, + "loss": 0.1186065673828125, + "step": 337 + }, + { + "epoch": 0.047098167630460534, + "grad_norm": 3.8799619674682617, + "learning_rate": 2.6328125e-05, + "loss": 0.12781429290771484, + "step": 338 + }, + { + "epoch": 0.04723751132167491, + "grad_norm": 3.398435592651367, + "learning_rate": 2.640625e-05, + "loss": 0.11231231689453125, + "step": 339 + }, + { + "epoch": 0.04737685501288929, + "grad_norm": 1.8629547357559204, + "learning_rate": 2.6484375000000002e-05, + "loss": 0.11902618408203125, + "step": 340 + }, + { + "epoch": 0.047516198704103674, + "grad_norm": 1.992445468902588, + "learning_rate": 2.6562500000000002e-05, + "loss": 0.12783432006835938, + "step": 341 + }, + { + "epoch": 0.04765554239531805, + "grad_norm": 2.645453929901123, + "learning_rate": 2.6640625000000002e-05, + "loss": 0.15143585205078125, + "step": 342 + }, + { + "epoch": 0.047794886086532436, + "grad_norm": 3.5040268898010254, + "learning_rate": 2.6718750000000002e-05, + "loss": 0.13952255249023438, + "step": 343 + }, + { + "epoch": 0.04793422977774681, + "grad_norm": 5.521175384521484, + "learning_rate": 2.6796875000000003e-05, + "loss": 0.13457107543945312, + "step": 344 + }, + { + "epoch": 0.04807357346896119, + "grad_norm": 8.195466995239258, + "learning_rate": 2.6875000000000003e-05, + "loss": 0.1166229248046875, + "step": 345 + }, + { + "epoch": 0.048212917160175575, + "grad_norm": 8.657694816589355, + "learning_rate": 2.6953125000000003e-05, + "loss": 0.1279144287109375, + "step": 346 + }, + { + "epoch": 0.04835226085138995, + "grad_norm": 5.033594608306885, + "learning_rate": 2.7031250000000003e-05, + "loss": 0.15729904174804688, + "step": 347 + }, + { + "epoch": 0.04849160454260433, + "grad_norm": 2.908384084701538, + "learning_rate": 2.7109375000000003e-05, + "loss": 0.13243484497070312, + "step": 348 + }, + { + "epoch": 0.048630948233818715, + "grad_norm": 9.848067283630371, + "learning_rate": 2.7187500000000004e-05, + "loss": 0.15314102172851562, + "step": 349 + }, + { + "epoch": 0.04877029192503309, + "grad_norm": 2.104504346847534, + "learning_rate": 2.7265625000000004e-05, + "loss": 0.12220382690429688, + "step": 350 + }, + { + "epoch": 0.04890963561624748, + "grad_norm": 4.027978420257568, + "learning_rate": 2.734375e-05, + "loss": 0.12494659423828125, + "step": 351 + }, + { + "epoch": 0.049048979307461854, + "grad_norm": 7.607560157775879, + "learning_rate": 2.7421875e-05, + "loss": 0.12941360473632812, + "step": 352 + }, + { + "epoch": 0.04918832299867623, + "grad_norm": 1.7413184642791748, + "learning_rate": 2.75e-05, + "loss": 0.10313034057617188, + "step": 353 + }, + { + "epoch": 0.049327666689890616, + "grad_norm": 2.4723892211914062, + "learning_rate": 2.7578125e-05, + "loss": 0.1409912109375, + "step": 354 + }, + { + "epoch": 0.049467010381104994, + "grad_norm": 4.698057174682617, + "learning_rate": 2.765625e-05, + "loss": 0.13390731811523438, + "step": 355 + }, + { + "epoch": 0.04960635407231938, + "grad_norm": 6.201010227203369, + "learning_rate": 2.7734375e-05, + "loss": 0.16281509399414062, + "step": 356 + }, + { + "epoch": 0.049745697763533755, + "grad_norm": 4.560400009155273, + "learning_rate": 2.7812500000000002e-05, + "loss": 0.14498138427734375, + "step": 357 + }, + { + "epoch": 0.04988504145474813, + "grad_norm": 5.392146110534668, + "learning_rate": 2.7890625000000002e-05, + "loss": 0.13381576538085938, + "step": 358 + }, + { + "epoch": 0.05002438514596252, + "grad_norm": 2.7424020767211914, + "learning_rate": 2.7968750000000002e-05, + "loss": 0.12769317626953125, + "step": 359 + }, + { + "epoch": 0.050163728837176895, + "grad_norm": 4.7111711502075195, + "learning_rate": 2.8046875000000002e-05, + "loss": 0.17494583129882812, + "step": 360 + }, + { + "epoch": 0.05030307252839128, + "grad_norm": 6.59081506729126, + "learning_rate": 2.8125000000000003e-05, + "loss": 0.1469268798828125, + "step": 361 + }, + { + "epoch": 0.05044241621960566, + "grad_norm": 2.2323391437530518, + "learning_rate": 2.8203125000000003e-05, + "loss": 0.127166748046875, + "step": 362 + }, + { + "epoch": 0.050581759910820034, + "grad_norm": 4.9665207862854, + "learning_rate": 2.8281250000000003e-05, + "loss": 0.13751220703125, + "step": 363 + }, + { + "epoch": 0.05072110360203442, + "grad_norm": 2.907015085220337, + "learning_rate": 2.8359375000000003e-05, + "loss": 0.13743209838867188, + "step": 364 + }, + { + "epoch": 0.050860447293248796, + "grad_norm": 4.1403656005859375, + "learning_rate": 2.8437500000000003e-05, + "loss": 0.11430740356445312, + "step": 365 + }, + { + "epoch": 0.05099979098446318, + "grad_norm": 3.9225199222564697, + "learning_rate": 2.8515625000000004e-05, + "loss": 0.14166641235351562, + "step": 366 + }, + { + "epoch": 0.05113913467567756, + "grad_norm": 2.6616833209991455, + "learning_rate": 2.8593750000000004e-05, + "loss": 0.11994743347167969, + "step": 367 + }, + { + "epoch": 0.051278478366891936, + "grad_norm": 5.586734771728516, + "learning_rate": 2.8671875e-05, + "loss": 0.1314697265625, + "step": 368 + }, + { + "epoch": 0.05141782205810632, + "grad_norm": 2.8066041469573975, + "learning_rate": 2.875e-05, + "loss": 0.14867782592773438, + "step": 369 + }, + { + "epoch": 0.0515571657493207, + "grad_norm": 1.880692720413208, + "learning_rate": 2.8828125e-05, + "loss": 0.16294097900390625, + "step": 370 + }, + { + "epoch": 0.05169650944053508, + "grad_norm": 3.855336904525757, + "learning_rate": 2.890625e-05, + "loss": 0.1315631866455078, + "step": 371 + }, + { + "epoch": 0.05183585313174946, + "grad_norm": 1.6804821491241455, + "learning_rate": 2.8984375e-05, + "loss": 0.08749771118164062, + "step": 372 + }, + { + "epoch": 0.05197519682296384, + "grad_norm": 4.604068756103516, + "learning_rate": 2.90625e-05, + "loss": 0.14107894897460938, + "step": 373 + }, + { + "epoch": 0.05211454051417822, + "grad_norm": 5.655059337615967, + "learning_rate": 2.9140625000000002e-05, + "loss": 0.133087158203125, + "step": 374 + }, + { + "epoch": 0.0522538842053926, + "grad_norm": 3.773108959197998, + "learning_rate": 2.9218750000000002e-05, + "loss": 0.14210891723632812, + "step": 375 + }, + { + "epoch": 0.052393227896606984, + "grad_norm": 3.5761513710021973, + "learning_rate": 2.9296875000000002e-05, + "loss": 0.14535903930664062, + "step": 376 + }, + { + "epoch": 0.05253257158782136, + "grad_norm": 1.9816220998764038, + "learning_rate": 2.9375000000000003e-05, + "loss": 0.1269092559814453, + "step": 377 + }, + { + "epoch": 0.05267191527903574, + "grad_norm": 7.910952091217041, + "learning_rate": 2.9453125000000003e-05, + "loss": 0.1534442901611328, + "step": 378 + }, + { + "epoch": 0.05281125897025012, + "grad_norm": 2.2611303329467773, + "learning_rate": 2.9531250000000003e-05, + "loss": 0.15328598022460938, + "step": 379 + }, + { + "epoch": 0.0529506026614645, + "grad_norm": 2.1094727516174316, + "learning_rate": 2.9609375000000003e-05, + "loss": 0.13796615600585938, + "step": 380 + }, + { + "epoch": 0.053089946352678885, + "grad_norm": 3.915283203125, + "learning_rate": 2.9687500000000003e-05, + "loss": 0.14343643188476562, + "step": 381 + }, + { + "epoch": 0.05322929004389326, + "grad_norm": 5.646178722381592, + "learning_rate": 2.9765625000000004e-05, + "loss": 0.13836669921875, + "step": 382 + }, + { + "epoch": 0.05336863373510764, + "grad_norm": 3.2940049171447754, + "learning_rate": 2.9843750000000004e-05, + "loss": 0.14098358154296875, + "step": 383 + }, + { + "epoch": 0.053507977426322025, + "grad_norm": 4.756617069244385, + "learning_rate": 2.9921875000000004e-05, + "loss": 0.12392425537109375, + "step": 384 + }, + { + "epoch": 0.0536473211175364, + "grad_norm": 1.4059078693389893, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.11438369750976562, + "step": 385 + }, + { + "epoch": 0.053786664808750786, + "grad_norm": 2.7788634300231934, + "learning_rate": 3.0078125e-05, + "loss": 0.14267730712890625, + "step": 386 + }, + { + "epoch": 0.053926008499965164, + "grad_norm": 9.308500289916992, + "learning_rate": 3.015625e-05, + "loss": 0.13502120971679688, + "step": 387 + }, + { + "epoch": 0.05406535219117954, + "grad_norm": 9.400507926940918, + "learning_rate": 3.0234375e-05, + "loss": 0.18524169921875, + "step": 388 + }, + { + "epoch": 0.054204695882393926, + "grad_norm": 3.2617523670196533, + "learning_rate": 3.03125e-05, + "loss": 0.12083053588867188, + "step": 389 + }, + { + "epoch": 0.0543440395736083, + "grad_norm": 9.919500350952148, + "learning_rate": 3.0390625000000002e-05, + "loss": 0.16698455810546875, + "step": 390 + }, + { + "epoch": 0.05448338326482269, + "grad_norm": 8.446266174316406, + "learning_rate": 3.0468750000000002e-05, + "loss": 0.1509990692138672, + "step": 391 + }, + { + "epoch": 0.054622726956037065, + "grad_norm": 4.400516986846924, + "learning_rate": 3.0546875e-05, + "loss": 0.14417266845703125, + "step": 392 + }, + { + "epoch": 0.05476207064725144, + "grad_norm": 2.5208187103271484, + "learning_rate": 3.0625000000000006e-05, + "loss": 0.14043045043945312, + "step": 393 + }, + { + "epoch": 0.05490141433846583, + "grad_norm": 7.2738518714904785, + "learning_rate": 3.0703125e-05, + "loss": 0.14445877075195312, + "step": 394 + }, + { + "epoch": 0.055040758029680205, + "grad_norm": 7.416927337646484, + "learning_rate": 3.078125e-05, + "loss": 0.14923477172851562, + "step": 395 + }, + { + "epoch": 0.05518010172089459, + "grad_norm": 1.5641915798187256, + "learning_rate": 3.0859375e-05, + "loss": 0.10800933837890625, + "step": 396 + }, + { + "epoch": 0.05531944541210897, + "grad_norm": 3.767225503921509, + "learning_rate": 3.09375e-05, + "loss": 0.13551712036132812, + "step": 397 + }, + { + "epoch": 0.055458789103323344, + "grad_norm": 1.774584412574768, + "learning_rate": 3.1015625000000003e-05, + "loss": 0.14743804931640625, + "step": 398 + }, + { + "epoch": 0.05559813279453773, + "grad_norm": 3.1852917671203613, + "learning_rate": 3.109375e-05, + "loss": 0.11072540283203125, + "step": 399 + }, + { + "epoch": 0.055737476485752106, + "grad_norm": 2.076260566711426, + "learning_rate": 3.1171875000000004e-05, + "loss": 0.1057891845703125, + "step": 400 + }, + { + "epoch": 0.05587682017696649, + "grad_norm": 5.548718452453613, + "learning_rate": 3.125e-05, + "loss": 0.16573333740234375, + "step": 401 + }, + { + "epoch": 0.05601616386818087, + "grad_norm": 3.886274576187134, + "learning_rate": 3.1328125000000004e-05, + "loss": 0.11401748657226562, + "step": 402 + }, + { + "epoch": 0.056155507559395246, + "grad_norm": 2.48211669921875, + "learning_rate": 3.140625e-05, + "loss": 0.14301681518554688, + "step": 403 + }, + { + "epoch": 0.05629485125060963, + "grad_norm": 3.893672227859497, + "learning_rate": 3.1484375000000005e-05, + "loss": 0.10957717895507812, + "step": 404 + }, + { + "epoch": 0.05643419494182401, + "grad_norm": 2.2722084522247314, + "learning_rate": 3.15625e-05, + "loss": 0.12091445922851562, + "step": 405 + }, + { + "epoch": 0.05657353863303839, + "grad_norm": 1.4712495803833008, + "learning_rate": 3.1640625000000005e-05, + "loss": 0.11108779907226562, + "step": 406 + }, + { + "epoch": 0.05671288232425277, + "grad_norm": 1.7143059968948364, + "learning_rate": 3.171875e-05, + "loss": 0.1330108642578125, + "step": 407 + }, + { + "epoch": 0.05685222601546715, + "grad_norm": 5.499020576477051, + "learning_rate": 3.1796875000000005e-05, + "loss": 0.12516021728515625, + "step": 408 + }, + { + "epoch": 0.05699156970668153, + "grad_norm": 3.048825740814209, + "learning_rate": 3.1875e-05, + "loss": 0.1291656494140625, + "step": 409 + }, + { + "epoch": 0.05713091339789591, + "grad_norm": 1.7014979124069214, + "learning_rate": 3.1953125000000006e-05, + "loss": 0.11047744750976562, + "step": 410 + }, + { + "epoch": 0.057270257089110294, + "grad_norm": 5.369425296783447, + "learning_rate": 3.203125e-05, + "loss": 0.13873672485351562, + "step": 411 + }, + { + "epoch": 0.05740960078032467, + "grad_norm": 2.3293750286102295, + "learning_rate": 3.2109375e-05, + "loss": 0.11191177368164062, + "step": 412 + }, + { + "epoch": 0.05754894447153905, + "grad_norm": 1.442236065864563, + "learning_rate": 3.21875e-05, + "loss": 0.11031341552734375, + "step": 413 + }, + { + "epoch": 0.05768828816275343, + "grad_norm": 3.0530261993408203, + "learning_rate": 3.2265625e-05, + "loss": 0.13765716552734375, + "step": 414 + }, + { + "epoch": 0.05782763185396781, + "grad_norm": 3.979896306991577, + "learning_rate": 3.2343750000000004e-05, + "loss": 0.12455368041992188, + "step": 415 + }, + { + "epoch": 0.057966975545182195, + "grad_norm": 4.007043361663818, + "learning_rate": 3.2421875e-05, + "loss": 0.11407470703125, + "step": 416 + }, + { + "epoch": 0.05810631923639657, + "grad_norm": 4.071215629577637, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.138458251953125, + "step": 417 + }, + { + "epoch": 0.05824566292761095, + "grad_norm": 1.7491230964660645, + "learning_rate": 3.2578125e-05, + "loss": 0.14777755737304688, + "step": 418 + }, + { + "epoch": 0.058385006618825334, + "grad_norm": 4.229414939880371, + "learning_rate": 3.2656250000000004e-05, + "loss": 0.11205291748046875, + "step": 419 + }, + { + "epoch": 0.05852435031003971, + "grad_norm": 1.9756709337234497, + "learning_rate": 3.2734375e-05, + "loss": 0.12688064575195312, + "step": 420 + }, + { + "epoch": 0.058663694001254096, + "grad_norm": 8.841416358947754, + "learning_rate": 3.2812500000000005e-05, + "loss": 0.15166091918945312, + "step": 421 + }, + { + "epoch": 0.058803037692468474, + "grad_norm": 5.926768779754639, + "learning_rate": 3.2890625e-05, + "loss": 0.11324310302734375, + "step": 422 + }, + { + "epoch": 0.05894238138368285, + "grad_norm": 4.093699932098389, + "learning_rate": 3.2968750000000005e-05, + "loss": 0.1289520263671875, + "step": 423 + }, + { + "epoch": 0.059081725074897236, + "grad_norm": 4.479770183563232, + "learning_rate": 3.3046875e-05, + "loss": 0.1318511962890625, + "step": 424 + }, + { + "epoch": 0.05922106876611161, + "grad_norm": 7.320713520050049, + "learning_rate": 3.3125000000000006e-05, + "loss": 0.17034912109375, + "step": 425 + }, + { + "epoch": 0.059360412457326, + "grad_norm": 4.637919902801514, + "learning_rate": 3.3203125e-05, + "loss": 0.15718841552734375, + "step": 426 + }, + { + "epoch": 0.059499756148540375, + "grad_norm": 1.5829510688781738, + "learning_rate": 3.3281250000000006e-05, + "loss": 0.13342666625976562, + "step": 427 + }, + { + "epoch": 0.05963909983975475, + "grad_norm": 5.163240909576416, + "learning_rate": 3.3359375e-05, + "loss": 0.1404132843017578, + "step": 428 + }, + { + "epoch": 0.05977844353096914, + "grad_norm": 2.8508288860321045, + "learning_rate": 3.34375e-05, + "loss": 0.1317596435546875, + "step": 429 + }, + { + "epoch": 0.059917787222183515, + "grad_norm": 2.1155521869659424, + "learning_rate": 3.3515625e-05, + "loss": 0.1241912841796875, + "step": 430 + }, + { + "epoch": 0.0600571309133979, + "grad_norm": 4.112156867980957, + "learning_rate": 3.359375e-05, + "loss": 0.13152694702148438, + "step": 431 + }, + { + "epoch": 0.06019647460461228, + "grad_norm": 4.972734451293945, + "learning_rate": 3.3671875000000004e-05, + "loss": 0.11899948120117188, + "step": 432 + }, + { + "epoch": 0.060335818295826654, + "grad_norm": 1.7366862297058105, + "learning_rate": 3.375e-05, + "loss": 0.10955810546875, + "step": 433 + }, + { + "epoch": 0.06047516198704104, + "grad_norm": 2.7259936332702637, + "learning_rate": 3.3828125000000004e-05, + "loss": 0.1493988037109375, + "step": 434 + }, + { + "epoch": 0.060614505678255416, + "grad_norm": 4.8301873207092285, + "learning_rate": 3.390625e-05, + "loss": 0.12696266174316406, + "step": 435 + }, + { + "epoch": 0.0607538493694698, + "grad_norm": 3.9330475330352783, + "learning_rate": 3.3984375000000004e-05, + "loss": 0.11937332153320312, + "step": 436 + }, + { + "epoch": 0.06089319306068418, + "grad_norm": 5.159908294677734, + "learning_rate": 3.40625e-05, + "loss": 0.13390541076660156, + "step": 437 + }, + { + "epoch": 0.061032536751898556, + "grad_norm": 4.419372081756592, + "learning_rate": 3.4140625000000005e-05, + "loss": 0.126953125, + "step": 438 + }, + { + "epoch": 0.06117188044311294, + "grad_norm": 2.4666006565093994, + "learning_rate": 3.421875e-05, + "loss": 0.10955047607421875, + "step": 439 + }, + { + "epoch": 0.06131122413432732, + "grad_norm": 3.8889145851135254, + "learning_rate": 3.4296875000000005e-05, + "loss": 0.14739608764648438, + "step": 440 + }, + { + "epoch": 0.0614505678255417, + "grad_norm": 5.32292366027832, + "learning_rate": 3.4375e-05, + "loss": 0.14748764038085938, + "step": 441 + }, + { + "epoch": 0.06158991151675608, + "grad_norm": 1.4702866077423096, + "learning_rate": 3.4453125000000006e-05, + "loss": 0.128997802734375, + "step": 442 + }, + { + "epoch": 0.06172925520797046, + "grad_norm": 3.621009111404419, + "learning_rate": 3.453125e-05, + "loss": 0.13852691650390625, + "step": 443 + }, + { + "epoch": 0.06186859889918484, + "grad_norm": 1.721809983253479, + "learning_rate": 3.4609375000000006e-05, + "loss": 0.13240432739257812, + "step": 444 + }, + { + "epoch": 0.06200794259039922, + "grad_norm": 1.6103589534759521, + "learning_rate": 3.46875e-05, + "loss": 0.13892364501953125, + "step": 445 + }, + { + "epoch": 0.062147286281613597, + "grad_norm": 2.5106561183929443, + "learning_rate": 3.4765625e-05, + "loss": 0.11012840270996094, + "step": 446 + }, + { + "epoch": 0.06228662997282798, + "grad_norm": 2.7145278453826904, + "learning_rate": 3.484375e-05, + "loss": 0.12872695922851562, + "step": 447 + }, + { + "epoch": 0.06242597366404236, + "grad_norm": 2.8473188877105713, + "learning_rate": 3.4921875e-05, + "loss": 0.12524795532226562, + "step": 448 + }, + { + "epoch": 0.06256531735525674, + "grad_norm": 3.7716329097747803, + "learning_rate": 3.5000000000000004e-05, + "loss": 0.1205902099609375, + "step": 449 + }, + { + "epoch": 0.06270466104647113, + "grad_norm": 4.147580146789551, + "learning_rate": 3.5078125e-05, + "loss": 0.17473983764648438, + "step": 450 + }, + { + "epoch": 0.0628440047376855, + "grad_norm": 3.5474328994750977, + "learning_rate": 3.5156250000000004e-05, + "loss": 0.15946197509765625, + "step": 451 + }, + { + "epoch": 0.06298334842889988, + "grad_norm": 1.9494801759719849, + "learning_rate": 3.5234375e-05, + "loss": 0.15173721313476562, + "step": 452 + }, + { + "epoch": 0.06312269212011426, + "grad_norm": 5.228839874267578, + "learning_rate": 3.5312500000000005e-05, + "loss": 0.12580490112304688, + "step": 453 + }, + { + "epoch": 0.06326203581132864, + "grad_norm": 5.427779674530029, + "learning_rate": 3.5390625e-05, + "loss": 0.12604141235351562, + "step": 454 + }, + { + "epoch": 0.06340137950254303, + "grad_norm": 2.176074266433716, + "learning_rate": 3.5468750000000005e-05, + "loss": 0.10713577270507812, + "step": 455 + }, + { + "epoch": 0.0635407231937574, + "grad_norm": 2.4117329120635986, + "learning_rate": 3.5546875e-05, + "loss": 0.10607528686523438, + "step": 456 + }, + { + "epoch": 0.06368006688497178, + "grad_norm": 2.6359939575195312, + "learning_rate": 3.5625000000000005e-05, + "loss": 0.11248397827148438, + "step": 457 + }, + { + "epoch": 0.06381941057618616, + "grad_norm": 1.8986053466796875, + "learning_rate": 3.5703125e-05, + "loss": 0.13222503662109375, + "step": 458 + }, + { + "epoch": 0.06395875426740054, + "grad_norm": 1.4631023406982422, + "learning_rate": 3.5781250000000006e-05, + "loss": 0.09749221801757812, + "step": 459 + }, + { + "epoch": 0.06409809795861493, + "grad_norm": 1.4928110837936401, + "learning_rate": 3.5859375e-05, + "loss": 0.12688636779785156, + "step": 460 + }, + { + "epoch": 0.06423744164982931, + "grad_norm": 3.4844894409179688, + "learning_rate": 3.5937500000000006e-05, + "loss": 0.14579391479492188, + "step": 461 + }, + { + "epoch": 0.06437678534104369, + "grad_norm": 1.6381232738494873, + "learning_rate": 3.6015625e-05, + "loss": 0.10846519470214844, + "step": 462 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.635528564453125, + "learning_rate": 3.609375e-05, + "loss": 0.0964813232421875, + "step": 463 + }, + { + "epoch": 0.06465547272347244, + "grad_norm": 3.211627721786499, + "learning_rate": 3.6171875000000003e-05, + "loss": 0.10479736328125, + "step": 464 + }, + { + "epoch": 0.06479481641468683, + "grad_norm": 3.4336962699890137, + "learning_rate": 3.625e-05, + "loss": 0.13736915588378906, + "step": 465 + }, + { + "epoch": 0.06493416010590121, + "grad_norm": 3.0173003673553467, + "learning_rate": 3.6328125000000004e-05, + "loss": 0.14548873901367188, + "step": 466 + }, + { + "epoch": 0.06507350379711559, + "grad_norm": 4.823855876922607, + "learning_rate": 3.640625e-05, + "loss": 0.15468215942382812, + "step": 467 + }, + { + "epoch": 0.06521284748832996, + "grad_norm": 1.3094842433929443, + "learning_rate": 3.6484375000000004e-05, + "loss": 0.10417938232421875, + "step": 468 + }, + { + "epoch": 0.06535219117954434, + "grad_norm": 2.7268495559692383, + "learning_rate": 3.65625e-05, + "loss": 0.12029647827148438, + "step": 469 + }, + { + "epoch": 0.06549153487075873, + "grad_norm": 2.7913684844970703, + "learning_rate": 3.6640625000000005e-05, + "loss": 0.09646987915039062, + "step": 470 + }, + { + "epoch": 0.06563087856197311, + "grad_norm": 4.4617085456848145, + "learning_rate": 3.671875e-05, + "loss": 0.144866943359375, + "step": 471 + }, + { + "epoch": 0.06577022225318749, + "grad_norm": 1.9952373504638672, + "learning_rate": 3.6796875000000005e-05, + "loss": 0.13555526733398438, + "step": 472 + }, + { + "epoch": 0.06590956594440187, + "grad_norm": 3.8279013633728027, + "learning_rate": 3.6875e-05, + "loss": 0.14617538452148438, + "step": 473 + }, + { + "epoch": 0.06604890963561624, + "grad_norm": 3.8824334144592285, + "learning_rate": 3.6953125000000005e-05, + "loss": 0.1311054229736328, + "step": 474 + }, + { + "epoch": 0.06618825332683063, + "grad_norm": 2.4181454181671143, + "learning_rate": 3.703125e-05, + "loss": 0.14231491088867188, + "step": 475 + }, + { + "epoch": 0.06632759701804501, + "grad_norm": 1.380698323249817, + "learning_rate": 3.7109375000000006e-05, + "loss": 0.10101318359375, + "step": 476 + }, + { + "epoch": 0.06646694070925939, + "grad_norm": 1.7238048315048218, + "learning_rate": 3.71875e-05, + "loss": 0.11988067626953125, + "step": 477 + }, + { + "epoch": 0.06660628440047377, + "grad_norm": 2.957953929901123, + "learning_rate": 3.7265625000000006e-05, + "loss": 0.10755538940429688, + "step": 478 + }, + { + "epoch": 0.06674562809168814, + "grad_norm": 2.6893792152404785, + "learning_rate": 3.734375e-05, + "loss": 0.14199447631835938, + "step": 479 + }, + { + "epoch": 0.06688497178290254, + "grad_norm": 1.6886773109436035, + "learning_rate": 3.7421875e-05, + "loss": 0.14385986328125, + "step": 480 + }, + { + "epoch": 0.06702431547411691, + "grad_norm": 1.8061015605926514, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.139404296875, + "step": 481 + }, + { + "epoch": 0.06716365916533129, + "grad_norm": 2.5976362228393555, + "learning_rate": 3.7578125e-05, + "loss": 0.11618804931640625, + "step": 482 + }, + { + "epoch": 0.06730300285654567, + "grad_norm": 1.5609452724456787, + "learning_rate": 3.7656250000000004e-05, + "loss": 0.15631103515625, + "step": 483 + }, + { + "epoch": 0.06744234654776005, + "grad_norm": 1.951782464981079, + "learning_rate": 3.7734375e-05, + "loss": 0.11980819702148438, + "step": 484 + }, + { + "epoch": 0.06758169023897444, + "grad_norm": 2.552130699157715, + "learning_rate": 3.7812500000000004e-05, + "loss": 0.12523460388183594, + "step": 485 + }, + { + "epoch": 0.06772103393018881, + "grad_norm": 4.840607643127441, + "learning_rate": 3.7890625e-05, + "loss": 0.1316986083984375, + "step": 486 + }, + { + "epoch": 0.06786037762140319, + "grad_norm": 1.504185676574707, + "learning_rate": 3.7968750000000005e-05, + "loss": 0.115142822265625, + "step": 487 + }, + { + "epoch": 0.06799972131261757, + "grad_norm": 1.4297614097595215, + "learning_rate": 3.8046875e-05, + "loss": 0.09502410888671875, + "step": 488 + }, + { + "epoch": 0.06813906500383195, + "grad_norm": 1.887446403503418, + "learning_rate": 3.8125000000000005e-05, + "loss": 0.1237640380859375, + "step": 489 + }, + { + "epoch": 0.06827840869504634, + "grad_norm": 3.166134834289551, + "learning_rate": 3.8203125e-05, + "loss": 0.13968658447265625, + "step": 490 + }, + { + "epoch": 0.06841775238626072, + "grad_norm": 2.0282509326934814, + "learning_rate": 3.8281250000000006e-05, + "loss": 0.12541580200195312, + "step": 491 + }, + { + "epoch": 0.0685570960774751, + "grad_norm": 1.2534290552139282, + "learning_rate": 3.8359375e-05, + "loss": 0.125762939453125, + "step": 492 + }, + { + "epoch": 0.06869643976868947, + "grad_norm": 2.328026294708252, + "learning_rate": 3.8437500000000006e-05, + "loss": 0.11131668090820312, + "step": 493 + }, + { + "epoch": 0.06883578345990385, + "grad_norm": 1.8155877590179443, + "learning_rate": 3.8515625e-05, + "loss": 0.14750289916992188, + "step": 494 + }, + { + "epoch": 0.06897512715111823, + "grad_norm": 1.3850457668304443, + "learning_rate": 3.8593750000000006e-05, + "loss": 0.10431289672851562, + "step": 495 + }, + { + "epoch": 0.06911447084233262, + "grad_norm": 1.1127197742462158, + "learning_rate": 3.8671875e-05, + "loss": 0.10576629638671875, + "step": 496 + }, + { + "epoch": 0.069253814533547, + "grad_norm": 1.3155763149261475, + "learning_rate": 3.875e-05, + "loss": 0.09729194641113281, + "step": 497 + }, + { + "epoch": 0.06939315822476137, + "grad_norm": 2.038994312286377, + "learning_rate": 3.8828125000000004e-05, + "loss": 0.12589645385742188, + "step": 498 + }, + { + "epoch": 0.06953250191597575, + "grad_norm": 1.4296114444732666, + "learning_rate": 3.890625e-05, + "loss": 0.1354217529296875, + "step": 499 + }, + { + "epoch": 0.06967184560719013, + "grad_norm": 0.9476801156997681, + "learning_rate": 3.8984375000000004e-05, + "loss": 0.1292266845703125, + "step": 500 + }, + { + "epoch": 0.06981118929840452, + "grad_norm": 2.58512282371521, + "learning_rate": 3.90625e-05, + "loss": 0.11761856079101562, + "step": 501 + }, + { + "epoch": 0.0699505329896189, + "grad_norm": 4.787256240844727, + "learning_rate": 3.9140625000000004e-05, + "loss": 0.15732765197753906, + "step": 502 + }, + { + "epoch": 0.07008987668083327, + "grad_norm": 2.0710058212280273, + "learning_rate": 3.921875e-05, + "loss": 0.13161087036132812, + "step": 503 + }, + { + "epoch": 0.07022922037204765, + "grad_norm": 4.266611576080322, + "learning_rate": 3.9296875000000005e-05, + "loss": 0.1299285888671875, + "step": 504 + }, + { + "epoch": 0.07036856406326203, + "grad_norm": 2.856719970703125, + "learning_rate": 3.9375e-05, + "loss": 0.14955711364746094, + "step": 505 + }, + { + "epoch": 0.07050790775447642, + "grad_norm": 1.2977595329284668, + "learning_rate": 3.9453125000000005e-05, + "loss": 0.11955642700195312, + "step": 506 + }, + { + "epoch": 0.0706472514456908, + "grad_norm": 3.974900722503662, + "learning_rate": 3.953125e-05, + "loss": 0.13746070861816406, + "step": 507 + }, + { + "epoch": 0.07078659513690518, + "grad_norm": 3.4972245693206787, + "learning_rate": 3.9609375000000006e-05, + "loss": 0.13118553161621094, + "step": 508 + }, + { + "epoch": 0.07092593882811955, + "grad_norm": 4.485108375549316, + "learning_rate": 3.96875e-05, + "loss": 0.1211090087890625, + "step": 509 + }, + { + "epoch": 0.07106528251933393, + "grad_norm": 2.8259153366088867, + "learning_rate": 3.9765625000000006e-05, + "loss": 0.12386703491210938, + "step": 510 + }, + { + "epoch": 0.07120462621054832, + "grad_norm": 2.3447604179382324, + "learning_rate": 3.984375e-05, + "loss": 0.13647842407226562, + "step": 511 + }, + { + "epoch": 0.0713439699017627, + "grad_norm": 3.9234423637390137, + "learning_rate": 3.9921875000000006e-05, + "loss": 0.13368606567382812, + "step": 512 + }, + { + "epoch": 0.07148331359297708, + "grad_norm": 2.6184463500976562, + "learning_rate": 4e-05, + "loss": 0.11807632446289062, + "step": 513 + }, + { + "epoch": 0.07162265728419145, + "grad_norm": 0.822776734828949, + "learning_rate": 3.999999777822831e-05, + "loss": 0.10339736938476562, + "step": 514 + }, + { + "epoch": 0.07176200097540583, + "grad_norm": 2.839972496032715, + "learning_rate": 3.999999111291371e-05, + "loss": 0.11426544189453125, + "step": 515 + }, + { + "epoch": 0.07190134466662022, + "grad_norm": 0.9693661332130432, + "learning_rate": 3.9999980004057696e-05, + "loss": 0.12184906005859375, + "step": 516 + }, + { + "epoch": 0.0720406883578346, + "grad_norm": 1.214519739151001, + "learning_rate": 3.999996445166274e-05, + "loss": 0.12931060791015625, + "step": 517 + }, + { + "epoch": 0.07218003204904898, + "grad_norm": 1.4069898128509521, + "learning_rate": 3.9999944455732284e-05, + "loss": 0.1411418914794922, + "step": 518 + }, + { + "epoch": 0.07231937574026336, + "grad_norm": 2.6690611839294434, + "learning_rate": 3.9999920016270776e-05, + "loss": 0.12273788452148438, + "step": 519 + }, + { + "epoch": 0.07245871943147773, + "grad_norm": 1.1753342151641846, + "learning_rate": 3.999989113328364e-05, + "loss": 0.1236419677734375, + "step": 520 + }, + { + "epoch": 0.07259806312269212, + "grad_norm": 1.8818222284317017, + "learning_rate": 3.999985780677731e-05, + "loss": 0.12533187866210938, + "step": 521 + }, + { + "epoch": 0.0727374068139065, + "grad_norm": 1.4653863906860352, + "learning_rate": 3.999982003675918e-05, + "loss": 0.10535812377929688, + "step": 522 + }, + { + "epoch": 0.07287675050512088, + "grad_norm": 3.514106035232544, + "learning_rate": 3.999977782323763e-05, + "loss": 0.14447402954101562, + "step": 523 + }, + { + "epoch": 0.07301609419633526, + "grad_norm": 5.4844136238098145, + "learning_rate": 3.9999731166222065e-05, + "loss": 0.15041351318359375, + "step": 524 + }, + { + "epoch": 0.07315543788754963, + "grad_norm": 4.237921714782715, + "learning_rate": 3.9999680065722826e-05, + "loss": 0.15274810791015625, + "step": 525 + }, + { + "epoch": 0.07329478157876403, + "grad_norm": 3.5381581783294678, + "learning_rate": 3.999962452175128e-05, + "loss": 0.16823196411132812, + "step": 526 + }, + { + "epoch": 0.0734341252699784, + "grad_norm": 4.438521862030029, + "learning_rate": 3.9999564534319764e-05, + "loss": 0.12203025817871094, + "step": 527 + }, + { + "epoch": 0.07357346896119278, + "grad_norm": 2.7971794605255127, + "learning_rate": 3.9999500103441604e-05, + "loss": 0.12574195861816406, + "step": 528 + }, + { + "epoch": 0.07371281265240716, + "grad_norm": 1.192266583442688, + "learning_rate": 3.999943122913112e-05, + "loss": 0.15298080444335938, + "step": 529 + }, + { + "epoch": 0.07385215634362154, + "grad_norm": 2.0007553100585938, + "learning_rate": 3.9999357911403613e-05, + "loss": 0.126190185546875, + "step": 530 + }, + { + "epoch": 0.07399150003483593, + "grad_norm": 2.0570294857025146, + "learning_rate": 3.9999280150275375e-05, + "loss": 0.1028594970703125, + "step": 531 + }, + { + "epoch": 0.0741308437260503, + "grad_norm": 1.9041540622711182, + "learning_rate": 3.999919794576367e-05, + "loss": 0.12729835510253906, + "step": 532 + }, + { + "epoch": 0.07427018741726468, + "grad_norm": 1.8569375276565552, + "learning_rate": 3.9999111297886774e-05, + "loss": 0.11019134521484375, + "step": 533 + }, + { + "epoch": 0.07440953110847906, + "grad_norm": 0.8470864295959473, + "learning_rate": 3.999902020666394e-05, + "loss": 0.09790992736816406, + "step": 534 + }, + { + "epoch": 0.07454887479969344, + "grad_norm": 1.407055139541626, + "learning_rate": 3.99989246721154e-05, + "loss": 0.11622810363769531, + "step": 535 + }, + { + "epoch": 0.07468821849090783, + "grad_norm": 4.166196346282959, + "learning_rate": 3.9998824694262376e-05, + "loss": 0.1158599853515625, + "step": 536 + }, + { + "epoch": 0.0748275621821222, + "grad_norm": 3.4478037357330322, + "learning_rate": 3.999872027312709e-05, + "loss": 0.1285247802734375, + "step": 537 + }, + { + "epoch": 0.07496690587333658, + "grad_norm": 1.8398157358169556, + "learning_rate": 3.999861140873274e-05, + "loss": 0.14141845703125, + "step": 538 + }, + { + "epoch": 0.07510624956455096, + "grad_norm": 4.192670822143555, + "learning_rate": 3.99984981011035e-05, + "loss": 0.14142990112304688, + "step": 539 + }, + { + "epoch": 0.07524559325576534, + "grad_norm": 5.373982906341553, + "learning_rate": 3.999838035026456e-05, + "loss": 0.15409088134765625, + "step": 540 + }, + { + "epoch": 0.07538493694697973, + "grad_norm": 2.7747888565063477, + "learning_rate": 3.999825815624208e-05, + "loss": 0.11669540405273438, + "step": 541 + }, + { + "epoch": 0.07552428063819411, + "grad_norm": 1.1899428367614746, + "learning_rate": 3.9998131519063204e-05, + "loss": 0.12557220458984375, + "step": 542 + }, + { + "epoch": 0.07566362432940849, + "grad_norm": 3.206754446029663, + "learning_rate": 3.999800043875607e-05, + "loss": 0.15814590454101562, + "step": 543 + }, + { + "epoch": 0.07580296802062286, + "grad_norm": 2.567349672317505, + "learning_rate": 3.999786491534981e-05, + "loss": 0.10853958129882812, + "step": 544 + }, + { + "epoch": 0.07594231171183724, + "grad_norm": 3.6399178504943848, + "learning_rate": 3.9997724948874514e-05, + "loss": 0.12878036499023438, + "step": 545 + }, + { + "epoch": 0.07608165540305163, + "grad_norm": 2.8854386806488037, + "learning_rate": 3.999758053936129e-05, + "loss": 0.13994598388671875, + "step": 546 + }, + { + "epoch": 0.07622099909426601, + "grad_norm": 0.9817878007888794, + "learning_rate": 3.999743168684223e-05, + "loss": 0.09542465209960938, + "step": 547 + }, + { + "epoch": 0.07636034278548039, + "grad_norm": 3.4385974407196045, + "learning_rate": 3.9997278391350395e-05, + "loss": 0.1431903839111328, + "step": 548 + }, + { + "epoch": 0.07649968647669476, + "grad_norm": 1.1352037191390991, + "learning_rate": 3.999712065291984e-05, + "loss": 0.11184310913085938, + "step": 549 + }, + { + "epoch": 0.07663903016790914, + "grad_norm": 2.050690174102783, + "learning_rate": 3.999695847158562e-05, + "loss": 0.12324905395507812, + "step": 550 + }, + { + "epoch": 0.07677837385912353, + "grad_norm": 1.5649875402450562, + "learning_rate": 3.999679184738377e-05, + "loss": 0.14053726196289062, + "step": 551 + }, + { + "epoch": 0.07691771755033791, + "grad_norm": 1.5763839483261108, + "learning_rate": 3.9996620780351306e-05, + "loss": 0.12506675720214844, + "step": 552 + }, + { + "epoch": 0.07705706124155229, + "grad_norm": 1.6124186515808105, + "learning_rate": 3.9996445270526235e-05, + "loss": 0.12255668640136719, + "step": 553 + }, + { + "epoch": 0.07719640493276667, + "grad_norm": 1.2539173364639282, + "learning_rate": 3.999626531794755e-05, + "loss": 0.1411266326904297, + "step": 554 + }, + { + "epoch": 0.07733574862398104, + "grad_norm": 0.8215686678886414, + "learning_rate": 3.9996080922655236e-05, + "loss": 0.10073089599609375, + "step": 555 + }, + { + "epoch": 0.07747509231519543, + "grad_norm": 0.9087092876434326, + "learning_rate": 3.9995892084690256e-05, + "loss": 0.11696243286132812, + "step": 556 + }, + { + "epoch": 0.07761443600640981, + "grad_norm": 1.0993878841400146, + "learning_rate": 3.999569880409458e-05, + "loss": 0.12771987915039062, + "step": 557 + }, + { + "epoch": 0.07775377969762419, + "grad_norm": 0.95680171251297, + "learning_rate": 3.9995501080911124e-05, + "loss": 0.11429214477539062, + "step": 558 + }, + { + "epoch": 0.07789312338883857, + "grad_norm": 1.2004928588867188, + "learning_rate": 3.999529891518384e-05, + "loss": 0.095458984375, + "step": 559 + }, + { + "epoch": 0.07803246708005294, + "grad_norm": 5.074276447296143, + "learning_rate": 3.9995092306957636e-05, + "loss": 0.1392669677734375, + "step": 560 + }, + { + "epoch": 0.07817181077126734, + "grad_norm": 3.300252676010132, + "learning_rate": 3.9994881256278424e-05, + "loss": 0.11614036560058594, + "step": 561 + }, + { + "epoch": 0.07831115446248171, + "grad_norm": 1.6207990646362305, + "learning_rate": 3.999466576319308e-05, + "loss": 0.12645339965820312, + "step": 562 + }, + { + "epoch": 0.07845049815369609, + "grad_norm": 3.3293297290802, + "learning_rate": 3.999444582774949e-05, + "loss": 0.1427459716796875, + "step": 563 + }, + { + "epoch": 0.07858984184491047, + "grad_norm": 2.966750144958496, + "learning_rate": 3.999422144999652e-05, + "loss": 0.14469528198242188, + "step": 564 + }, + { + "epoch": 0.07872918553612485, + "grad_norm": 1.2516913414001465, + "learning_rate": 3.9993992629984016e-05, + "loss": 0.13843727111816406, + "step": 565 + }, + { + "epoch": 0.07886852922733924, + "grad_norm": 1.7742544412612915, + "learning_rate": 3.9993759367762825e-05, + "loss": 0.12029266357421875, + "step": 566 + }, + { + "epoch": 0.07900787291855361, + "grad_norm": 1.5628852844238281, + "learning_rate": 3.9993521663384766e-05, + "loss": 0.13542556762695312, + "step": 567 + }, + { + "epoch": 0.07914721660976799, + "grad_norm": 1.306313395500183, + "learning_rate": 3.999327951690265e-05, + "loss": 0.16187286376953125, + "step": 568 + }, + { + "epoch": 0.07928656030098237, + "grad_norm": 1.016982078552246, + "learning_rate": 3.9993032928370284e-05, + "loss": 0.14037322998046875, + "step": 569 + }, + { + "epoch": 0.07942590399219675, + "grad_norm": 0.8395294547080994, + "learning_rate": 3.999278189784245e-05, + "loss": 0.12171173095703125, + "step": 570 + }, + { + "epoch": 0.07956524768341114, + "grad_norm": 1.2827494144439697, + "learning_rate": 3.9992526425374916e-05, + "loss": 0.13222122192382812, + "step": 571 + }, + { + "epoch": 0.07970459137462552, + "grad_norm": 1.1822994947433472, + "learning_rate": 3.999226651102445e-05, + "loss": 0.12479400634765625, + "step": 572 + }, + { + "epoch": 0.0798439350658399, + "grad_norm": 2.395176410675049, + "learning_rate": 3.99920021548488e-05, + "loss": 0.11441612243652344, + "step": 573 + }, + { + "epoch": 0.07998327875705427, + "grad_norm": 1.7266473770141602, + "learning_rate": 3.99917333569067e-05, + "loss": 0.12455558776855469, + "step": 574 + }, + { + "epoch": 0.08012262244826865, + "grad_norm": 1.9790352582931519, + "learning_rate": 3.9991460117257856e-05, + "loss": 0.1428089141845703, + "step": 575 + }, + { + "epoch": 0.08026196613948304, + "grad_norm": 1.5058035850524902, + "learning_rate": 3.9991182435962994e-05, + "loss": 0.15181350708007812, + "step": 576 + }, + { + "epoch": 0.08040130983069742, + "grad_norm": 1.8110665082931519, + "learning_rate": 3.99909003130838e-05, + "loss": 0.11397171020507812, + "step": 577 + }, + { + "epoch": 0.0805406535219118, + "grad_norm": 2.12797212600708, + "learning_rate": 3.9990613748682954e-05, + "loss": 0.13140296936035156, + "step": 578 + }, + { + "epoch": 0.08067999721312617, + "grad_norm": 1.286681056022644, + "learning_rate": 3.9990322742824126e-05, + "loss": 0.11044883728027344, + "step": 579 + }, + { + "epoch": 0.08081934090434055, + "grad_norm": 1.4707584381103516, + "learning_rate": 3.999002729557197e-05, + "loss": 0.11823272705078125, + "step": 580 + }, + { + "epoch": 0.08095868459555494, + "grad_norm": 1.8197648525238037, + "learning_rate": 3.9989727406992136e-05, + "loss": 0.119476318359375, + "step": 581 + }, + { + "epoch": 0.08109802828676932, + "grad_norm": 2.242061138153076, + "learning_rate": 3.998942307715124e-05, + "loss": 0.13702011108398438, + "step": 582 + }, + { + "epoch": 0.0812373719779837, + "grad_norm": 2.5707595348358154, + "learning_rate": 3.998911430611691e-05, + "loss": 0.1223907470703125, + "step": 583 + }, + { + "epoch": 0.08137671566919807, + "grad_norm": 1.003810167312622, + "learning_rate": 3.9988801093957735e-05, + "loss": 0.1099853515625, + "step": 584 + }, + { + "epoch": 0.08151605936041245, + "grad_norm": 0.9128933548927307, + "learning_rate": 3.9988483440743306e-05, + "loss": 0.10852813720703125, + "step": 585 + }, + { + "epoch": 0.08165540305162684, + "grad_norm": 1.4836663007736206, + "learning_rate": 3.998816134654421e-05, + "loss": 0.09893417358398438, + "step": 586 + }, + { + "epoch": 0.08179474674284122, + "grad_norm": 1.0268858671188354, + "learning_rate": 3.9987834811431986e-05, + "loss": 0.11932754516601562, + "step": 587 + }, + { + "epoch": 0.0819340904340556, + "grad_norm": 1.4726641178131104, + "learning_rate": 3.998750383547921e-05, + "loss": 0.10371208190917969, + "step": 588 + }, + { + "epoch": 0.08207343412526998, + "grad_norm": 1.1538609266281128, + "learning_rate": 3.9987168418759396e-05, + "loss": 0.10941696166992188, + "step": 589 + }, + { + "epoch": 0.08221277781648435, + "grad_norm": 1.646634578704834, + "learning_rate": 3.998682856134708e-05, + "loss": 0.10530471801757812, + "step": 590 + }, + { + "epoch": 0.08235212150769874, + "grad_norm": 1.7815392017364502, + "learning_rate": 3.9986484263317766e-05, + "loss": 0.11213302612304688, + "step": 591 + }, + { + "epoch": 0.08249146519891312, + "grad_norm": 3.2004342079162598, + "learning_rate": 3.9986135524747945e-05, + "loss": 0.117218017578125, + "step": 592 + }, + { + "epoch": 0.0826308088901275, + "grad_norm": 1.6569088697433472, + "learning_rate": 3.9985782345715105e-05, + "loss": 0.13239669799804688, + "step": 593 + }, + { + "epoch": 0.08277015258134188, + "grad_norm": 1.388911485671997, + "learning_rate": 3.99854247262977e-05, + "loss": 0.103118896484375, + "step": 594 + }, + { + "epoch": 0.08290949627255625, + "grad_norm": 2.638531446456909, + "learning_rate": 3.99850626665752e-05, + "loss": 0.1288604736328125, + "step": 595 + }, + { + "epoch": 0.08304883996377065, + "grad_norm": 1.2141910791397095, + "learning_rate": 3.998469616662805e-05, + "loss": 0.10183334350585938, + "step": 596 + }, + { + "epoch": 0.08318818365498502, + "grad_norm": 2.1090993881225586, + "learning_rate": 3.9984325226537665e-05, + "loss": 0.11148452758789062, + "step": 597 + }, + { + "epoch": 0.0833275273461994, + "grad_norm": 1.7591581344604492, + "learning_rate": 3.998394984638647e-05, + "loss": 0.09261322021484375, + "step": 598 + }, + { + "epoch": 0.08346687103741378, + "grad_norm": 1.0490654706954956, + "learning_rate": 3.9983570026257844e-05, + "loss": 0.10947418212890625, + "step": 599 + }, + { + "epoch": 0.08360621472862816, + "grad_norm": 1.554352879524231, + "learning_rate": 3.998318576623621e-05, + "loss": 0.10037040710449219, + "step": 600 + }, + { + "epoch": 0.08374555841984255, + "grad_norm": 1.6197351217269897, + "learning_rate": 3.998279706640691e-05, + "loss": 0.10555267333984375, + "step": 601 + }, + { + "epoch": 0.08388490211105692, + "grad_norm": 1.0546890497207642, + "learning_rate": 3.998240392685633e-05, + "loss": 0.1088409423828125, + "step": 602 + }, + { + "epoch": 0.0840242458022713, + "grad_norm": 1.1002198457717896, + "learning_rate": 3.9982006347671796e-05, + "loss": 0.11896324157714844, + "step": 603 + }, + { + "epoch": 0.08416358949348568, + "grad_norm": 2.6173956394195557, + "learning_rate": 3.998160432894164e-05, + "loss": 0.12821388244628906, + "step": 604 + }, + { + "epoch": 0.08430293318470006, + "grad_norm": 1.2729194164276123, + "learning_rate": 3.99811978707552e-05, + "loss": 0.08441925048828125, + "step": 605 + }, + { + "epoch": 0.08444227687591445, + "grad_norm": 2.692823648452759, + "learning_rate": 3.998078697320277e-05, + "loss": 0.13408279418945312, + "step": 606 + }, + { + "epoch": 0.08458162056712883, + "grad_norm": 1.5508193969726562, + "learning_rate": 3.9980371636375645e-05, + "loss": 0.11621475219726562, + "step": 607 + }, + { + "epoch": 0.0847209642583432, + "grad_norm": 1.6721742153167725, + "learning_rate": 3.99799518603661e-05, + "loss": 0.11021804809570312, + "step": 608 + }, + { + "epoch": 0.08486030794955758, + "grad_norm": 0.621113657951355, + "learning_rate": 3.9979527645267395e-05, + "loss": 0.10602951049804688, + "step": 609 + }, + { + "epoch": 0.08499965164077196, + "grad_norm": 1.1255561113357544, + "learning_rate": 3.99790989911738e-05, + "loss": 0.10942649841308594, + "step": 610 + }, + { + "epoch": 0.08513899533198635, + "grad_norm": 1.8715317249298096, + "learning_rate": 3.997866589818053e-05, + "loss": 0.11733627319335938, + "step": 611 + }, + { + "epoch": 0.08527833902320073, + "grad_norm": 1.6630016565322876, + "learning_rate": 3.997822836638382e-05, + "loss": 0.11240005493164062, + "step": 612 + }, + { + "epoch": 0.0854176827144151, + "grad_norm": 1.711911678314209, + "learning_rate": 3.997778639588088e-05, + "loss": 0.11293411254882812, + "step": 613 + }, + { + "epoch": 0.08555702640562948, + "grad_norm": 1.3268662691116333, + "learning_rate": 3.9977339986769905e-05, + "loss": 0.11964035034179688, + "step": 614 + }, + { + "epoch": 0.08569637009684386, + "grad_norm": 2.7521893978118896, + "learning_rate": 3.9976889139150074e-05, + "loss": 0.11651039123535156, + "step": 615 + }, + { + "epoch": 0.08583571378805825, + "grad_norm": 1.9348469972610474, + "learning_rate": 3.997643385312156e-05, + "loss": 0.10770988464355469, + "step": 616 + }, + { + "epoch": 0.08597505747927263, + "grad_norm": 3.5101633071899414, + "learning_rate": 3.9975974128785505e-05, + "loss": 0.12965774536132812, + "step": 617 + }, + { + "epoch": 0.086114401170487, + "grad_norm": 1.6035350561141968, + "learning_rate": 3.997550996624406e-05, + "loss": 0.12910079956054688, + "step": 618 + }, + { + "epoch": 0.08625374486170138, + "grad_norm": 2.1761186122894287, + "learning_rate": 3.997504136560036e-05, + "loss": 0.11957359313964844, + "step": 619 + }, + { + "epoch": 0.08639308855291576, + "grad_norm": 5.387199878692627, + "learning_rate": 3.997456832695849e-05, + "loss": 0.128814697265625, + "step": 620 + }, + { + "epoch": 0.08653243224413015, + "grad_norm": 4.6885247230529785, + "learning_rate": 3.997409085042358e-05, + "loss": 0.11867904663085938, + "step": 621 + }, + { + "epoch": 0.08667177593534453, + "grad_norm": 3.040632963180542, + "learning_rate": 3.997360893610169e-05, + "loss": 0.129364013671875, + "step": 622 + }, + { + "epoch": 0.08681111962655891, + "grad_norm": 1.2839354276657104, + "learning_rate": 3.99731225840999e-05, + "loss": 0.15457534790039062, + "step": 623 + }, + { + "epoch": 0.08695046331777329, + "grad_norm": 0.9281060099601746, + "learning_rate": 3.9972631794526265e-05, + "loss": 0.12524032592773438, + "step": 624 + }, + { + "epoch": 0.08708980700898766, + "grad_norm": 1.7115317583084106, + "learning_rate": 3.9972136567489836e-05, + "loss": 0.13205718994140625, + "step": 625 + }, + { + "epoch": 0.08722915070020205, + "grad_norm": 0.7480992674827576, + "learning_rate": 3.997163690310063e-05, + "loss": 0.10530853271484375, + "step": 626 + }, + { + "epoch": 0.08736849439141643, + "grad_norm": 1.0997077226638794, + "learning_rate": 3.997113280146966e-05, + "loss": 0.1295928955078125, + "step": 627 + }, + { + "epoch": 0.08750783808263081, + "grad_norm": 1.7303742170333862, + "learning_rate": 3.9970624262708934e-05, + "loss": 0.13366317749023438, + "step": 628 + }, + { + "epoch": 0.08764718177384519, + "grad_norm": 1.0268585681915283, + "learning_rate": 3.997011128693143e-05, + "loss": 0.0887908935546875, + "step": 629 + }, + { + "epoch": 0.08778652546505956, + "grad_norm": 0.6037856340408325, + "learning_rate": 3.996959387425113e-05, + "loss": 0.09893417358398438, + "step": 630 + }, + { + "epoch": 0.08792586915627396, + "grad_norm": 1.8382023572921753, + "learning_rate": 3.996907202478298e-05, + "loss": 0.1303882598876953, + "step": 631 + }, + { + "epoch": 0.08806521284748833, + "grad_norm": 1.2984817028045654, + "learning_rate": 3.996854573864293e-05, + "loss": 0.12491989135742188, + "step": 632 + }, + { + "epoch": 0.08820455653870271, + "grad_norm": 2.6226625442504883, + "learning_rate": 3.9968015015947904e-05, + "loss": 0.13571929931640625, + "step": 633 + }, + { + "epoch": 0.08834390022991709, + "grad_norm": 1.1549209356307983, + "learning_rate": 3.996747985681582e-05, + "loss": 0.09842300415039062, + "step": 634 + }, + { + "epoch": 0.08848324392113147, + "grad_norm": 1.9254469871520996, + "learning_rate": 3.9966940261365576e-05, + "loss": 0.126007080078125, + "step": 635 + }, + { + "epoch": 0.08862258761234586, + "grad_norm": 0.9597679972648621, + "learning_rate": 3.996639622971706e-05, + "loss": 0.10372352600097656, + "step": 636 + }, + { + "epoch": 0.08876193130356023, + "grad_norm": 3.118487596511841, + "learning_rate": 3.996584776199114e-05, + "loss": 0.154571533203125, + "step": 637 + }, + { + "epoch": 0.08890127499477461, + "grad_norm": 0.9069197773933411, + "learning_rate": 3.9965294858309685e-05, + "loss": 0.10735130310058594, + "step": 638 + }, + { + "epoch": 0.08904061868598899, + "grad_norm": 1.4038259983062744, + "learning_rate": 3.996473751879552e-05, + "loss": 0.116241455078125, + "step": 639 + }, + { + "epoch": 0.08917996237720337, + "grad_norm": 1.2533254623413086, + "learning_rate": 3.996417574357248e-05, + "loss": 0.12218856811523438, + "step": 640 + }, + { + "epoch": 0.08931930606841776, + "grad_norm": 3.0609092712402344, + "learning_rate": 3.996360953276538e-05, + "loss": 0.13055419921875, + "step": 641 + }, + { + "epoch": 0.08945864975963214, + "grad_norm": 1.1876765489578247, + "learning_rate": 3.996303888650002e-05, + "loss": 0.11069202423095703, + "step": 642 + }, + { + "epoch": 0.08959799345084651, + "grad_norm": 1.6790541410446167, + "learning_rate": 3.996246380490319e-05, + "loss": 0.11560440063476562, + "step": 643 + }, + { + "epoch": 0.08973733714206089, + "grad_norm": 0.8547704219818115, + "learning_rate": 3.996188428810264e-05, + "loss": 0.12773895263671875, + "step": 644 + }, + { + "epoch": 0.08987668083327527, + "grad_norm": 1.1759178638458252, + "learning_rate": 3.9961300336227146e-05, + "loss": 0.13372230529785156, + "step": 645 + }, + { + "epoch": 0.09001602452448966, + "grad_norm": 1.6206737756729126, + "learning_rate": 3.996071194940644e-05, + "loss": 0.1079559326171875, + "step": 646 + }, + { + "epoch": 0.09015536821570404, + "grad_norm": 0.7383829355239868, + "learning_rate": 3.996011912777126e-05, + "loss": 0.1084136962890625, + "step": 647 + }, + { + "epoch": 0.09029471190691842, + "grad_norm": 1.0509989261627197, + "learning_rate": 3.995952187145329e-05, + "loss": 0.11505317687988281, + "step": 648 + }, + { + "epoch": 0.09043405559813279, + "grad_norm": 2.526982307434082, + "learning_rate": 3.995892018058525e-05, + "loss": 0.11395645141601562, + "step": 649 + }, + { + "epoch": 0.09057339928934717, + "grad_norm": 2.198802947998047, + "learning_rate": 3.995831405530082e-05, + "loss": 0.17830276489257812, + "step": 650 + }, + { + "epoch": 0.09071274298056156, + "grad_norm": 0.8278115391731262, + "learning_rate": 3.9957703495734666e-05, + "loss": 0.1174468994140625, + "step": 651 + }, + { + "epoch": 0.09085208667177594, + "grad_norm": 1.283538818359375, + "learning_rate": 3.9957088502022426e-05, + "loss": 0.1059112548828125, + "step": 652 + }, + { + "epoch": 0.09099143036299032, + "grad_norm": 1.0186541080474854, + "learning_rate": 3.995646907430076e-05, + "loss": 0.12388992309570312, + "step": 653 + }, + { + "epoch": 0.0911307740542047, + "grad_norm": 0.647256076335907, + "learning_rate": 3.995584521270727e-05, + "loss": 0.11123085021972656, + "step": 654 + }, + { + "epoch": 0.09127011774541907, + "grad_norm": 1.8642301559448242, + "learning_rate": 3.995521691738058e-05, + "loss": 0.12607383728027344, + "step": 655 + }, + { + "epoch": 0.09140946143663346, + "grad_norm": 1.06022047996521, + "learning_rate": 3.995458418846028e-05, + "loss": 0.11605453491210938, + "step": 656 + }, + { + "epoch": 0.09154880512784784, + "grad_norm": 1.1503651142120361, + "learning_rate": 3.995394702608693e-05, + "loss": 0.13846206665039062, + "step": 657 + }, + { + "epoch": 0.09168814881906222, + "grad_norm": 0.7887946963310242, + "learning_rate": 3.995330543040212e-05, + "loss": 0.094390869140625, + "step": 658 + }, + { + "epoch": 0.0918274925102766, + "grad_norm": 1.8002413511276245, + "learning_rate": 3.995265940154838e-05, + "loss": 0.13138580322265625, + "step": 659 + }, + { + "epoch": 0.09196683620149097, + "grad_norm": 0.6641622185707092, + "learning_rate": 3.995200893966925e-05, + "loss": 0.13469505310058594, + "step": 660 + }, + { + "epoch": 0.09210617989270536, + "grad_norm": 1.4932410717010498, + "learning_rate": 3.9951354044909246e-05, + "loss": 0.1217803955078125, + "step": 661 + }, + { + "epoch": 0.09224552358391974, + "grad_norm": 0.978024959564209, + "learning_rate": 3.995069471741387e-05, + "loss": 0.12249374389648438, + "step": 662 + }, + { + "epoch": 0.09238486727513412, + "grad_norm": 1.7842825651168823, + "learning_rate": 3.9950030957329604e-05, + "loss": 0.13315963745117188, + "step": 663 + }, + { + "epoch": 0.0925242109663485, + "grad_norm": 1.1142187118530273, + "learning_rate": 3.9949362764803934e-05, + "loss": 0.10288429260253906, + "step": 664 + }, + { + "epoch": 0.09266355465756287, + "grad_norm": 0.7137280106544495, + "learning_rate": 3.9948690139985305e-05, + "loss": 0.10126876831054688, + "step": 665 + }, + { + "epoch": 0.09280289834877727, + "grad_norm": 3.2856662273406982, + "learning_rate": 3.994801308302316e-05, + "loss": 0.09999465942382812, + "step": 666 + }, + { + "epoch": 0.09294224203999164, + "grad_norm": 3.529743194580078, + "learning_rate": 3.994733159406794e-05, + "loss": 0.12188148498535156, + "step": 667 + }, + { + "epoch": 0.09308158573120602, + "grad_norm": 4.317653656005859, + "learning_rate": 3.9946645673271034e-05, + "loss": 0.16497802734375, + "step": 668 + }, + { + "epoch": 0.0932209294224204, + "grad_norm": 2.3287761211395264, + "learning_rate": 3.994595532078486e-05, + "loss": 0.12685012817382812, + "step": 669 + }, + { + "epoch": 0.09336027311363478, + "grad_norm": 1.8077071905136108, + "learning_rate": 3.9945260536762775e-05, + "loss": 0.09293746948242188, + "step": 670 + }, + { + "epoch": 0.09349961680484917, + "grad_norm": 3.8554933071136475, + "learning_rate": 3.994456132135916e-05, + "loss": 0.1584300994873047, + "step": 671 + }, + { + "epoch": 0.09363896049606354, + "grad_norm": 3.563399314880371, + "learning_rate": 3.994385767472937e-05, + "loss": 0.14679336547851562, + "step": 672 + }, + { + "epoch": 0.09377830418727792, + "grad_norm": 2.3390536308288574, + "learning_rate": 3.9943149597029724e-05, + "loss": 0.11171531677246094, + "step": 673 + }, + { + "epoch": 0.0939176478784923, + "grad_norm": 0.9687732458114624, + "learning_rate": 3.994243708841755e-05, + "loss": 0.13140106201171875, + "step": 674 + }, + { + "epoch": 0.09405699156970668, + "grad_norm": 1.8634510040283203, + "learning_rate": 3.9941720149051146e-05, + "loss": 0.14212417602539062, + "step": 675 + }, + { + "epoch": 0.09419633526092107, + "grad_norm": 4.657368183135986, + "learning_rate": 3.99409987790898e-05, + "loss": 0.14390182495117188, + "step": 676 + }, + { + "epoch": 0.09433567895213545, + "grad_norm": 3.0431559085845947, + "learning_rate": 3.9940272978693795e-05, + "loss": 0.10838890075683594, + "step": 677 + }, + { + "epoch": 0.09447502264334982, + "grad_norm": 3.7887074947357178, + "learning_rate": 3.993954274802437e-05, + "loss": 0.16521644592285156, + "step": 678 + }, + { + "epoch": 0.0946143663345642, + "grad_norm": 1.4805834293365479, + "learning_rate": 3.993880808724378e-05, + "loss": 0.1354351043701172, + "step": 679 + }, + { + "epoch": 0.09475371002577858, + "grad_norm": 2.151493549346924, + "learning_rate": 3.993806899651524e-05, + "loss": 0.1222991943359375, + "step": 680 + }, + { + "epoch": 0.09489305371699297, + "grad_norm": 2.7471535205841064, + "learning_rate": 3.9937325476002955e-05, + "loss": 0.110748291015625, + "step": 681 + }, + { + "epoch": 0.09503239740820735, + "grad_norm": 2.282899856567383, + "learning_rate": 3.993657752587214e-05, + "loss": 0.108673095703125, + "step": 682 + }, + { + "epoch": 0.09517174109942172, + "grad_norm": 1.6148349046707153, + "learning_rate": 3.993582514628895e-05, + "loss": 0.13756561279296875, + "step": 683 + }, + { + "epoch": 0.0953110847906361, + "grad_norm": 0.6688312888145447, + "learning_rate": 3.9935068337420564e-05, + "loss": 0.09497451782226562, + "step": 684 + }, + { + "epoch": 0.09545042848185048, + "grad_norm": 1.4547045230865479, + "learning_rate": 3.99343070994351e-05, + "loss": 0.08923149108886719, + "step": 685 + }, + { + "epoch": 0.09558977217306487, + "grad_norm": 2.299072265625, + "learning_rate": 3.993354143250173e-05, + "loss": 0.10817718505859375, + "step": 686 + }, + { + "epoch": 0.09572911586427925, + "grad_norm": 1.1764777898788452, + "learning_rate": 3.993277133679053e-05, + "loss": 0.11371994018554688, + "step": 687 + }, + { + "epoch": 0.09586845955549363, + "grad_norm": 0.45177915692329407, + "learning_rate": 3.993199681247261e-05, + "loss": 0.10789108276367188, + "step": 688 + }, + { + "epoch": 0.096007803246708, + "grad_norm": 3.1566860675811768, + "learning_rate": 3.9931217859720066e-05, + "loss": 0.13521575927734375, + "step": 689 + }, + { + "epoch": 0.09614714693792238, + "grad_norm": 1.7835997343063354, + "learning_rate": 3.993043447870594e-05, + "loss": 0.13779640197753906, + "step": 690 + }, + { + "epoch": 0.09628649062913676, + "grad_norm": 1.1598689556121826, + "learning_rate": 3.99296466696043e-05, + "loss": 0.10861778259277344, + "step": 691 + }, + { + "epoch": 0.09642583432035115, + "grad_norm": 1.0363069772720337, + "learning_rate": 3.9928854432590166e-05, + "loss": 0.12509536743164062, + "step": 692 + }, + { + "epoch": 0.09656517801156553, + "grad_norm": 2.5012741088867188, + "learning_rate": 3.9928057767839565e-05, + "loss": 0.11149787902832031, + "step": 693 + }, + { + "epoch": 0.0967045217027799, + "grad_norm": 1.8937814235687256, + "learning_rate": 3.992725667552948e-05, + "loss": 0.10459709167480469, + "step": 694 + }, + { + "epoch": 0.09684386539399428, + "grad_norm": 4.392162799835205, + "learning_rate": 3.9926451155837925e-05, + "loss": 0.1357555389404297, + "step": 695 + }, + { + "epoch": 0.09698320908520866, + "grad_norm": 1.8870683908462524, + "learning_rate": 3.9925641208943846e-05, + "loss": 0.12191581726074219, + "step": 696 + }, + { + "epoch": 0.09712255277642305, + "grad_norm": 0.6833701133728027, + "learning_rate": 3.99248268350272e-05, + "loss": 0.1093292236328125, + "step": 697 + }, + { + "epoch": 0.09726189646763743, + "grad_norm": 2.429398775100708, + "learning_rate": 3.992400803426892e-05, + "loss": 0.12384796142578125, + "step": 698 + }, + { + "epoch": 0.0974012401588518, + "grad_norm": 1.6516509056091309, + "learning_rate": 3.992318480685094e-05, + "loss": 0.11532974243164062, + "step": 699 + }, + { + "epoch": 0.09754058385006618, + "grad_norm": 1.371292233467102, + "learning_rate": 3.992235715295614e-05, + "loss": 0.12771987915039062, + "step": 700 + }, + { + "epoch": 0.09767992754128056, + "grad_norm": 0.9481193423271179, + "learning_rate": 3.992152507276841e-05, + "loss": 0.11787796020507812, + "step": 701 + }, + { + "epoch": 0.09781927123249495, + "grad_norm": 2.2556209564208984, + "learning_rate": 3.9920688566472636e-05, + "loss": 0.1302947998046875, + "step": 702 + }, + { + "epoch": 0.09795861492370933, + "grad_norm": 1.8391849994659424, + "learning_rate": 3.991984763425465e-05, + "loss": 0.11295318603515625, + "step": 703 + }, + { + "epoch": 0.09809795861492371, + "grad_norm": 1.2746561765670776, + "learning_rate": 3.99190022763013e-05, + "loss": 0.11462020874023438, + "step": 704 + }, + { + "epoch": 0.09823730230613809, + "grad_norm": 1.6634970903396606, + "learning_rate": 3.991815249280041e-05, + "loss": 0.112945556640625, + "step": 705 + }, + { + "epoch": 0.09837664599735246, + "grad_norm": 1.1794337034225464, + "learning_rate": 3.991729828394078e-05, + "loss": 0.11040496826171875, + "step": 706 + }, + { + "epoch": 0.09851598968856685, + "grad_norm": 1.8499194383621216, + "learning_rate": 3.9916439649912175e-05, + "loss": 0.1140594482421875, + "step": 707 + }, + { + "epoch": 0.09865533337978123, + "grad_norm": 2.373086452484131, + "learning_rate": 3.9915576590905385e-05, + "loss": 0.16961669921875, + "step": 708 + }, + { + "epoch": 0.09879467707099561, + "grad_norm": 1.4163334369659424, + "learning_rate": 3.991470910711216e-05, + "loss": 0.10801315307617188, + "step": 709 + }, + { + "epoch": 0.09893402076220999, + "grad_norm": 0.6193930506706238, + "learning_rate": 3.9913837198725224e-05, + "loss": 0.12310218811035156, + "step": 710 + }, + { + "epoch": 0.09907336445342436, + "grad_norm": 1.7312260866165161, + "learning_rate": 3.9912960865938316e-05, + "loss": 0.1205291748046875, + "step": 711 + }, + { + "epoch": 0.09921270814463876, + "grad_norm": 2.300800323486328, + "learning_rate": 3.9912080108946115e-05, + "loss": 0.12269973754882812, + "step": 712 + }, + { + "epoch": 0.09935205183585313, + "grad_norm": 0.6431316137313843, + "learning_rate": 3.9911194927944315e-05, + "loss": 0.09942245483398438, + "step": 713 + }, + { + "epoch": 0.09949139552706751, + "grad_norm": 1.694626808166504, + "learning_rate": 3.991030532312959e-05, + "loss": 0.12820816040039062, + "step": 714 + }, + { + "epoch": 0.09963073921828189, + "grad_norm": 0.7608662843704224, + "learning_rate": 3.990941129469957e-05, + "loss": 0.116729736328125, + "step": 715 + }, + { + "epoch": 0.09977008290949627, + "grad_norm": 0.9443773031234741, + "learning_rate": 3.9908512842852906e-05, + "loss": 0.11104393005371094, + "step": 716 + }, + { + "epoch": 0.09990942660071066, + "grad_norm": 1.302150845527649, + "learning_rate": 3.990760996778921e-05, + "loss": 0.11767578125, + "step": 717 + }, + { + "epoch": 0.10004877029192503, + "grad_norm": 1.1279939413070679, + "learning_rate": 3.9906702669709074e-05, + "loss": 0.1015777587890625, + "step": 718 + }, + { + "epoch": 0.10018811398313941, + "grad_norm": 1.566367268562317, + "learning_rate": 3.9905790948814086e-05, + "loss": 0.13325119018554688, + "step": 719 + }, + { + "epoch": 0.10032745767435379, + "grad_norm": 0.959258496761322, + "learning_rate": 3.9904874805306804e-05, + "loss": 0.10112762451171875, + "step": 720 + }, + { + "epoch": 0.10046680136556817, + "grad_norm": 1.1357834339141846, + "learning_rate": 3.990395423939077e-05, + "loss": 0.11501884460449219, + "step": 721 + }, + { + "epoch": 0.10060614505678256, + "grad_norm": 0.680083692073822, + "learning_rate": 3.9903029251270535e-05, + "loss": 0.0945281982421875, + "step": 722 + }, + { + "epoch": 0.10074548874799694, + "grad_norm": 1.5274698734283447, + "learning_rate": 3.990209984115158e-05, + "loss": 0.12396621704101562, + "step": 723 + }, + { + "epoch": 0.10088483243921131, + "grad_norm": 0.81153404712677, + "learning_rate": 3.990116600924042e-05, + "loss": 0.10335731506347656, + "step": 724 + }, + { + "epoch": 0.10102417613042569, + "grad_norm": 1.2719149589538574, + "learning_rate": 3.9900227755744515e-05, + "loss": 0.09760665893554688, + "step": 725 + }, + { + "epoch": 0.10116351982164007, + "grad_norm": 0.845045804977417, + "learning_rate": 3.9899285080872346e-05, + "loss": 0.1549072265625, + "step": 726 + }, + { + "epoch": 0.10130286351285446, + "grad_norm": 0.880206286907196, + "learning_rate": 3.9898337984833334e-05, + "loss": 0.10470962524414062, + "step": 727 + }, + { + "epoch": 0.10144220720406884, + "grad_norm": 0.8191313743591309, + "learning_rate": 3.98973864678379e-05, + "loss": 0.1293773651123047, + "step": 728 + }, + { + "epoch": 0.10158155089528322, + "grad_norm": 1.7954108715057373, + "learning_rate": 3.989643053009747e-05, + "loss": 0.12075614929199219, + "step": 729 + }, + { + "epoch": 0.10172089458649759, + "grad_norm": 1.9443460702896118, + "learning_rate": 3.989547017182442e-05, + "loss": 0.11622238159179688, + "step": 730 + }, + { + "epoch": 0.10186023827771197, + "grad_norm": 1.607010841369629, + "learning_rate": 3.989450539323211e-05, + "loss": 0.11197662353515625, + "step": 731 + }, + { + "epoch": 0.10199958196892636, + "grad_norm": 2.294772148132324, + "learning_rate": 3.989353619453491e-05, + "loss": 0.11856842041015625, + "step": 732 + }, + { + "epoch": 0.10213892566014074, + "grad_norm": 2.1026904582977295, + "learning_rate": 3.989256257594814e-05, + "loss": 0.10171890258789062, + "step": 733 + }, + { + "epoch": 0.10227826935135512, + "grad_norm": 3.8580310344696045, + "learning_rate": 3.989158453768812e-05, + "loss": 0.15116500854492188, + "step": 734 + }, + { + "epoch": 0.1024176130425695, + "grad_norm": 1.6572355031967163, + "learning_rate": 3.989060207997215e-05, + "loss": 0.10948753356933594, + "step": 735 + }, + { + "epoch": 0.10255695673378387, + "grad_norm": 1.17807936668396, + "learning_rate": 3.98896152030185e-05, + "loss": 0.11832427978515625, + "step": 736 + }, + { + "epoch": 0.10269630042499826, + "grad_norm": 1.4838861227035522, + "learning_rate": 3.988862390704645e-05, + "loss": 0.09574699401855469, + "step": 737 + }, + { + "epoch": 0.10283564411621264, + "grad_norm": 3.0549793243408203, + "learning_rate": 3.988762819227623e-05, + "loss": 0.11419868469238281, + "step": 738 + }, + { + "epoch": 0.10297498780742702, + "grad_norm": 1.260387659072876, + "learning_rate": 3.988662805892907e-05, + "loss": 0.13298416137695312, + "step": 739 + }, + { + "epoch": 0.1031143314986414, + "grad_norm": 0.5978083610534668, + "learning_rate": 3.988562350722717e-05, + "loss": 0.10877418518066406, + "step": 740 + }, + { + "epoch": 0.10325367518985577, + "grad_norm": 1.9968520402908325, + "learning_rate": 3.9884614537393724e-05, + "loss": 0.0991668701171875, + "step": 741 + }, + { + "epoch": 0.10339301888107016, + "grad_norm": 1.5601428747177124, + "learning_rate": 3.98836011496529e-05, + "loss": 0.11505126953125, + "step": 742 + }, + { + "epoch": 0.10353236257228454, + "grad_norm": 0.8996284604072571, + "learning_rate": 3.9882583344229856e-05, + "loss": 0.11968612670898438, + "step": 743 + }, + { + "epoch": 0.10367170626349892, + "grad_norm": 0.7764251232147217, + "learning_rate": 3.9881561121350725e-05, + "loss": 0.1281890869140625, + "step": 744 + }, + { + "epoch": 0.1038110499547133, + "grad_norm": 1.0469118356704712, + "learning_rate": 3.988053448124261e-05, + "loss": 0.12379074096679688, + "step": 745 + }, + { + "epoch": 0.10395039364592767, + "grad_norm": 1.9334012269973755, + "learning_rate": 3.9879503424133606e-05, + "loss": 0.1268024444580078, + "step": 746 + }, + { + "epoch": 0.10408973733714207, + "grad_norm": 0.6024311780929565, + "learning_rate": 3.9878467950252807e-05, + "loss": 0.114349365234375, + "step": 747 + }, + { + "epoch": 0.10422908102835644, + "grad_norm": 0.5578605532646179, + "learning_rate": 3.9877428059830256e-05, + "loss": 0.08853721618652344, + "step": 748 + }, + { + "epoch": 0.10436842471957082, + "grad_norm": 0.9022793173789978, + "learning_rate": 3.9876383753097004e-05, + "loss": 0.13303756713867188, + "step": 749 + }, + { + "epoch": 0.1045077684107852, + "grad_norm": 1.331174612045288, + "learning_rate": 3.987533503028507e-05, + "loss": 0.10099220275878906, + "step": 750 + }, + { + "epoch": 0.10464711210199958, + "grad_norm": 1.0141291618347168, + "learning_rate": 3.987428189162745e-05, + "loss": 0.10264205932617188, + "step": 751 + }, + { + "epoch": 0.10478645579321397, + "grad_norm": 0.7063027620315552, + "learning_rate": 3.9873224337358134e-05, + "loss": 0.12443161010742188, + "step": 752 + }, + { + "epoch": 0.10492579948442834, + "grad_norm": 1.4701614379882812, + "learning_rate": 3.987216236771208e-05, + "loss": 0.12478256225585938, + "step": 753 + }, + { + "epoch": 0.10506514317564272, + "grad_norm": 1.4128625392913818, + "learning_rate": 3.987109598292524e-05, + "loss": 0.12109375, + "step": 754 + }, + { + "epoch": 0.1052044868668571, + "grad_norm": 1.400772213935852, + "learning_rate": 3.9870025183234536e-05, + "loss": 0.10844039916992188, + "step": 755 + }, + { + "epoch": 0.10534383055807148, + "grad_norm": 1.298674464225769, + "learning_rate": 3.986894996887788e-05, + "loss": 0.12143707275390625, + "step": 756 + }, + { + "epoch": 0.10548317424928587, + "grad_norm": 0.5277110934257507, + "learning_rate": 3.986787034009416e-05, + "loss": 0.09788131713867188, + "step": 757 + }, + { + "epoch": 0.10562251794050025, + "grad_norm": 0.9235762357711792, + "learning_rate": 3.986678629712323e-05, + "loss": 0.11962699890136719, + "step": 758 + }, + { + "epoch": 0.10576186163171462, + "grad_norm": 5.468353748321533, + "learning_rate": 3.9865697840205955e-05, + "loss": 0.14556121826171875, + "step": 759 + }, + { + "epoch": 0.105901205322929, + "grad_norm": 2.2149393558502197, + "learning_rate": 3.986460496958416e-05, + "loss": 0.12555313110351562, + "step": 760 + }, + { + "epoch": 0.10604054901414338, + "grad_norm": 0.7473896741867065, + "learning_rate": 3.986350768550066e-05, + "loss": 0.10676193237304688, + "step": 761 + }, + { + "epoch": 0.10617989270535777, + "grad_norm": 1.097351312637329, + "learning_rate": 3.986240598819925e-05, + "loss": 0.11294937133789062, + "step": 762 + }, + { + "epoch": 0.10631923639657215, + "grad_norm": 0.7422447204589844, + "learning_rate": 3.986129987792469e-05, + "loss": 0.10520172119140625, + "step": 763 + }, + { + "epoch": 0.10645858008778653, + "grad_norm": 1.7692689895629883, + "learning_rate": 3.986018935492274e-05, + "loss": 0.14983367919921875, + "step": 764 + }, + { + "epoch": 0.1065979237790009, + "grad_norm": 0.5439603924751282, + "learning_rate": 3.985907441944013e-05, + "loss": 0.08978271484375, + "step": 765 + }, + { + "epoch": 0.10673726747021528, + "grad_norm": 1.8087096214294434, + "learning_rate": 3.9857955071724575e-05, + "loss": 0.11293792724609375, + "step": 766 + }, + { + "epoch": 0.10687661116142967, + "grad_norm": 1.4157307147979736, + "learning_rate": 3.9856831312024765e-05, + "loss": 0.11771202087402344, + "step": 767 + }, + { + "epoch": 0.10701595485264405, + "grad_norm": 0.8156030178070068, + "learning_rate": 3.985570314059038e-05, + "loss": 0.10356521606445312, + "step": 768 + }, + { + "epoch": 0.10715529854385843, + "grad_norm": 1.1175578832626343, + "learning_rate": 3.9854570557672073e-05, + "loss": 0.11612701416015625, + "step": 769 + }, + { + "epoch": 0.1072946422350728, + "grad_norm": 1.7102527618408203, + "learning_rate": 3.985343356352147e-05, + "loss": 0.10236835479736328, + "step": 770 + }, + { + "epoch": 0.10743398592628718, + "grad_norm": 0.5460636615753174, + "learning_rate": 3.985229215839119e-05, + "loss": 0.1242523193359375, + "step": 771 + }, + { + "epoch": 0.10757332961750157, + "grad_norm": 0.5472357273101807, + "learning_rate": 3.985114634253483e-05, + "loss": 0.09655380249023438, + "step": 772 + }, + { + "epoch": 0.10771267330871595, + "grad_norm": 0.750043511390686, + "learning_rate": 3.9849996116206966e-05, + "loss": 0.09886932373046875, + "step": 773 + }, + { + "epoch": 0.10785201699993033, + "grad_norm": 1.0187790393829346, + "learning_rate": 3.9848841479663146e-05, + "loss": 0.11560821533203125, + "step": 774 + }, + { + "epoch": 0.1079913606911447, + "grad_norm": 0.7035531401634216, + "learning_rate": 3.984768243315991e-05, + "loss": 0.09477043151855469, + "step": 775 + }, + { + "epoch": 0.10813070438235908, + "grad_norm": 1.1609158515930176, + "learning_rate": 3.984651897695476e-05, + "loss": 0.12434768676757812, + "step": 776 + }, + { + "epoch": 0.10827004807357347, + "grad_norm": 0.6006178259849548, + "learning_rate": 3.9845351111306196e-05, + "loss": 0.10212326049804688, + "step": 777 + }, + { + "epoch": 0.10840939176478785, + "grad_norm": 1.079113245010376, + "learning_rate": 3.98441788364737e-05, + "loss": 0.12759017944335938, + "step": 778 + }, + { + "epoch": 0.10854873545600223, + "grad_norm": 0.7698569893836975, + "learning_rate": 3.984300215271771e-05, + "loss": 0.0953521728515625, + "step": 779 + }, + { + "epoch": 0.1086880791472166, + "grad_norm": 0.445854127407074, + "learning_rate": 3.984182106029967e-05, + "loss": 0.09328460693359375, + "step": 780 + }, + { + "epoch": 0.10882742283843098, + "grad_norm": 1.437705636024475, + "learning_rate": 3.984063555948198e-05, + "loss": 0.10561561584472656, + "step": 781 + }, + { + "epoch": 0.10896676652964538, + "grad_norm": 0.6815686821937561, + "learning_rate": 3.9839445650528046e-05, + "loss": 0.11938858032226562, + "step": 782 + }, + { + "epoch": 0.10910611022085975, + "grad_norm": 1.1238652467727661, + "learning_rate": 3.983825133370223e-05, + "loss": 0.12368392944335938, + "step": 783 + }, + { + "epoch": 0.10924545391207413, + "grad_norm": 0.5675063133239746, + "learning_rate": 3.983705260926988e-05, + "loss": 0.12426376342773438, + "step": 784 + }, + { + "epoch": 0.10938479760328851, + "grad_norm": 1.390886664390564, + "learning_rate": 3.983584947749733e-05, + "loss": 0.12659072875976562, + "step": 785 + }, + { + "epoch": 0.10952414129450289, + "grad_norm": 1.464176893234253, + "learning_rate": 3.983464193865188e-05, + "loss": 0.10884284973144531, + "step": 786 + }, + { + "epoch": 0.10966348498571728, + "grad_norm": 0.45712319016456604, + "learning_rate": 3.9833429993001827e-05, + "loss": 0.0956268310546875, + "step": 787 + }, + { + "epoch": 0.10980282867693165, + "grad_norm": 2.5166823863983154, + "learning_rate": 3.983221364081644e-05, + "loss": 0.13232421875, + "step": 788 + }, + { + "epoch": 0.10994217236814603, + "grad_norm": 2.1168603897094727, + "learning_rate": 3.983099288236595e-05, + "loss": 0.11557865142822266, + "step": 789 + }, + { + "epoch": 0.11008151605936041, + "grad_norm": 2.4503884315490723, + "learning_rate": 3.98297677179216e-05, + "loss": 0.14318084716796875, + "step": 790 + }, + { + "epoch": 0.11022085975057479, + "grad_norm": 0.8684201836585999, + "learning_rate": 3.982853814775558e-05, + "loss": 0.13887405395507812, + "step": 791 + }, + { + "epoch": 0.11036020344178918, + "grad_norm": 1.518127679824829, + "learning_rate": 3.982730417214107e-05, + "loss": 0.11257553100585938, + "step": 792 + }, + { + "epoch": 0.11049954713300356, + "grad_norm": 4.417276859283447, + "learning_rate": 3.982606579135225e-05, + "loss": 0.13979339599609375, + "step": 793 + }, + { + "epoch": 0.11063889082421793, + "grad_norm": 2.069187879562378, + "learning_rate": 3.982482300566424e-05, + "loss": 0.10201644897460938, + "step": 794 + }, + { + "epoch": 0.11077823451543231, + "grad_norm": 0.9190155863761902, + "learning_rate": 3.982357581535317e-05, + "loss": 0.11325454711914062, + "step": 795 + }, + { + "epoch": 0.11091757820664669, + "grad_norm": 0.6466993689537048, + "learning_rate": 3.9822324220696134e-05, + "loss": 0.12409591674804688, + "step": 796 + }, + { + "epoch": 0.11105692189786108, + "grad_norm": 1.38777756690979, + "learning_rate": 3.98210682219712e-05, + "loss": 0.11224746704101562, + "step": 797 + }, + { + "epoch": 0.11119626558907546, + "grad_norm": 1.530794620513916, + "learning_rate": 3.9819807819457444e-05, + "loss": 0.1472759246826172, + "step": 798 + }, + { + "epoch": 0.11133560928028984, + "grad_norm": 0.5411010384559631, + "learning_rate": 3.9818543013434874e-05, + "loss": 0.09783363342285156, + "step": 799 + }, + { + "epoch": 0.11147495297150421, + "grad_norm": 0.6684409379959106, + "learning_rate": 3.9817273804184514e-05, + "loss": 0.09439849853515625, + "step": 800 + }, + { + "epoch": 0.11161429666271859, + "grad_norm": 0.9727270007133484, + "learning_rate": 3.981600019198835e-05, + "loss": 0.13311195373535156, + "step": 801 + }, + { + "epoch": 0.11175364035393298, + "grad_norm": 0.8122290968894958, + "learning_rate": 3.981472217712935e-05, + "loss": 0.1121063232421875, + "step": 802 + }, + { + "epoch": 0.11189298404514736, + "grad_norm": 1.5393226146697998, + "learning_rate": 3.9813439759891466e-05, + "loss": 0.10486221313476562, + "step": 803 + }, + { + "epoch": 0.11203232773636174, + "grad_norm": 1.6558761596679688, + "learning_rate": 3.981215294055961e-05, + "loss": 0.11580276489257812, + "step": 804 + }, + { + "epoch": 0.11217167142757611, + "grad_norm": 0.8459354043006897, + "learning_rate": 3.981086171941969e-05, + "loss": 0.11832237243652344, + "step": 805 + }, + { + "epoch": 0.11231101511879049, + "grad_norm": 0.4636647403240204, + "learning_rate": 3.9809566096758586e-05, + "loss": 0.098724365234375, + "step": 806 + }, + { + "epoch": 0.11245035881000488, + "grad_norm": 2.109541416168213, + "learning_rate": 3.9808266072864156e-05, + "loss": 0.13342666625976562, + "step": 807 + }, + { + "epoch": 0.11258970250121926, + "grad_norm": 1.2662583589553833, + "learning_rate": 3.980696164802523e-05, + "loss": 0.11121749877929688, + "step": 808 + }, + { + "epoch": 0.11272904619243364, + "grad_norm": 1.4285516738891602, + "learning_rate": 3.980565282253164e-05, + "loss": 0.12354278564453125, + "step": 809 + }, + { + "epoch": 0.11286838988364802, + "grad_norm": 2.461992025375366, + "learning_rate": 3.9804339596674146e-05, + "loss": 0.11805343627929688, + "step": 810 + }, + { + "epoch": 0.11300773357486239, + "grad_norm": 0.8118888139724731, + "learning_rate": 3.980302197074455e-05, + "loss": 0.09537315368652344, + "step": 811 + }, + { + "epoch": 0.11314707726607678, + "grad_norm": 1.2067972421646118, + "learning_rate": 3.9801699945035573e-05, + "loss": 0.12611007690429688, + "step": 812 + }, + { + "epoch": 0.11328642095729116, + "grad_norm": 1.0801621675491333, + "learning_rate": 3.980037351984095e-05, + "loss": 0.10651016235351562, + "step": 813 + }, + { + "epoch": 0.11342576464850554, + "grad_norm": 0.5771119594573975, + "learning_rate": 3.979904269545538e-05, + "loss": 0.12457084655761719, + "step": 814 + }, + { + "epoch": 0.11356510833971992, + "grad_norm": 3.148482084274292, + "learning_rate": 3.979770747217455e-05, + "loss": 0.12362480163574219, + "step": 815 + }, + { + "epoch": 0.1137044520309343, + "grad_norm": 1.602010726928711, + "learning_rate": 3.97963678502951e-05, + "loss": 0.09930229187011719, + "step": 816 + }, + { + "epoch": 0.11384379572214869, + "grad_norm": 0.6582207679748535, + "learning_rate": 3.979502383011468e-05, + "loss": 0.08854103088378906, + "step": 817 + }, + { + "epoch": 0.11398313941336306, + "grad_norm": 1.8090646266937256, + "learning_rate": 3.979367541193189e-05, + "loss": 0.11098861694335938, + "step": 818 + }, + { + "epoch": 0.11412248310457744, + "grad_norm": 0.8114482760429382, + "learning_rate": 3.9792322596046326e-05, + "loss": 0.10876083374023438, + "step": 819 + }, + { + "epoch": 0.11426182679579182, + "grad_norm": 1.5681824684143066, + "learning_rate": 3.979096538275854e-05, + "loss": 0.1181488037109375, + "step": 820 + }, + { + "epoch": 0.1144011704870062, + "grad_norm": 2.249469041824341, + "learning_rate": 3.978960377237009e-05, + "loss": 0.14566421508789062, + "step": 821 + }, + { + "epoch": 0.11454051417822059, + "grad_norm": 0.8656013607978821, + "learning_rate": 3.978823776518348e-05, + "loss": 0.1447620391845703, + "step": 822 + }, + { + "epoch": 0.11467985786943496, + "grad_norm": 0.6018960475921631, + "learning_rate": 3.978686736150221e-05, + "loss": 0.14219284057617188, + "step": 823 + }, + { + "epoch": 0.11481920156064934, + "grad_norm": 2.2050657272338867, + "learning_rate": 3.978549256163075e-05, + "loss": 0.13545989990234375, + "step": 824 + }, + { + "epoch": 0.11495854525186372, + "grad_norm": 1.6313886642456055, + "learning_rate": 3.978411336587457e-05, + "loss": 0.12129020690917969, + "step": 825 + }, + { + "epoch": 0.1150978889430781, + "grad_norm": 0.6123172044754028, + "learning_rate": 3.978272977454006e-05, + "loss": 0.10020160675048828, + "step": 826 + }, + { + "epoch": 0.11523723263429249, + "grad_norm": 0.6657999157905579, + "learning_rate": 3.978134178793465e-05, + "loss": 0.10341453552246094, + "step": 827 + }, + { + "epoch": 0.11537657632550687, + "grad_norm": 0.9471684098243713, + "learning_rate": 3.977994940636671e-05, + "loss": 0.11051559448242188, + "step": 828 + }, + { + "epoch": 0.11551592001672124, + "grad_norm": 0.6803514957427979, + "learning_rate": 3.9778552630145595e-05, + "loss": 0.10254287719726562, + "step": 829 + }, + { + "epoch": 0.11565526370793562, + "grad_norm": 1.080765962600708, + "learning_rate": 3.977715145958164e-05, + "loss": 0.151123046875, + "step": 830 + }, + { + "epoch": 0.11579460739915, + "grad_norm": 1.7226406335830688, + "learning_rate": 3.9775745894986155e-05, + "loss": 0.1195526123046875, + "step": 831 + }, + { + "epoch": 0.11593395109036439, + "grad_norm": 2.2186365127563477, + "learning_rate": 3.9774335936671414e-05, + "loss": 0.13119125366210938, + "step": 832 + }, + { + "epoch": 0.11607329478157877, + "grad_norm": 1.9527571201324463, + "learning_rate": 3.977292158495068e-05, + "loss": 0.13148880004882812, + "step": 833 + }, + { + "epoch": 0.11621263847279314, + "grad_norm": 0.9852009415626526, + "learning_rate": 3.9771502840138196e-05, + "loss": 0.10406684875488281, + "step": 834 + }, + { + "epoch": 0.11635198216400752, + "grad_norm": 0.6154264211654663, + "learning_rate": 3.9770079702549174e-05, + "loss": 0.106964111328125, + "step": 835 + }, + { + "epoch": 0.1164913258552219, + "grad_norm": 0.950122594833374, + "learning_rate": 3.9768652172499804e-05, + "loss": 0.09944534301757812, + "step": 836 + }, + { + "epoch": 0.11663066954643629, + "grad_norm": 0.740610659122467, + "learning_rate": 3.9767220250307244e-05, + "loss": 0.1080780029296875, + "step": 837 + }, + { + "epoch": 0.11677001323765067, + "grad_norm": 1.0316812992095947, + "learning_rate": 3.976578393628963e-05, + "loss": 0.10246086120605469, + "step": 838 + }, + { + "epoch": 0.11690935692886505, + "grad_norm": 0.6668219566345215, + "learning_rate": 3.9764343230766096e-05, + "loss": 0.10525131225585938, + "step": 839 + }, + { + "epoch": 0.11704870062007942, + "grad_norm": 1.016589879989624, + "learning_rate": 3.976289813405672e-05, + "loss": 0.11090278625488281, + "step": 840 + }, + { + "epoch": 0.1171880443112938, + "grad_norm": 0.5902987122535706, + "learning_rate": 3.9761448646482576e-05, + "loss": 0.09747314453125, + "step": 841 + }, + { + "epoch": 0.11732738800250819, + "grad_norm": 0.7813085317611694, + "learning_rate": 3.975999476836571e-05, + "loss": 0.10249900817871094, + "step": 842 + }, + { + "epoch": 0.11746673169372257, + "grad_norm": 1.112872838973999, + "learning_rate": 3.9758536500029116e-05, + "loss": 0.1098785400390625, + "step": 843 + }, + { + "epoch": 0.11760607538493695, + "grad_norm": 0.491186261177063, + "learning_rate": 3.975707384179682e-05, + "loss": 0.11371231079101562, + "step": 844 + }, + { + "epoch": 0.11774541907615133, + "grad_norm": 0.5311031341552734, + "learning_rate": 3.9755606793993776e-05, + "loss": 0.11507606506347656, + "step": 845 + }, + { + "epoch": 0.1178847627673657, + "grad_norm": 0.6621170043945312, + "learning_rate": 3.9754135356945934e-05, + "loss": 0.11808395385742188, + "step": 846 + }, + { + "epoch": 0.1180241064585801, + "grad_norm": 0.4611426591873169, + "learning_rate": 3.9752659530980205e-05, + "loss": 0.10037994384765625, + "step": 847 + }, + { + "epoch": 0.11816345014979447, + "grad_norm": 0.6160988807678223, + "learning_rate": 3.975117931642449e-05, + "loss": 0.13685989379882812, + "step": 848 + }, + { + "epoch": 0.11830279384100885, + "grad_norm": 0.7761191129684448, + "learning_rate": 3.9749694713607654e-05, + "loss": 0.10390043258666992, + "step": 849 + }, + { + "epoch": 0.11844213753222323, + "grad_norm": 0.975176990032196, + "learning_rate": 3.974820572285955e-05, + "loss": 0.0965423583984375, + "step": 850 + }, + { + "epoch": 0.1185814812234376, + "grad_norm": 0.802049458026886, + "learning_rate": 3.9746712344510996e-05, + "loss": 0.10572433471679688, + "step": 851 + }, + { + "epoch": 0.118720824914652, + "grad_norm": 0.5088741779327393, + "learning_rate": 3.9745214578893784e-05, + "loss": 0.10759544372558594, + "step": 852 + }, + { + "epoch": 0.11886016860586637, + "grad_norm": 0.7110626697540283, + "learning_rate": 3.974371242634068e-05, + "loss": 0.14133834838867188, + "step": 853 + }, + { + "epoch": 0.11899951229708075, + "grad_norm": 1.185447096824646, + "learning_rate": 3.9742205887185434e-05, + "loss": 0.09471321105957031, + "step": 854 + }, + { + "epoch": 0.11913885598829513, + "grad_norm": 1.0504074096679688, + "learning_rate": 3.974069496176277e-05, + "loss": 0.13141822814941406, + "step": 855 + }, + { + "epoch": 0.1192781996795095, + "grad_norm": 0.7555380463600159, + "learning_rate": 3.973917965040836e-05, + "loss": 0.12518310546875, + "step": 856 + }, + { + "epoch": 0.1194175433707239, + "grad_norm": 0.5758504271507263, + "learning_rate": 3.973765995345889e-05, + "loss": 0.12303543090820312, + "step": 857 + }, + { + "epoch": 0.11955688706193827, + "grad_norm": 0.8818963766098022, + "learning_rate": 3.9736135871251994e-05, + "loss": 0.11772918701171875, + "step": 858 + }, + { + "epoch": 0.11969623075315265, + "grad_norm": 0.7199179530143738, + "learning_rate": 3.9734607404126293e-05, + "loss": 0.12402915954589844, + "step": 859 + }, + { + "epoch": 0.11983557444436703, + "grad_norm": 1.6029106378555298, + "learning_rate": 3.973307455242138e-05, + "loss": 0.09727859497070312, + "step": 860 + }, + { + "epoch": 0.11997491813558141, + "grad_norm": 0.6929355263710022, + "learning_rate": 3.9731537316477806e-05, + "loss": 0.12565231323242188, + "step": 861 + }, + { + "epoch": 0.1201142618267958, + "grad_norm": 0.8705252408981323, + "learning_rate": 3.9729995696637125e-05, + "loss": 0.10021591186523438, + "step": 862 + }, + { + "epoch": 0.12025360551801018, + "grad_norm": 0.4506950378417969, + "learning_rate": 3.972844969324184e-05, + "loss": 0.09520721435546875, + "step": 863 + }, + { + "epoch": 0.12039294920922455, + "grad_norm": 3.3270058631896973, + "learning_rate": 3.9726899306635446e-05, + "loss": 0.17671585083007812, + "step": 864 + }, + { + "epoch": 0.12053229290043893, + "grad_norm": 1.9060680866241455, + "learning_rate": 3.9725344537162394e-05, + "loss": 0.12433242797851562, + "step": 865 + }, + { + "epoch": 0.12067163659165331, + "grad_norm": 1.6091878414154053, + "learning_rate": 3.972378538516813e-05, + "loss": 0.14146041870117188, + "step": 866 + }, + { + "epoch": 0.1208109802828677, + "grad_norm": 1.3433414697647095, + "learning_rate": 3.972222185099905e-05, + "loss": 0.115081787109375, + "step": 867 + }, + { + "epoch": 0.12095032397408208, + "grad_norm": 0.5500553250312805, + "learning_rate": 3.972065393500254e-05, + "loss": 0.09384346008300781, + "step": 868 + }, + { + "epoch": 0.12108966766529645, + "grad_norm": 0.8479167819023132, + "learning_rate": 3.971908163752696e-05, + "loss": 0.12322616577148438, + "step": 869 + }, + { + "epoch": 0.12122901135651083, + "grad_norm": 0.9300147891044617, + "learning_rate": 3.9717504958921634e-05, + "loss": 0.11570167541503906, + "step": 870 + }, + { + "epoch": 0.12136835504772521, + "grad_norm": 2.399371862411499, + "learning_rate": 3.971592389953686e-05, + "loss": 0.12464523315429688, + "step": 871 + }, + { + "epoch": 0.1215076987389396, + "grad_norm": 1.4937114715576172, + "learning_rate": 3.9714338459723924e-05, + "loss": 0.12448692321777344, + "step": 872 + }, + { + "epoch": 0.12164704243015398, + "grad_norm": 1.0245870351791382, + "learning_rate": 3.9712748639835056e-05, + "loss": 0.11182594299316406, + "step": 873 + }, + { + "epoch": 0.12178638612136836, + "grad_norm": 1.325927495956421, + "learning_rate": 3.97111544402235e-05, + "loss": 0.1334972381591797, + "step": 874 + }, + { + "epoch": 0.12192572981258273, + "grad_norm": 0.9411877393722534, + "learning_rate": 3.970955586124344e-05, + "loss": 0.099456787109375, + "step": 875 + }, + { + "epoch": 0.12206507350379711, + "grad_norm": 0.7769520282745361, + "learning_rate": 3.9707952903250045e-05, + "loss": 0.11882591247558594, + "step": 876 + }, + { + "epoch": 0.1222044171950115, + "grad_norm": 0.6269888877868652, + "learning_rate": 3.9706345566599454e-05, + "loss": 0.07987213134765625, + "step": 877 + }, + { + "epoch": 0.12234376088622588, + "grad_norm": 0.9292137622833252, + "learning_rate": 3.9704733851648785e-05, + "loss": 0.11409759521484375, + "step": 878 + }, + { + "epoch": 0.12248310457744026, + "grad_norm": 0.8155890107154846, + "learning_rate": 3.970311775875611e-05, + "loss": 0.12044525146484375, + "step": 879 + }, + { + "epoch": 0.12262244826865464, + "grad_norm": 0.7482179999351501, + "learning_rate": 3.9701497288280506e-05, + "loss": 0.1143798828125, + "step": 880 + }, + { + "epoch": 0.12276179195986901, + "grad_norm": 0.622718870639801, + "learning_rate": 3.9699872440582e-05, + "loss": 0.11348724365234375, + "step": 881 + }, + { + "epoch": 0.1229011356510834, + "grad_norm": 1.3956456184387207, + "learning_rate": 3.969824321602159e-05, + "loss": 0.08522224426269531, + "step": 882 + }, + { + "epoch": 0.12304047934229778, + "grad_norm": 1.0004163980484009, + "learning_rate": 3.969660961496126e-05, + "loss": 0.10439109802246094, + "step": 883 + }, + { + "epoch": 0.12317982303351216, + "grad_norm": 1.0763965845108032, + "learning_rate": 3.969497163776395e-05, + "loss": 0.13083267211914062, + "step": 884 + }, + { + "epoch": 0.12331916672472654, + "grad_norm": 1.2221527099609375, + "learning_rate": 3.9693329284793586e-05, + "loss": 0.1371135711669922, + "step": 885 + }, + { + "epoch": 0.12345851041594091, + "grad_norm": 1.4115592241287231, + "learning_rate": 3.9691682556415064e-05, + "loss": 0.1310138702392578, + "step": 886 + }, + { + "epoch": 0.12359785410715529, + "grad_norm": 0.6806123852729797, + "learning_rate": 3.969003145299424e-05, + "loss": 0.14165306091308594, + "step": 887 + }, + { + "epoch": 0.12373719779836968, + "grad_norm": 1.338171124458313, + "learning_rate": 3.968837597489797e-05, + "loss": 0.12149810791015625, + "step": 888 + }, + { + "epoch": 0.12387654148958406, + "grad_norm": 1.1748696565628052, + "learning_rate": 3.968671612249404e-05, + "loss": 0.12066268920898438, + "step": 889 + }, + { + "epoch": 0.12401588518079844, + "grad_norm": 1.4954332113265991, + "learning_rate": 3.968505189615125e-05, + "loss": 0.1046142578125, + "step": 890 + }, + { + "epoch": 0.12415522887201282, + "grad_norm": 1.477143406867981, + "learning_rate": 3.9683383296239345e-05, + "loss": 0.11077117919921875, + "step": 891 + }, + { + "epoch": 0.12429457256322719, + "grad_norm": 1.8449320793151855, + "learning_rate": 3.968171032312905e-05, + "loss": 0.12347221374511719, + "step": 892 + }, + { + "epoch": 0.12443391625444158, + "grad_norm": 1.4492422342300415, + "learning_rate": 3.968003297719206e-05, + "loss": 0.10310840606689453, + "step": 893 + }, + { + "epoch": 0.12457325994565596, + "grad_norm": 0.5748562812805176, + "learning_rate": 3.9678351258801046e-05, + "loss": 0.0926523208618164, + "step": 894 + }, + { + "epoch": 0.12471260363687034, + "grad_norm": 0.9154125452041626, + "learning_rate": 3.9676665168329655e-05, + "loss": 0.11291313171386719, + "step": 895 + }, + { + "epoch": 0.12485194732808472, + "grad_norm": 1.016230583190918, + "learning_rate": 3.967497470615248e-05, + "loss": 0.11133193969726562, + "step": 896 + }, + { + "epoch": 0.1249912910192991, + "grad_norm": 1.0277421474456787, + "learning_rate": 3.967327987264512e-05, + "loss": 0.11457252502441406, + "step": 897 + }, + { + "epoch": 0.12513063471051347, + "grad_norm": 1.1495734453201294, + "learning_rate": 3.967158066818411e-05, + "loss": 0.1326751708984375, + "step": 898 + }, + { + "epoch": 0.12526997840172785, + "grad_norm": 1.1843676567077637, + "learning_rate": 3.9669877093146995e-05, + "loss": 0.10547637939453125, + "step": 899 + }, + { + "epoch": 0.12540932209294225, + "grad_norm": 0.699892520904541, + "learning_rate": 3.966816914791226e-05, + "loss": 0.10803413391113281, + "step": 900 + }, + { + "epoch": 0.12554866578415663, + "grad_norm": 0.7696776390075684, + "learning_rate": 3.9666456832859365e-05, + "loss": 0.11545181274414062, + "step": 901 + }, + { + "epoch": 0.125688009475371, + "grad_norm": 0.859268069267273, + "learning_rate": 3.966474014836876e-05, + "loss": 0.10674858093261719, + "step": 902 + }, + { + "epoch": 0.1258273531665854, + "grad_norm": 0.7736993432044983, + "learning_rate": 3.9663019094821843e-05, + "loss": 0.14223480224609375, + "step": 903 + }, + { + "epoch": 0.12596669685779976, + "grad_norm": 0.8918118476867676, + "learning_rate": 3.9661293672601006e-05, + "loss": 0.1131591796875, + "step": 904 + }, + { + "epoch": 0.12610604054901414, + "grad_norm": 1.0698210000991821, + "learning_rate": 3.965956388208959e-05, + "loss": 0.1143035888671875, + "step": 905 + }, + { + "epoch": 0.12624538424022852, + "grad_norm": 0.9388188123703003, + "learning_rate": 3.965782972367191e-05, + "loss": 0.11944580078125, + "step": 906 + }, + { + "epoch": 0.1263847279314429, + "grad_norm": 0.6334294676780701, + "learning_rate": 3.965609119773326e-05, + "loss": 0.12298965454101562, + "step": 907 + }, + { + "epoch": 0.12652407162265727, + "grad_norm": 0.5245428085327148, + "learning_rate": 3.9654348304659905e-05, + "loss": 0.09977149963378906, + "step": 908 + }, + { + "epoch": 0.12666341531387165, + "grad_norm": 1.010496735572815, + "learning_rate": 3.965260104483907e-05, + "loss": 0.12683868408203125, + "step": 909 + }, + { + "epoch": 0.12680275900508606, + "grad_norm": 0.5398598909378052, + "learning_rate": 3.965084941865896e-05, + "loss": 0.1012725830078125, + "step": 910 + }, + { + "epoch": 0.12694210269630044, + "grad_norm": 0.9911305904388428, + "learning_rate": 3.964909342650875e-05, + "loss": 0.11532020568847656, + "step": 911 + }, + { + "epoch": 0.1270814463875148, + "grad_norm": 2.310253858566284, + "learning_rate": 3.964733306877857e-05, + "loss": 0.13840484619140625, + "step": 912 + }, + { + "epoch": 0.1272207900787292, + "grad_norm": 0.6033258438110352, + "learning_rate": 3.964556834585954e-05, + "loss": 0.09210205078125, + "step": 913 + }, + { + "epoch": 0.12736013376994357, + "grad_norm": 1.5114418268203735, + "learning_rate": 3.9643799258143745e-05, + "loss": 0.1296234130859375, + "step": 914 + }, + { + "epoch": 0.12749947746115795, + "grad_norm": 0.5797878503799438, + "learning_rate": 3.9642025806024226e-05, + "loss": 0.11158370971679688, + "step": 915 + }, + { + "epoch": 0.12763882115237232, + "grad_norm": 1.1087502241134644, + "learning_rate": 3.964024798989501e-05, + "loss": 0.09055709838867188, + "step": 916 + }, + { + "epoch": 0.1277781648435867, + "grad_norm": 0.525911271572113, + "learning_rate": 3.963846581015109e-05, + "loss": 0.08524131774902344, + "step": 917 + }, + { + "epoch": 0.12791750853480108, + "grad_norm": 0.8735682368278503, + "learning_rate": 3.963667926718841e-05, + "loss": 0.09768295288085938, + "step": 918 + }, + { + "epoch": 0.12805685222601546, + "grad_norm": 1.3456933498382568, + "learning_rate": 3.9634888361403916e-05, + "loss": 0.10442733764648438, + "step": 919 + }, + { + "epoch": 0.12819619591722986, + "grad_norm": 0.7836794257164001, + "learning_rate": 3.96330930931955e-05, + "loss": 0.0962371826171875, + "step": 920 + }, + { + "epoch": 0.12833553960844424, + "grad_norm": 0.7284333109855652, + "learning_rate": 3.963129346296203e-05, + "loss": 0.10160636901855469, + "step": 921 + }, + { + "epoch": 0.12847488329965862, + "grad_norm": 1.060260534286499, + "learning_rate": 3.9629489471103334e-05, + "loss": 0.08993911743164062, + "step": 922 + }, + { + "epoch": 0.128614226990873, + "grad_norm": 0.6368729472160339, + "learning_rate": 3.962768111802023e-05, + "loss": 0.09611892700195312, + "step": 923 + }, + { + "epoch": 0.12875357068208737, + "grad_norm": 0.9469199776649475, + "learning_rate": 3.96258684041145e-05, + "loss": 0.1634654998779297, + "step": 924 + }, + { + "epoch": 0.12889291437330175, + "grad_norm": 1.70189368724823, + "learning_rate": 3.9624051329788875e-05, + "loss": 0.15112876892089844, + "step": 925 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 0.7292094826698303, + "learning_rate": 3.9622229895447054e-05, + "loss": 0.11292457580566406, + "step": 926 + }, + { + "epoch": 0.1291716017557305, + "grad_norm": 0.7352527379989624, + "learning_rate": 3.962040410149375e-05, + "loss": 0.1237335205078125, + "step": 927 + }, + { + "epoch": 0.12931094544694488, + "grad_norm": 0.8105782866477966, + "learning_rate": 3.961857394833459e-05, + "loss": 0.10524177551269531, + "step": 928 + }, + { + "epoch": 0.12945028913815926, + "grad_norm": 0.5299696922302246, + "learning_rate": 3.96167394363762e-05, + "loss": 0.09292030334472656, + "step": 929 + }, + { + "epoch": 0.12958963282937366, + "grad_norm": 0.4744358956813812, + "learning_rate": 3.9614900566026154e-05, + "loss": 0.09839057922363281, + "step": 930 + }, + { + "epoch": 0.12972897652058804, + "grad_norm": 0.749060332775116, + "learning_rate": 3.961305733769303e-05, + "loss": 0.11085128784179688, + "step": 931 + }, + { + "epoch": 0.12986832021180242, + "grad_norm": 1.4198386669158936, + "learning_rate": 3.961120975178634e-05, + "loss": 0.10490036010742188, + "step": 932 + }, + { + "epoch": 0.1300076639030168, + "grad_norm": 1.6539727449417114, + "learning_rate": 3.960935780871657e-05, + "loss": 0.12621307373046875, + "step": 933 + }, + { + "epoch": 0.13014700759423117, + "grad_norm": 0.7541457414627075, + "learning_rate": 3.9607501508895185e-05, + "loss": 0.1426849365234375, + "step": 934 + }, + { + "epoch": 0.13028635128544555, + "grad_norm": 0.7514516115188599, + "learning_rate": 3.960564085273461e-05, + "loss": 0.10925912857055664, + "step": 935 + }, + { + "epoch": 0.13042569497665993, + "grad_norm": 2.702261209487915, + "learning_rate": 3.9603775840648243e-05, + "loss": 0.15816497802734375, + "step": 936 + }, + { + "epoch": 0.1305650386678743, + "grad_norm": 0.49969300627708435, + "learning_rate": 3.9601906473050446e-05, + "loss": 0.10018539428710938, + "step": 937 + }, + { + "epoch": 0.13070438235908868, + "grad_norm": 0.6740197539329529, + "learning_rate": 3.960003275035655e-05, + "loss": 0.1148080825805664, + "step": 938 + }, + { + "epoch": 0.13084372605030306, + "grad_norm": 1.5184834003448486, + "learning_rate": 3.959815467298285e-05, + "loss": 0.10657501220703125, + "step": 939 + }, + { + "epoch": 0.13098306974151747, + "grad_norm": 0.8972879648208618, + "learning_rate": 3.9596272241346625e-05, + "loss": 0.12562942504882812, + "step": 940 + }, + { + "epoch": 0.13112241343273184, + "grad_norm": 0.7313540577888489, + "learning_rate": 3.959438545586609e-05, + "loss": 0.11937713623046875, + "step": 941 + }, + { + "epoch": 0.13126175712394622, + "grad_norm": 0.7889882326126099, + "learning_rate": 3.959249431696046e-05, + "loss": 0.10939407348632812, + "step": 942 + }, + { + "epoch": 0.1314011008151606, + "grad_norm": 0.5901696681976318, + "learning_rate": 3.9590598825049896e-05, + "loss": 0.08550071716308594, + "step": 943 + }, + { + "epoch": 0.13154044450637498, + "grad_norm": 0.7006765007972717, + "learning_rate": 3.958869898055553e-05, + "loss": 0.12082672119140625, + "step": 944 + }, + { + "epoch": 0.13167978819758935, + "grad_norm": 1.0488924980163574, + "learning_rate": 3.9586794783899464e-05, + "loss": 0.09662818908691406, + "step": 945 + }, + { + "epoch": 0.13181913188880373, + "grad_norm": 0.5573620796203613, + "learning_rate": 3.958488623550478e-05, + "loss": 0.11574172973632812, + "step": 946 + }, + { + "epoch": 0.1319584755800181, + "grad_norm": 0.5051873326301575, + "learning_rate": 3.95829733357955e-05, + "loss": 0.10662841796875, + "step": 947 + }, + { + "epoch": 0.1320978192712325, + "grad_norm": 0.5547820329666138, + "learning_rate": 3.958105608519663e-05, + "loss": 0.09090423583984375, + "step": 948 + }, + { + "epoch": 0.13223716296244686, + "grad_norm": 2.2398440837860107, + "learning_rate": 3.957913448413415e-05, + "loss": 0.12611770629882812, + "step": 949 + }, + { + "epoch": 0.13237650665366127, + "grad_norm": 1.3934921026229858, + "learning_rate": 3.957720853303499e-05, + "loss": 0.15957069396972656, + "step": 950 + }, + { + "epoch": 0.13251585034487565, + "grad_norm": 0.586750864982605, + "learning_rate": 3.9575278232327036e-05, + "loss": 0.10219192504882812, + "step": 951 + }, + { + "epoch": 0.13265519403609002, + "grad_norm": 1.8988627195358276, + "learning_rate": 3.957334358243917e-05, + "loss": 0.1316547393798828, + "step": 952 + }, + { + "epoch": 0.1327945377273044, + "grad_norm": 1.4510811567306519, + "learning_rate": 3.957140458380123e-05, + "loss": 0.11780357360839844, + "step": 953 + }, + { + "epoch": 0.13293388141851878, + "grad_norm": 0.7157463431358337, + "learning_rate": 3.956946123684402e-05, + "loss": 0.10215377807617188, + "step": 954 + }, + { + "epoch": 0.13307322510973316, + "grad_norm": 0.6136739253997803, + "learning_rate": 3.95675135419993e-05, + "loss": 0.10604667663574219, + "step": 955 + }, + { + "epoch": 0.13321256880094753, + "grad_norm": 1.2823508977890015, + "learning_rate": 3.9565561499699795e-05, + "loss": 0.11227798461914062, + "step": 956 + }, + { + "epoch": 0.1333519124921619, + "grad_norm": 2.3199362754821777, + "learning_rate": 3.9563605110379224e-05, + "loss": 0.1218719482421875, + "step": 957 + }, + { + "epoch": 0.1334912561833763, + "grad_norm": 1.9616565704345703, + "learning_rate": 3.956164437447224e-05, + "loss": 0.10147476196289062, + "step": 958 + }, + { + "epoch": 0.13363059987459067, + "grad_norm": 1.0269482135772705, + "learning_rate": 3.955967929241447e-05, + "loss": 0.09922218322753906, + "step": 959 + }, + { + "epoch": 0.13376994356580507, + "grad_norm": 1.047727346420288, + "learning_rate": 3.955770986464253e-05, + "loss": 0.1325359344482422, + "step": 960 + }, + { + "epoch": 0.13390928725701945, + "grad_norm": 2.1604719161987305, + "learning_rate": 3.955573609159395e-05, + "loss": 0.10973167419433594, + "step": 961 + }, + { + "epoch": 0.13404863094823383, + "grad_norm": 2.0188651084899902, + "learning_rate": 3.95537579737073e-05, + "loss": 0.11566448211669922, + "step": 962 + }, + { + "epoch": 0.1341879746394482, + "grad_norm": 1.1468340158462524, + "learning_rate": 3.955177551142202e-05, + "loss": 0.11519432067871094, + "step": 963 + }, + { + "epoch": 0.13432731833066258, + "grad_norm": 0.5684497952461243, + "learning_rate": 3.954978870517861e-05, + "loss": 0.10630035400390625, + "step": 964 + }, + { + "epoch": 0.13446666202187696, + "grad_norm": 0.6943172216415405, + "learning_rate": 3.954779755541848e-05, + "loss": 0.10228729248046875, + "step": 965 + }, + { + "epoch": 0.13460600571309134, + "grad_norm": 1.8208301067352295, + "learning_rate": 3.954580206258402e-05, + "loss": 0.1386394500732422, + "step": 966 + }, + { + "epoch": 0.13474534940430571, + "grad_norm": 0.8284691572189331, + "learning_rate": 3.9543802227118574e-05, + "loss": 0.10860061645507812, + "step": 967 + }, + { + "epoch": 0.1348846930955201, + "grad_norm": 0.49735310673713684, + "learning_rate": 3.954179804946647e-05, + "loss": 0.11211776733398438, + "step": 968 + }, + { + "epoch": 0.13502403678673447, + "grad_norm": 0.8116667866706848, + "learning_rate": 3.953978953007299e-05, + "loss": 0.10866165161132812, + "step": 969 + }, + { + "epoch": 0.13516338047794887, + "grad_norm": 0.5078960061073303, + "learning_rate": 3.953777666938436e-05, + "loss": 0.10389137268066406, + "step": 970 + }, + { + "epoch": 0.13530272416916325, + "grad_norm": 0.7310795187950134, + "learning_rate": 3.953575946784782e-05, + "loss": 0.1147918701171875, + "step": 971 + }, + { + "epoch": 0.13544206786037763, + "grad_norm": 1.0770453214645386, + "learning_rate": 3.953373792591154e-05, + "loss": 0.11897087097167969, + "step": 972 + }, + { + "epoch": 0.135581411551592, + "grad_norm": 0.9468055963516235, + "learning_rate": 3.953171204402465e-05, + "loss": 0.12859535217285156, + "step": 973 + }, + { + "epoch": 0.13572075524280638, + "grad_norm": 0.8375853896141052, + "learning_rate": 3.952968182263726e-05, + "loss": 0.12364387512207031, + "step": 974 + }, + { + "epoch": 0.13586009893402076, + "grad_norm": 0.5607631206512451, + "learning_rate": 3.9527647262200444e-05, + "loss": 0.11432266235351562, + "step": 975 + }, + { + "epoch": 0.13599944262523514, + "grad_norm": 0.5703971982002258, + "learning_rate": 3.9525608363166225e-05, + "loss": 0.11103057861328125, + "step": 976 + }, + { + "epoch": 0.13613878631644952, + "grad_norm": 1.0922409296035767, + "learning_rate": 3.9523565125987606e-05, + "loss": 0.12996482849121094, + "step": 977 + }, + { + "epoch": 0.1362781300076639, + "grad_norm": 0.6918514370918274, + "learning_rate": 3.952151755111855e-05, + "loss": 0.10161018371582031, + "step": 978 + }, + { + "epoch": 0.13641747369887827, + "grad_norm": 0.7833350300788879, + "learning_rate": 3.951946563901397e-05, + "loss": 0.12763595581054688, + "step": 979 + }, + { + "epoch": 0.13655681739009268, + "grad_norm": 0.8319650292396545, + "learning_rate": 3.951740939012977e-05, + "loss": 0.11005401611328125, + "step": 980 + }, + { + "epoch": 0.13669616108130705, + "grad_norm": 0.5948031544685364, + "learning_rate": 3.951534880492279e-05, + "loss": 0.09735107421875, + "step": 981 + }, + { + "epoch": 0.13683550477252143, + "grad_norm": 1.0326142311096191, + "learning_rate": 3.951328388385085e-05, + "loss": 0.12701034545898438, + "step": 982 + }, + { + "epoch": 0.1369748484637358, + "grad_norm": 0.8416455984115601, + "learning_rate": 3.951121462737273e-05, + "loss": 0.10186958312988281, + "step": 983 + }, + { + "epoch": 0.1371141921549502, + "grad_norm": 1.3298273086547852, + "learning_rate": 3.9509141035948156e-05, + "loss": 0.12430953979492188, + "step": 984 + }, + { + "epoch": 0.13725353584616456, + "grad_norm": 0.8721205592155457, + "learning_rate": 3.950706311003785e-05, + "loss": 0.10726356506347656, + "step": 985 + }, + { + "epoch": 0.13739287953737894, + "grad_norm": 1.360089898109436, + "learning_rate": 3.950498085010348e-05, + "loss": 0.127166748046875, + "step": 986 + }, + { + "epoch": 0.13753222322859332, + "grad_norm": 1.599683165550232, + "learning_rate": 3.950289425660767e-05, + "loss": 0.1290912628173828, + "step": 987 + }, + { + "epoch": 0.1376715669198077, + "grad_norm": 0.666685163974762, + "learning_rate": 3.950080333001402e-05, + "loss": 0.11014747619628906, + "step": 988 + }, + { + "epoch": 0.13781091061102207, + "grad_norm": 0.8326734900474548, + "learning_rate": 3.9498708070787076e-05, + "loss": 0.11379241943359375, + "step": 989 + }, + { + "epoch": 0.13795025430223645, + "grad_norm": 0.620697021484375, + "learning_rate": 3.949660847939236e-05, + "loss": 0.10601234436035156, + "step": 990 + }, + { + "epoch": 0.13808959799345086, + "grad_norm": 0.8367962837219238, + "learning_rate": 3.949450455629635e-05, + "loss": 0.11175727844238281, + "step": 991 + }, + { + "epoch": 0.13822894168466524, + "grad_norm": 1.4198100566864014, + "learning_rate": 3.9492396301966504e-05, + "loss": 0.10361671447753906, + "step": 992 + }, + { + "epoch": 0.1383682853758796, + "grad_norm": 1.077608346939087, + "learning_rate": 3.9490283716871214e-05, + "loss": 0.09394645690917969, + "step": 993 + }, + { + "epoch": 0.138507629067094, + "grad_norm": 0.9003399610519409, + "learning_rate": 3.948816680147986e-05, + "loss": 0.10455703735351562, + "step": 994 + }, + { + "epoch": 0.13864697275830837, + "grad_norm": 0.7389146089553833, + "learning_rate": 3.9486045556262756e-05, + "loss": 0.11318206787109375, + "step": 995 + }, + { + "epoch": 0.13878631644952275, + "grad_norm": 0.6551561951637268, + "learning_rate": 3.948391998169121e-05, + "loss": 0.11998558044433594, + "step": 996 + }, + { + "epoch": 0.13892566014073712, + "grad_norm": 0.5806409120559692, + "learning_rate": 3.948179007823746e-05, + "loss": 0.09575080871582031, + "step": 997 + }, + { + "epoch": 0.1390650038319515, + "grad_norm": 0.46352267265319824, + "learning_rate": 3.947965584637474e-05, + "loss": 0.09334945678710938, + "step": 998 + }, + { + "epoch": 0.13920434752316588, + "grad_norm": 0.9006468057632446, + "learning_rate": 3.947751728657722e-05, + "loss": 0.11858558654785156, + "step": 999 + }, + { + "epoch": 0.13934369121438026, + "grad_norm": 0.4505329430103302, + "learning_rate": 3.9475374399320036e-05, + "loss": 0.11407470703125, + "step": 1000 + }, + { + "epoch": 0.13948303490559466, + "grad_norm": 0.5903927683830261, + "learning_rate": 3.947322718507929e-05, + "loss": 0.11135482788085938, + "step": 1001 + }, + { + "epoch": 0.13962237859680904, + "grad_norm": 0.6097264289855957, + "learning_rate": 3.947107564433204e-05, + "loss": 0.10097694396972656, + "step": 1002 + }, + { + "epoch": 0.13976172228802342, + "grad_norm": 1.0720396041870117, + "learning_rate": 3.946891977755632e-05, + "loss": 0.1323375701904297, + "step": 1003 + }, + { + "epoch": 0.1399010659792378, + "grad_norm": 0.5328179597854614, + "learning_rate": 3.946675958523111e-05, + "loss": 0.10199546813964844, + "step": 1004 + }, + { + "epoch": 0.14004040967045217, + "grad_norm": 0.6866522431373596, + "learning_rate": 3.946459506783635e-05, + "loss": 0.08880329132080078, + "step": 1005 + }, + { + "epoch": 0.14017975336166655, + "grad_norm": 1.0815743207931519, + "learning_rate": 3.9462426225852954e-05, + "loss": 0.10049247741699219, + "step": 1006 + }, + { + "epoch": 0.14031909705288093, + "grad_norm": 0.5111573338508606, + "learning_rate": 3.946025305976278e-05, + "loss": 0.09179353713989258, + "step": 1007 + }, + { + "epoch": 0.1404584407440953, + "grad_norm": 1.857206106185913, + "learning_rate": 3.9458075570048666e-05, + "loss": 0.1254100799560547, + "step": 1008 + }, + { + "epoch": 0.14059778443530968, + "grad_norm": 0.8958700299263, + "learning_rate": 3.945589375719439e-05, + "loss": 0.10712051391601562, + "step": 1009 + }, + { + "epoch": 0.14073712812652406, + "grad_norm": 0.6539027690887451, + "learning_rate": 3.9453707621684714e-05, + "loss": 0.10001373291015625, + "step": 1010 + }, + { + "epoch": 0.14087647181773846, + "grad_norm": 0.7009697556495667, + "learning_rate": 3.945151716400534e-05, + "loss": 0.09032058715820312, + "step": 1011 + }, + { + "epoch": 0.14101581550895284, + "grad_norm": 1.0918258428573608, + "learning_rate": 3.944932238464293e-05, + "loss": 0.13779830932617188, + "step": 1012 + }, + { + "epoch": 0.14115515920016722, + "grad_norm": 0.5257221460342407, + "learning_rate": 3.944712328408513e-05, + "loss": 0.10461997985839844, + "step": 1013 + }, + { + "epoch": 0.1412945028913816, + "grad_norm": 0.5867927670478821, + "learning_rate": 3.9444919862820514e-05, + "loss": 0.0917510986328125, + "step": 1014 + }, + { + "epoch": 0.14143384658259597, + "grad_norm": 0.6286444067955017, + "learning_rate": 3.944271212133864e-05, + "loss": 0.1205902099609375, + "step": 1015 + }, + { + "epoch": 0.14157319027381035, + "grad_norm": 0.780185878276825, + "learning_rate": 3.9440500060130025e-05, + "loss": 0.0893402099609375, + "step": 1016 + }, + { + "epoch": 0.14171253396502473, + "grad_norm": 0.7346964478492737, + "learning_rate": 3.943828367968613e-05, + "loss": 0.10934829711914062, + "step": 1017 + }, + { + "epoch": 0.1418518776562391, + "grad_norm": 0.6397187113761902, + "learning_rate": 3.9436062980499376e-05, + "loss": 0.10073471069335938, + "step": 1018 + }, + { + "epoch": 0.14199122134745348, + "grad_norm": 1.0864570140838623, + "learning_rate": 3.943383796306317e-05, + "loss": 0.09846115112304688, + "step": 1019 + }, + { + "epoch": 0.14213056503866786, + "grad_norm": 0.6002786755561829, + "learning_rate": 3.9431608627871845e-05, + "loss": 0.10051918029785156, + "step": 1020 + }, + { + "epoch": 0.14226990872988227, + "grad_norm": 0.8176535367965698, + "learning_rate": 3.9429374975420714e-05, + "loss": 0.10022926330566406, + "step": 1021 + }, + { + "epoch": 0.14240925242109664, + "grad_norm": 0.800677478313446, + "learning_rate": 3.942713700620605e-05, + "loss": 0.10882949829101562, + "step": 1022 + }, + { + "epoch": 0.14254859611231102, + "grad_norm": 1.0367282629013062, + "learning_rate": 3.942489472072507e-05, + "loss": 0.11409378051757812, + "step": 1023 + }, + { + "epoch": 0.1426879398035254, + "grad_norm": 1.8286921977996826, + "learning_rate": 3.942264811947596e-05, + "loss": 0.13402557373046875, + "step": 1024 + }, + { + "epoch": 0.14282728349473978, + "grad_norm": 0.5738794207572937, + "learning_rate": 3.9420397202957854e-05, + "loss": 0.10907745361328125, + "step": 1025 + }, + { + "epoch": 0.14296662718595415, + "grad_norm": 0.4828355610370636, + "learning_rate": 3.941814197167087e-05, + "loss": 0.09228515625, + "step": 1026 + }, + { + "epoch": 0.14310597087716853, + "grad_norm": 0.779949426651001, + "learning_rate": 3.941588242611607e-05, + "loss": 0.08768463134765625, + "step": 1027 + }, + { + "epoch": 0.1432453145683829, + "grad_norm": 0.7158897519111633, + "learning_rate": 3.9413618566795465e-05, + "loss": 0.1085052490234375, + "step": 1028 + }, + { + "epoch": 0.1433846582595973, + "grad_norm": 1.0223382711410522, + "learning_rate": 3.941135039421204e-05, + "loss": 0.1296710968017578, + "step": 1029 + }, + { + "epoch": 0.14352400195081166, + "grad_norm": 0.47489726543426514, + "learning_rate": 3.940907790886971e-05, + "loss": 0.10461997985839844, + "step": 1030 + }, + { + "epoch": 0.14366334564202607, + "grad_norm": 0.7649384140968323, + "learning_rate": 3.94068011112734e-05, + "loss": 0.12026786804199219, + "step": 1031 + }, + { + "epoch": 0.14380268933324045, + "grad_norm": 1.0013147592544556, + "learning_rate": 3.9404520001928945e-05, + "loss": 0.10904312133789062, + "step": 1032 + }, + { + "epoch": 0.14394203302445482, + "grad_norm": 0.5767439603805542, + "learning_rate": 3.940223458134316e-05, + "loss": 0.10051727294921875, + "step": 1033 + }, + { + "epoch": 0.1440813767156692, + "grad_norm": 0.6725744605064392, + "learning_rate": 3.939994485002381e-05, + "loss": 0.1264362335205078, + "step": 1034 + }, + { + "epoch": 0.14422072040688358, + "grad_norm": 1.0172145366668701, + "learning_rate": 3.939765080847962e-05, + "loss": 0.10254478454589844, + "step": 1035 + }, + { + "epoch": 0.14436006409809796, + "grad_norm": 0.6449430584907532, + "learning_rate": 3.9395352457220275e-05, + "loss": 0.11199569702148438, + "step": 1036 + }, + { + "epoch": 0.14449940778931233, + "grad_norm": 1.7011241912841797, + "learning_rate": 3.939304979675642e-05, + "loss": 0.12421798706054688, + "step": 1037 + }, + { + "epoch": 0.1446387514805267, + "grad_norm": 0.7235862612724304, + "learning_rate": 3.939074282759965e-05, + "loss": 0.11300277709960938, + "step": 1038 + }, + { + "epoch": 0.1447780951717411, + "grad_norm": 0.6549309492111206, + "learning_rate": 3.938843155026252e-05, + "loss": 0.10903167724609375, + "step": 1039 + }, + { + "epoch": 0.14491743886295547, + "grad_norm": 1.0817229747772217, + "learning_rate": 3.938611596525855e-05, + "loss": 0.09638214111328125, + "step": 1040 + }, + { + "epoch": 0.14505678255416987, + "grad_norm": 2.0446906089782715, + "learning_rate": 3.9383796073102206e-05, + "loss": 0.11517715454101562, + "step": 1041 + }, + { + "epoch": 0.14519612624538425, + "grad_norm": 0.8069560527801514, + "learning_rate": 3.9381471874308916e-05, + "loss": 0.12018585205078125, + "step": 1042 + }, + { + "epoch": 0.14533546993659863, + "grad_norm": 0.8228487968444824, + "learning_rate": 3.9379143369395054e-05, + "loss": 0.11195755004882812, + "step": 1043 + }, + { + "epoch": 0.145474813627813, + "grad_norm": 1.1628808975219727, + "learning_rate": 3.937681055887797e-05, + "loss": 0.13904190063476562, + "step": 1044 + }, + { + "epoch": 0.14561415731902738, + "grad_norm": 1.0687800645828247, + "learning_rate": 3.937447344327596e-05, + "loss": 0.11993598937988281, + "step": 1045 + }, + { + "epoch": 0.14575350101024176, + "grad_norm": 0.8150053024291992, + "learning_rate": 3.937213202310828e-05, + "loss": 0.11671829223632812, + "step": 1046 + }, + { + "epoch": 0.14589284470145614, + "grad_norm": 1.0894814729690552, + "learning_rate": 3.9369786298895144e-05, + "loss": 0.10169029235839844, + "step": 1047 + }, + { + "epoch": 0.14603218839267051, + "grad_norm": 1.314948558807373, + "learning_rate": 3.93674362711577e-05, + "loss": 0.1286487579345703, + "step": 1048 + }, + { + "epoch": 0.1461715320838849, + "grad_norm": 0.9559410214424133, + "learning_rate": 3.936508194041809e-05, + "loss": 0.1086578369140625, + "step": 1049 + }, + { + "epoch": 0.14631087577509927, + "grad_norm": 0.6632380485534668, + "learning_rate": 3.936272330719938e-05, + "loss": 0.09218025207519531, + "step": 1050 + }, + { + "epoch": 0.14645021946631367, + "grad_norm": 0.9292780756950378, + "learning_rate": 3.936036037202561e-05, + "loss": 0.11011505126953125, + "step": 1051 + }, + { + "epoch": 0.14658956315752805, + "grad_norm": 0.5600764751434326, + "learning_rate": 3.935799313542178e-05, + "loss": 0.10767555236816406, + "step": 1052 + }, + { + "epoch": 0.14672890684874243, + "grad_norm": 1.2531505823135376, + "learning_rate": 3.935562159791381e-05, + "loss": 0.104156494140625, + "step": 1053 + }, + { + "epoch": 0.1468682505399568, + "grad_norm": 0.6613417863845825, + "learning_rate": 3.9353245760028634e-05, + "loss": 0.09179306030273438, + "step": 1054 + }, + { + "epoch": 0.14700759423117118, + "grad_norm": 0.6831132769584656, + "learning_rate": 3.935086562229408e-05, + "loss": 0.11167526245117188, + "step": 1055 + }, + { + "epoch": 0.14714693792238556, + "grad_norm": 1.1813374757766724, + "learning_rate": 3.9348481185238976e-05, + "loss": 0.16306686401367188, + "step": 1056 + }, + { + "epoch": 0.14728628161359994, + "grad_norm": 1.8550021648406982, + "learning_rate": 3.9346092449393084e-05, + "loss": 0.11565780639648438, + "step": 1057 + }, + { + "epoch": 0.14742562530481432, + "grad_norm": 1.9759248495101929, + "learning_rate": 3.934369941528713e-05, + "loss": 0.1028594970703125, + "step": 1058 + }, + { + "epoch": 0.1475649689960287, + "grad_norm": 1.3117984533309937, + "learning_rate": 3.93413020834528e-05, + "loss": 0.10696029663085938, + "step": 1059 + }, + { + "epoch": 0.14770431268724307, + "grad_norm": 1.0312066078186035, + "learning_rate": 3.9338900454422704e-05, + "loss": 0.1008148193359375, + "step": 1060 + }, + { + "epoch": 0.14784365637845748, + "grad_norm": 0.5009776949882507, + "learning_rate": 3.933649452873044e-05, + "loss": 0.08628082275390625, + "step": 1061 + }, + { + "epoch": 0.14798300006967186, + "grad_norm": 1.6246182918548584, + "learning_rate": 3.933408430691055e-05, + "loss": 0.11508846282958984, + "step": 1062 + }, + { + "epoch": 0.14812234376088623, + "grad_norm": 2.139665126800537, + "learning_rate": 3.933166978949855e-05, + "loss": 0.10576248168945312, + "step": 1063 + }, + { + "epoch": 0.1482616874521006, + "grad_norm": 1.9222304821014404, + "learning_rate": 3.932925097703086e-05, + "loss": 0.10812759399414062, + "step": 1064 + }, + { + "epoch": 0.148401031143315, + "grad_norm": 0.8979135751724243, + "learning_rate": 3.932682787004489e-05, + "loss": 0.09952545166015625, + "step": 1065 + }, + { + "epoch": 0.14854037483452937, + "grad_norm": 1.2778499126434326, + "learning_rate": 3.932440046907902e-05, + "loss": 0.12457084655761719, + "step": 1066 + }, + { + "epoch": 0.14867971852574374, + "grad_norm": 1.1739704608917236, + "learning_rate": 3.932196877467254e-05, + "loss": 0.1040802001953125, + "step": 1067 + }, + { + "epoch": 0.14881906221695812, + "grad_norm": 1.604886531829834, + "learning_rate": 3.9319532787365733e-05, + "loss": 0.11844253540039062, + "step": 1068 + }, + { + "epoch": 0.1489584059081725, + "grad_norm": 0.8175526857376099, + "learning_rate": 3.931709250769981e-05, + "loss": 0.11043739318847656, + "step": 1069 + }, + { + "epoch": 0.14909774959938688, + "grad_norm": 0.45772668719291687, + "learning_rate": 3.931464793621695e-05, + "loss": 0.10898590087890625, + "step": 1070 + }, + { + "epoch": 0.14923709329060128, + "grad_norm": 0.7666340470314026, + "learning_rate": 3.931219907346028e-05, + "loss": 0.10194969177246094, + "step": 1071 + }, + { + "epoch": 0.14937643698181566, + "grad_norm": 0.4943307340145111, + "learning_rate": 3.930974591997387e-05, + "loss": 0.09518051147460938, + "step": 1072 + }, + { + "epoch": 0.14951578067303004, + "grad_norm": 0.4414121210575104, + "learning_rate": 3.930728847630278e-05, + "loss": 0.09515762329101562, + "step": 1073 + }, + { + "epoch": 0.1496551243642444, + "grad_norm": 0.8356754779815674, + "learning_rate": 3.930482674299297e-05, + "loss": 0.12707901000976562, + "step": 1074 + }, + { + "epoch": 0.1497944680554588, + "grad_norm": 0.528853178024292, + "learning_rate": 3.930236072059141e-05, + "loss": 0.08618927001953125, + "step": 1075 + }, + { + "epoch": 0.14993381174667317, + "grad_norm": 0.9017003774642944, + "learning_rate": 3.929989040964596e-05, + "loss": 0.0966796875, + "step": 1076 + }, + { + "epoch": 0.15007315543788755, + "grad_norm": 1.4718043804168701, + "learning_rate": 3.92974158107055e-05, + "loss": 0.1151580810546875, + "step": 1077 + }, + { + "epoch": 0.15021249912910192, + "grad_norm": 1.0914039611816406, + "learning_rate": 3.929493692431981e-05, + "loss": 0.09389877319335938, + "step": 1078 + }, + { + "epoch": 0.1503518428203163, + "grad_norm": 0.6040590405464172, + "learning_rate": 3.929245375103965e-05, + "loss": 0.11161231994628906, + "step": 1079 + }, + { + "epoch": 0.15049118651153068, + "grad_norm": 2.2113852500915527, + "learning_rate": 3.928996629141671e-05, + "loss": 0.14171981811523438, + "step": 1080 + }, + { + "epoch": 0.15063053020274508, + "grad_norm": 1.722451090812683, + "learning_rate": 3.928747454600367e-05, + "loss": 0.11282920837402344, + "step": 1081 + }, + { + "epoch": 0.15076987389395946, + "grad_norm": 1.4559450149536133, + "learning_rate": 3.928497851535412e-05, + "loss": 0.10285758972167969, + "step": 1082 + }, + { + "epoch": 0.15090921758517384, + "grad_norm": 0.8050239086151123, + "learning_rate": 3.9282478200022624e-05, + "loss": 0.13266563415527344, + "step": 1083 + }, + { + "epoch": 0.15104856127638822, + "grad_norm": 1.0186659097671509, + "learning_rate": 3.9279973600564706e-05, + "loss": 0.10310173034667969, + "step": 1084 + }, + { + "epoch": 0.1511879049676026, + "grad_norm": 1.653612494468689, + "learning_rate": 3.9277464717536815e-05, + "loss": 0.12218856811523438, + "step": 1085 + }, + { + "epoch": 0.15132724865881697, + "grad_norm": 1.365687608718872, + "learning_rate": 3.927495155149639e-05, + "loss": 0.10552787780761719, + "step": 1086 + }, + { + "epoch": 0.15146659235003135, + "grad_norm": 1.0245559215545654, + "learning_rate": 3.927243410300177e-05, + "loss": 0.10519027709960938, + "step": 1087 + }, + { + "epoch": 0.15160593604124573, + "grad_norm": 0.6870056986808777, + "learning_rate": 3.9269912372612295e-05, + "loss": 0.10170555114746094, + "step": 1088 + }, + { + "epoch": 0.1517452797324601, + "grad_norm": 1.3269811868667603, + "learning_rate": 3.926738636088823e-05, + "loss": 0.10865592956542969, + "step": 1089 + }, + { + "epoch": 0.15188462342367448, + "grad_norm": 1.4549938440322876, + "learning_rate": 3.92648560683908e-05, + "loss": 0.09524726867675781, + "step": 1090 + }, + { + "epoch": 0.15202396711488889, + "grad_norm": 1.1202049255371094, + "learning_rate": 3.926232149568217e-05, + "loss": 0.10443496704101562, + "step": 1091 + }, + { + "epoch": 0.15216331080610326, + "grad_norm": 0.8040599226951599, + "learning_rate": 3.925978264332548e-05, + "loss": 0.10418319702148438, + "step": 1092 + }, + { + "epoch": 0.15230265449731764, + "grad_norm": 0.9009823799133301, + "learning_rate": 3.925723951188478e-05, + "loss": 0.10699462890625, + "step": 1093 + }, + { + "epoch": 0.15244199818853202, + "grad_norm": 1.7892396450042725, + "learning_rate": 3.925469210192512e-05, + "loss": 0.10346221923828125, + "step": 1094 + }, + { + "epoch": 0.1525813418797464, + "grad_norm": 1.6210839748382568, + "learning_rate": 3.9252140414012465e-05, + "loss": 0.10556983947753906, + "step": 1095 + }, + { + "epoch": 0.15272068557096077, + "grad_norm": 0.6625831127166748, + "learning_rate": 3.9249584448713746e-05, + "loss": 0.12241363525390625, + "step": 1096 + }, + { + "epoch": 0.15286002926217515, + "grad_norm": 1.2102683782577515, + "learning_rate": 3.9247024206596836e-05, + "loss": 0.12251663208007812, + "step": 1097 + }, + { + "epoch": 0.15299937295338953, + "grad_norm": 0.7946125864982605, + "learning_rate": 3.924445968823057e-05, + "loss": 0.10699653625488281, + "step": 1098 + }, + { + "epoch": 0.1531387166446039, + "grad_norm": 1.1516603231430054, + "learning_rate": 3.924189089418471e-05, + "loss": 0.1122894287109375, + "step": 1099 + }, + { + "epoch": 0.15327806033581828, + "grad_norm": 0.849206805229187, + "learning_rate": 3.923931782503e-05, + "loss": 0.09733200073242188, + "step": 1100 + }, + { + "epoch": 0.1534174040270327, + "grad_norm": 1.0158400535583496, + "learning_rate": 3.923674048133811e-05, + "loss": 0.1146697998046875, + "step": 1101 + }, + { + "epoch": 0.15355674771824707, + "grad_norm": 0.8866027593612671, + "learning_rate": 3.923415886368166e-05, + "loss": 0.13073158264160156, + "step": 1102 + }, + { + "epoch": 0.15369609140946144, + "grad_norm": 0.8355476260185242, + "learning_rate": 3.923157297263425e-05, + "loss": 0.09718799591064453, + "step": 1103 + }, + { + "epoch": 0.15383543510067582, + "grad_norm": 1.220397710800171, + "learning_rate": 3.922898280877037e-05, + "loss": 0.11266708374023438, + "step": 1104 + }, + { + "epoch": 0.1539747787918902, + "grad_norm": 0.5931010246276855, + "learning_rate": 3.922638837266552e-05, + "loss": 0.09435844421386719, + "step": 1105 + }, + { + "epoch": 0.15411412248310458, + "grad_norm": 0.6064891219139099, + "learning_rate": 3.9223789664896136e-05, + "loss": 0.11042404174804688, + "step": 1106 + }, + { + "epoch": 0.15425346617431895, + "grad_norm": 0.5596071481704712, + "learning_rate": 3.922118668603956e-05, + "loss": 0.10827445983886719, + "step": 1107 + }, + { + "epoch": 0.15439280986553333, + "grad_norm": 1.2594919204711914, + "learning_rate": 3.9218579436674134e-05, + "loss": 0.08607101440429688, + "step": 1108 + }, + { + "epoch": 0.1545321535567477, + "grad_norm": 1.8663822412490845, + "learning_rate": 3.921596791737912e-05, + "loss": 0.10872077941894531, + "step": 1109 + }, + { + "epoch": 0.1546714972479621, + "grad_norm": 1.4589203596115112, + "learning_rate": 3.9213352128734746e-05, + "loss": 0.08480262756347656, + "step": 1110 + }, + { + "epoch": 0.1548108409391765, + "grad_norm": 0.6432622075080872, + "learning_rate": 3.9210732071322175e-05, + "loss": 0.10136604309082031, + "step": 1111 + }, + { + "epoch": 0.15495018463039087, + "grad_norm": 0.48003774881362915, + "learning_rate": 3.920810774572353e-05, + "loss": 0.09714984893798828, + "step": 1112 + }, + { + "epoch": 0.15508952832160525, + "grad_norm": 0.7846549153327942, + "learning_rate": 3.9205479152521874e-05, + "loss": 0.10246467590332031, + "step": 1113 + }, + { + "epoch": 0.15522887201281962, + "grad_norm": 0.6433565020561218, + "learning_rate": 3.920284629230121e-05, + "loss": 0.10528755187988281, + "step": 1114 + }, + { + "epoch": 0.155368215704034, + "grad_norm": 1.3627320528030396, + "learning_rate": 3.920020916564652e-05, + "loss": 0.12327003479003906, + "step": 1115 + }, + { + "epoch": 0.15550755939524838, + "grad_norm": 0.9532634019851685, + "learning_rate": 3.919756777314369e-05, + "loss": 0.10749244689941406, + "step": 1116 + }, + { + "epoch": 0.15564690308646276, + "grad_norm": 0.7035402655601501, + "learning_rate": 3.9194922115379596e-05, + "loss": 0.11034011840820312, + "step": 1117 + }, + { + "epoch": 0.15578624677767713, + "grad_norm": 0.768447756767273, + "learning_rate": 3.919227219294204e-05, + "loss": 0.10797119140625, + "step": 1118 + }, + { + "epoch": 0.1559255904688915, + "grad_norm": 0.7095277309417725, + "learning_rate": 3.918961800641976e-05, + "loss": 0.11505126953125, + "step": 1119 + }, + { + "epoch": 0.1560649341601059, + "grad_norm": 0.4773404598236084, + "learning_rate": 3.918695955640247e-05, + "loss": 0.09186363220214844, + "step": 1120 + }, + { + "epoch": 0.1562042778513203, + "grad_norm": 0.6366556882858276, + "learning_rate": 3.9184296843480816e-05, + "loss": 0.12832260131835938, + "step": 1121 + }, + { + "epoch": 0.15634362154253467, + "grad_norm": 1.5175718069076538, + "learning_rate": 3.918162986824638e-05, + "loss": 0.11987686157226562, + "step": 1122 + }, + { + "epoch": 0.15648296523374905, + "grad_norm": 0.6778616309165955, + "learning_rate": 3.9178958631291715e-05, + "loss": 0.10685920715332031, + "step": 1123 + }, + { + "epoch": 0.15662230892496343, + "grad_norm": 0.6754767894744873, + "learning_rate": 3.91762831332103e-05, + "loss": 0.09728050231933594, + "step": 1124 + }, + { + "epoch": 0.1567616526161778, + "grad_norm": 0.8754009008407593, + "learning_rate": 3.917360337459658e-05, + "loss": 0.09814834594726562, + "step": 1125 + }, + { + "epoch": 0.15690099630739218, + "grad_norm": 0.7166948318481445, + "learning_rate": 3.9170919356045935e-05, + "loss": 0.11912155151367188, + "step": 1126 + }, + { + "epoch": 0.15704033999860656, + "grad_norm": 0.9041458368301392, + "learning_rate": 3.916823107815469e-05, + "loss": 0.11931228637695312, + "step": 1127 + }, + { + "epoch": 0.15717968368982094, + "grad_norm": 1.0233509540557861, + "learning_rate": 3.916553854152011e-05, + "loss": 0.10603713989257812, + "step": 1128 + }, + { + "epoch": 0.15731902738103531, + "grad_norm": 1.2535619735717773, + "learning_rate": 3.916284174674042e-05, + "loss": 0.12276840209960938, + "step": 1129 + }, + { + "epoch": 0.1574583710722497, + "grad_norm": 0.7808024287223816, + "learning_rate": 3.9160140694414796e-05, + "loss": 0.10838890075683594, + "step": 1130 + }, + { + "epoch": 0.1575977147634641, + "grad_norm": 1.0455621480941772, + "learning_rate": 3.915743538514334e-05, + "loss": 0.11814498901367188, + "step": 1131 + }, + { + "epoch": 0.15773705845467847, + "grad_norm": 0.8244376182556152, + "learning_rate": 3.915472581952711e-05, + "loss": 0.11841583251953125, + "step": 1132 + }, + { + "epoch": 0.15787640214589285, + "grad_norm": 1.372155785560608, + "learning_rate": 3.915201199816812e-05, + "loss": 0.11299514770507812, + "step": 1133 + }, + { + "epoch": 0.15801574583710723, + "grad_norm": 0.6422916650772095, + "learning_rate": 3.914929392166931e-05, + "loss": 0.10070228576660156, + "step": 1134 + }, + { + "epoch": 0.1581550895283216, + "grad_norm": 1.6666007041931152, + "learning_rate": 3.914657159063458e-05, + "loss": 0.12022972106933594, + "step": 1135 + }, + { + "epoch": 0.15829443321953598, + "grad_norm": 0.6519126892089844, + "learning_rate": 3.914384500566876e-05, + "loss": 0.10200309753417969, + "step": 1136 + }, + { + "epoch": 0.15843377691075036, + "grad_norm": 1.2189565896987915, + "learning_rate": 3.9141114167377636e-05, + "loss": 0.13907623291015625, + "step": 1137 + }, + { + "epoch": 0.15857312060196474, + "grad_norm": 0.6715749502182007, + "learning_rate": 3.9138379076367956e-05, + "loss": 0.10426139831542969, + "step": 1138 + }, + { + "epoch": 0.15871246429317912, + "grad_norm": 0.7214500308036804, + "learning_rate": 3.913563973324738e-05, + "loss": 0.09730911254882812, + "step": 1139 + }, + { + "epoch": 0.1588518079843935, + "grad_norm": 0.9711089134216309, + "learning_rate": 3.913289613862452e-05, + "loss": 0.12799072265625, + "step": 1140 + }, + { + "epoch": 0.1589911516756079, + "grad_norm": 0.7783446907997131, + "learning_rate": 3.913014829310895e-05, + "loss": 0.12524986267089844, + "step": 1141 + }, + { + "epoch": 0.15913049536682228, + "grad_norm": 0.6626863479614258, + "learning_rate": 3.9127396197311185e-05, + "loss": 0.1197662353515625, + "step": 1142 + }, + { + "epoch": 0.15926983905803666, + "grad_norm": 0.9344526529312134, + "learning_rate": 3.9124639851842666e-05, + "loss": 0.10855674743652344, + "step": 1143 + }, + { + "epoch": 0.15940918274925103, + "grad_norm": 0.7068229913711548, + "learning_rate": 3.91218792573158e-05, + "loss": 0.12094497680664062, + "step": 1144 + }, + { + "epoch": 0.1595485264404654, + "grad_norm": 0.6796913743019104, + "learning_rate": 3.911911441434392e-05, + "loss": 0.12254714965820312, + "step": 1145 + }, + { + "epoch": 0.1596878701316798, + "grad_norm": 1.3940647840499878, + "learning_rate": 3.911634532354131e-05, + "loss": 0.125213623046875, + "step": 1146 + }, + { + "epoch": 0.15982721382289417, + "grad_norm": 0.5595503449440002, + "learning_rate": 3.911357198552321e-05, + "loss": 0.11673736572265625, + "step": 1147 + }, + { + "epoch": 0.15996655751410854, + "grad_norm": 0.46021246910095215, + "learning_rate": 3.9110794400905785e-05, + "loss": 0.08983230590820312, + "step": 1148 + }, + { + "epoch": 0.16010590120532292, + "grad_norm": 0.8177854418754578, + "learning_rate": 3.9108012570306143e-05, + "loss": 0.10196113586425781, + "step": 1149 + }, + { + "epoch": 0.1602452448965373, + "grad_norm": 1.7166720628738403, + "learning_rate": 3.910522649434236e-05, + "loss": 0.13520050048828125, + "step": 1150 + }, + { + "epoch": 0.1603845885877517, + "grad_norm": 1.1033693552017212, + "learning_rate": 3.9102436173633425e-05, + "loss": 0.10908126831054688, + "step": 1151 + }, + { + "epoch": 0.16052393227896608, + "grad_norm": 0.5163620710372925, + "learning_rate": 3.9099641608799286e-05, + "loss": 0.09503173828125, + "step": 1152 + }, + { + "epoch": 0.16066327597018046, + "grad_norm": 2.1093289852142334, + "learning_rate": 3.9096842800460836e-05, + "loss": 0.13034629821777344, + "step": 1153 + }, + { + "epoch": 0.16080261966139484, + "grad_norm": 1.6433382034301758, + "learning_rate": 3.909403974923991e-05, + "loss": 0.12904739379882812, + "step": 1154 + }, + { + "epoch": 0.1609419633526092, + "grad_norm": 1.3105018138885498, + "learning_rate": 3.9091232455759274e-05, + "loss": 0.11600589752197266, + "step": 1155 + }, + { + "epoch": 0.1610813070438236, + "grad_norm": 0.5901782512664795, + "learning_rate": 3.908842092064264e-05, + "loss": 0.09105682373046875, + "step": 1156 + }, + { + "epoch": 0.16122065073503797, + "grad_norm": 0.750860869884491, + "learning_rate": 3.9085605144514674e-05, + "loss": 0.11053466796875, + "step": 1157 + }, + { + "epoch": 0.16135999442625235, + "grad_norm": 0.6785320043563843, + "learning_rate": 3.908278512800098e-05, + "loss": 0.11784553527832031, + "step": 1158 + }, + { + "epoch": 0.16149933811746672, + "grad_norm": 0.7361307144165039, + "learning_rate": 3.9079960871728094e-05, + "loss": 0.10038375854492188, + "step": 1159 + }, + { + "epoch": 0.1616386818086811, + "grad_norm": 1.593086838722229, + "learning_rate": 3.907713237632351e-05, + "loss": 0.13287353515625, + "step": 1160 + }, + { + "epoch": 0.1617780254998955, + "grad_norm": 1.1910334825515747, + "learning_rate": 3.907429964241565e-05, + "loss": 0.12178611755371094, + "step": 1161 + }, + { + "epoch": 0.16191736919110988, + "grad_norm": 0.4607972502708435, + "learning_rate": 3.907146267063389e-05, + "loss": 0.089508056640625, + "step": 1162 + }, + { + "epoch": 0.16205671288232426, + "grad_norm": 0.6554842591285706, + "learning_rate": 3.906862146160852e-05, + "loss": 0.11532211303710938, + "step": 1163 + }, + { + "epoch": 0.16219605657353864, + "grad_norm": 0.5282660126686096, + "learning_rate": 3.9065776015970815e-05, + "loss": 0.09833717346191406, + "step": 1164 + }, + { + "epoch": 0.16233540026475302, + "grad_norm": 2.1841564178466797, + "learning_rate": 3.906292633435295e-05, + "loss": 0.12623023986816406, + "step": 1165 + }, + { + "epoch": 0.1624747439559674, + "grad_norm": 1.46465265750885, + "learning_rate": 3.906007241738807e-05, + "loss": 0.1271381378173828, + "step": 1166 + }, + { + "epoch": 0.16261408764718177, + "grad_norm": 2.2730979919433594, + "learning_rate": 3.9057214265710245e-05, + "loss": 0.1295623779296875, + "step": 1167 + }, + { + "epoch": 0.16275343133839615, + "grad_norm": 0.567375898361206, + "learning_rate": 3.9054351879954505e-05, + "loss": 0.10804176330566406, + "step": 1168 + }, + { + "epoch": 0.16289277502961053, + "grad_norm": 0.7428980469703674, + "learning_rate": 3.905148526075679e-05, + "loss": 0.13909530639648438, + "step": 1169 + }, + { + "epoch": 0.1630321187208249, + "grad_norm": 1.1364778280258179, + "learning_rate": 3.9048614408754e-05, + "loss": 0.12177085876464844, + "step": 1170 + }, + { + "epoch": 0.1631714624120393, + "grad_norm": 1.8678733110427856, + "learning_rate": 3.904573932458398e-05, + "loss": 0.11214256286621094, + "step": 1171 + }, + { + "epoch": 0.1633108061032537, + "grad_norm": 0.965514600276947, + "learning_rate": 3.90428600088855e-05, + "loss": 0.10311126708984375, + "step": 1172 + }, + { + "epoch": 0.16345014979446806, + "grad_norm": 0.8153771758079529, + "learning_rate": 3.9039976462298284e-05, + "loss": 0.09720611572265625, + "step": 1173 + }, + { + "epoch": 0.16358949348568244, + "grad_norm": 0.7599051594734192, + "learning_rate": 3.9037088685462985e-05, + "loss": 0.10807418823242188, + "step": 1174 + }, + { + "epoch": 0.16372883717689682, + "grad_norm": 0.9416832327842712, + "learning_rate": 3.9034196679021206e-05, + "loss": 0.1026458740234375, + "step": 1175 + }, + { + "epoch": 0.1638681808681112, + "grad_norm": 0.6221461296081543, + "learning_rate": 3.903130044361549e-05, + "loss": 0.08760452270507812, + "step": 1176 + }, + { + "epoch": 0.16400752455932557, + "grad_norm": 1.7404636144638062, + "learning_rate": 3.902839997988929e-05, + "loss": 0.13453292846679688, + "step": 1177 + }, + { + "epoch": 0.16414686825053995, + "grad_norm": 1.419524908065796, + "learning_rate": 3.902549528848705e-05, + "loss": 0.114105224609375, + "step": 1178 + }, + { + "epoch": 0.16428621194175433, + "grad_norm": 0.5974566340446472, + "learning_rate": 3.902258637005412e-05, + "loss": 0.12697219848632812, + "step": 1179 + }, + { + "epoch": 0.1644255556329687, + "grad_norm": 0.4588046967983246, + "learning_rate": 3.901967322523679e-05, + "loss": 0.09241485595703125, + "step": 1180 + }, + { + "epoch": 0.16456489932418308, + "grad_norm": 0.5260958075523376, + "learning_rate": 3.901675585468229e-05, + "loss": 0.092498779296875, + "step": 1181 + }, + { + "epoch": 0.1647042430153975, + "grad_norm": 1.322057843208313, + "learning_rate": 3.9013834259038805e-05, + "loss": 0.12258148193359375, + "step": 1182 + }, + { + "epoch": 0.16484358670661187, + "grad_norm": 0.8765625953674316, + "learning_rate": 3.9010908438955436e-05, + "loss": 0.11204338073730469, + "step": 1183 + }, + { + "epoch": 0.16498293039782624, + "grad_norm": 0.5927107334136963, + "learning_rate": 3.900797839508225e-05, + "loss": 0.1091156005859375, + "step": 1184 + }, + { + "epoch": 0.16512227408904062, + "grad_norm": 0.647461473941803, + "learning_rate": 3.900504412807021e-05, + "loss": 0.11450958251953125, + "step": 1185 + }, + { + "epoch": 0.165261617780255, + "grad_norm": 0.8189169764518738, + "learning_rate": 3.900210563857127e-05, + "loss": 0.10146331787109375, + "step": 1186 + }, + { + "epoch": 0.16540096147146938, + "grad_norm": 0.743200421333313, + "learning_rate": 3.8999162927238274e-05, + "loss": 0.09810066223144531, + "step": 1187 + }, + { + "epoch": 0.16554030516268375, + "grad_norm": 0.5914251804351807, + "learning_rate": 3.899621599472504e-05, + "loss": 0.11703681945800781, + "step": 1188 + }, + { + "epoch": 0.16567964885389813, + "grad_norm": 0.4287813603878021, + "learning_rate": 3.899326484168629e-05, + "loss": 0.10734367370605469, + "step": 1189 + }, + { + "epoch": 0.1658189925451125, + "grad_norm": 0.43371325731277466, + "learning_rate": 3.899030946877773e-05, + "loss": 0.07742500305175781, + "step": 1190 + }, + { + "epoch": 0.1659583362363269, + "grad_norm": 0.42002782225608826, + "learning_rate": 3.898734987665596e-05, + "loss": 0.09702301025390625, + "step": 1191 + }, + { + "epoch": 0.1660976799275413, + "grad_norm": 0.4676485061645508, + "learning_rate": 3.898438606597853e-05, + "loss": 0.09243392944335938, + "step": 1192 + }, + { + "epoch": 0.16623702361875567, + "grad_norm": 0.5574652552604675, + "learning_rate": 3.898141803740393e-05, + "loss": 0.09253692626953125, + "step": 1193 + }, + { + "epoch": 0.16637636730997005, + "grad_norm": 0.685685932636261, + "learning_rate": 3.897844579159161e-05, + "loss": 0.09691715240478516, + "step": 1194 + }, + { + "epoch": 0.16651571100118442, + "grad_norm": 1.6578930616378784, + "learning_rate": 3.897546932920191e-05, + "loss": 0.10687637329101562, + "step": 1195 + }, + { + "epoch": 0.1666550546923988, + "grad_norm": 0.6904575824737549, + "learning_rate": 3.897248865089615e-05, + "loss": 0.10463333129882812, + "step": 1196 + }, + { + "epoch": 0.16679439838361318, + "grad_norm": 0.7422584295272827, + "learning_rate": 3.8969503757336564e-05, + "loss": 0.10846710205078125, + "step": 1197 + }, + { + "epoch": 0.16693374207482756, + "grad_norm": 0.5259013772010803, + "learning_rate": 3.896651464918632e-05, + "loss": 0.09645843505859375, + "step": 1198 + }, + { + "epoch": 0.16707308576604193, + "grad_norm": 0.7598360776901245, + "learning_rate": 3.896352132710953e-05, + "loss": 0.10099029541015625, + "step": 1199 + }, + { + "epoch": 0.1672124294572563, + "grad_norm": 1.5906440019607544, + "learning_rate": 3.896052379177125e-05, + "loss": 0.11890792846679688, + "step": 1200 + }, + { + "epoch": 0.1673517731484707, + "grad_norm": 0.9067375659942627, + "learning_rate": 3.895752204383746e-05, + "loss": 0.08875656127929688, + "step": 1201 + }, + { + "epoch": 0.1674911168396851, + "grad_norm": 0.6946342587471008, + "learning_rate": 3.8954516083975075e-05, + "loss": 0.08556556701660156, + "step": 1202 + }, + { + "epoch": 0.16763046053089947, + "grad_norm": 1.1142827272415161, + "learning_rate": 3.8951505912851956e-05, + "loss": 0.11994552612304688, + "step": 1203 + }, + { + "epoch": 0.16776980422211385, + "grad_norm": 0.6152790784835815, + "learning_rate": 3.89484915311369e-05, + "loss": 0.10246849060058594, + "step": 1204 + }, + { + "epoch": 0.16790914791332823, + "grad_norm": 0.4896419048309326, + "learning_rate": 3.8945472939499616e-05, + "loss": 0.10712814331054688, + "step": 1205 + }, + { + "epoch": 0.1680484916045426, + "grad_norm": 0.5622251629829407, + "learning_rate": 3.894245013861079e-05, + "loss": 0.0962677001953125, + "step": 1206 + }, + { + "epoch": 0.16818783529575698, + "grad_norm": 0.42144399881362915, + "learning_rate": 3.8939423129141996e-05, + "loss": 0.08344650268554688, + "step": 1207 + }, + { + "epoch": 0.16832717898697136, + "grad_norm": 0.9202308058738708, + "learning_rate": 3.8936391911765784e-05, + "loss": 0.10640287399291992, + "step": 1208 + }, + { + "epoch": 0.16846652267818574, + "grad_norm": 0.757303774356842, + "learning_rate": 3.893335648715561e-05, + "loss": 0.12560081481933594, + "step": 1209 + }, + { + "epoch": 0.16860586636940011, + "grad_norm": 0.7360061407089233, + "learning_rate": 3.893031685598588e-05, + "loss": 0.1181488037109375, + "step": 1210 + }, + { + "epoch": 0.1687452100606145, + "grad_norm": 0.5497124195098877, + "learning_rate": 3.8927273018931934e-05, + "loss": 0.09655189514160156, + "step": 1211 + }, + { + "epoch": 0.1688845537518289, + "grad_norm": 0.6941784620285034, + "learning_rate": 3.892422497667004e-05, + "loss": 0.08451461791992188, + "step": 1212 + }, + { + "epoch": 0.16902389744304328, + "grad_norm": 1.2585688829421997, + "learning_rate": 3.89211727298774e-05, + "loss": 0.10241508483886719, + "step": 1213 + }, + { + "epoch": 0.16916324113425765, + "grad_norm": 0.6854430437088013, + "learning_rate": 3.891811627923216e-05, + "loss": 0.09734344482421875, + "step": 1214 + }, + { + "epoch": 0.16930258482547203, + "grad_norm": 1.1250931024551392, + "learning_rate": 3.89150556254134e-05, + "loss": 0.12272071838378906, + "step": 1215 + }, + { + "epoch": 0.1694419285166864, + "grad_norm": 0.5334823131561279, + "learning_rate": 3.89119907691011e-05, + "loss": 0.10058784484863281, + "step": 1216 + }, + { + "epoch": 0.16958127220790079, + "grad_norm": 0.6786034107208252, + "learning_rate": 3.8908921710976234e-05, + "loss": 0.10854911804199219, + "step": 1217 + }, + { + "epoch": 0.16972061589911516, + "grad_norm": 1.7209757566452026, + "learning_rate": 3.890584845172066e-05, + "loss": 0.12681961059570312, + "step": 1218 + }, + { + "epoch": 0.16985995959032954, + "grad_norm": 1.4221563339233398, + "learning_rate": 3.890277099201718e-05, + "loss": 0.09784889221191406, + "step": 1219 + }, + { + "epoch": 0.16999930328154392, + "grad_norm": 1.6712332963943481, + "learning_rate": 3.889968933254954e-05, + "loss": 0.114349365234375, + "step": 1220 + }, + { + "epoch": 0.1701386469727583, + "grad_norm": 0.545382022857666, + "learning_rate": 3.889660347400243e-05, + "loss": 0.10887718200683594, + "step": 1221 + }, + { + "epoch": 0.1702779906639727, + "grad_norm": 0.8008676767349243, + "learning_rate": 3.889351341706144e-05, + "loss": 0.11457252502441406, + "step": 1222 + }, + { + "epoch": 0.17041733435518708, + "grad_norm": 0.8593103289604187, + "learning_rate": 3.8890419162413114e-05, + "loss": 0.09812784194946289, + "step": 1223 + }, + { + "epoch": 0.17055667804640146, + "grad_norm": 1.119849681854248, + "learning_rate": 3.8887320710744923e-05, + "loss": 0.09301376342773438, + "step": 1224 + }, + { + "epoch": 0.17069602173761583, + "grad_norm": 0.6343709230422974, + "learning_rate": 3.888421806274528e-05, + "loss": 0.08209991455078125, + "step": 1225 + }, + { + "epoch": 0.1708353654288302, + "grad_norm": 0.5226134657859802, + "learning_rate": 3.8881111219103516e-05, + "loss": 0.10182380676269531, + "step": 1226 + }, + { + "epoch": 0.1709747091200446, + "grad_norm": 0.6347759962081909, + "learning_rate": 3.88780001805099e-05, + "loss": 0.1033477783203125, + "step": 1227 + }, + { + "epoch": 0.17111405281125897, + "grad_norm": 0.8711373805999756, + "learning_rate": 3.8874884947655636e-05, + "loss": 0.10419845581054688, + "step": 1228 + }, + { + "epoch": 0.17125339650247334, + "grad_norm": 0.5903793573379517, + "learning_rate": 3.8871765521232865e-05, + "loss": 0.12643051147460938, + "step": 1229 + }, + { + "epoch": 0.17139274019368772, + "grad_norm": 0.6512933373451233, + "learning_rate": 3.8868641901934636e-05, + "loss": 0.10733985900878906, + "step": 1230 + }, + { + "epoch": 0.1715320838849021, + "grad_norm": 0.6851748824119568, + "learning_rate": 3.886551409045496e-05, + "loss": 0.11083984375, + "step": 1231 + }, + { + "epoch": 0.1716714275761165, + "grad_norm": 0.4329909086227417, + "learning_rate": 3.886238208748876e-05, + "loss": 0.09410476684570312, + "step": 1232 + }, + { + "epoch": 0.17181077126733088, + "grad_norm": 0.5855550169944763, + "learning_rate": 3.885924589373189e-05, + "loss": 0.08937263488769531, + "step": 1233 + }, + { + "epoch": 0.17195011495854526, + "grad_norm": 0.6784842014312744, + "learning_rate": 3.885610550988115e-05, + "loss": 0.09896469116210938, + "step": 1234 + }, + { + "epoch": 0.17208945864975964, + "grad_norm": 0.8468036651611328, + "learning_rate": 3.885296093663426e-05, + "loss": 0.10102653503417969, + "step": 1235 + }, + { + "epoch": 0.172228802340974, + "grad_norm": 0.47271057963371277, + "learning_rate": 3.884981217468987e-05, + "loss": 0.09244728088378906, + "step": 1236 + }, + { + "epoch": 0.1723681460321884, + "grad_norm": 0.703298032283783, + "learning_rate": 3.884665922474756e-05, + "loss": 0.09319114685058594, + "step": 1237 + }, + { + "epoch": 0.17250748972340277, + "grad_norm": 0.7133054733276367, + "learning_rate": 3.884350208750784e-05, + "loss": 0.104278564453125, + "step": 1238 + }, + { + "epoch": 0.17264683341461715, + "grad_norm": 0.9465746879577637, + "learning_rate": 3.884034076367218e-05, + "loss": 0.09842491149902344, + "step": 1239 + }, + { + "epoch": 0.17278617710583152, + "grad_norm": 0.82210773229599, + "learning_rate": 3.883717525394292e-05, + "loss": 0.12450981140136719, + "step": 1240 + }, + { + "epoch": 0.1729255207970459, + "grad_norm": 0.5873798727989197, + "learning_rate": 3.883400555902338e-05, + "loss": 0.10650825500488281, + "step": 1241 + }, + { + "epoch": 0.1730648644882603, + "grad_norm": 0.4849681854248047, + "learning_rate": 3.88308316796178e-05, + "loss": 0.08860301971435547, + "step": 1242 + }, + { + "epoch": 0.17320420817947468, + "grad_norm": 1.1349868774414062, + "learning_rate": 3.882765361643133e-05, + "loss": 0.12273025512695312, + "step": 1243 + }, + { + "epoch": 0.17334355187068906, + "grad_norm": 0.7685161232948303, + "learning_rate": 3.882447137017007e-05, + "loss": 0.10256290435791016, + "step": 1244 + }, + { + "epoch": 0.17348289556190344, + "grad_norm": 1.496555209159851, + "learning_rate": 3.882128494154104e-05, + "loss": 0.10572528839111328, + "step": 1245 + }, + { + "epoch": 0.17362223925311782, + "grad_norm": 0.6639019250869751, + "learning_rate": 3.8818094331252194e-05, + "loss": 0.10349273681640625, + "step": 1246 + }, + { + "epoch": 0.1737615829443322, + "grad_norm": 0.9107252359390259, + "learning_rate": 3.881489954001241e-05, + "loss": 0.11998844146728516, + "step": 1247 + }, + { + "epoch": 0.17390092663554657, + "grad_norm": 0.6336011290550232, + "learning_rate": 3.88117005685315e-05, + "loss": 0.09730911254882812, + "step": 1248 + }, + { + "epoch": 0.17404027032676095, + "grad_norm": 1.2604351043701172, + "learning_rate": 3.88084974175202e-05, + "loss": 0.11320114135742188, + "step": 1249 + }, + { + "epoch": 0.17417961401797533, + "grad_norm": 0.5151472091674805, + "learning_rate": 3.8805290087690196e-05, + "loss": 0.098052978515625, + "step": 1250 + }, + { + "epoch": 0.1743189577091897, + "grad_norm": 0.9305667281150818, + "learning_rate": 3.880207857975405e-05, + "loss": 0.14343643188476562, + "step": 1251 + }, + { + "epoch": 0.1744583014004041, + "grad_norm": 0.514909565448761, + "learning_rate": 3.879886289442531e-05, + "loss": 0.11285018920898438, + "step": 1252 + }, + { + "epoch": 0.1745976450916185, + "grad_norm": 0.46262866258621216, + "learning_rate": 3.879564303241841e-05, + "loss": 0.09924697875976562, + "step": 1253 + }, + { + "epoch": 0.17473698878283286, + "grad_norm": 1.0329948663711548, + "learning_rate": 3.8792418994448746e-05, + "loss": 0.1333293914794922, + "step": 1254 + }, + { + "epoch": 0.17487633247404724, + "grad_norm": 1.0902217626571655, + "learning_rate": 3.8789190781232626e-05, + "loss": 0.126190185546875, + "step": 1255 + }, + { + "epoch": 0.17501567616526162, + "grad_norm": 0.8659790754318237, + "learning_rate": 3.878595839348727e-05, + "loss": 0.07441520690917969, + "step": 1256 + }, + { + "epoch": 0.175155019856476, + "grad_norm": 1.613726019859314, + "learning_rate": 3.878272183193085e-05, + "loss": 0.11447811126708984, + "step": 1257 + }, + { + "epoch": 0.17529436354769037, + "grad_norm": 1.076707363128662, + "learning_rate": 3.8779481097282464e-05, + "loss": 0.09854316711425781, + "step": 1258 + }, + { + "epoch": 0.17543370723890475, + "grad_norm": 0.7574031352996826, + "learning_rate": 3.8776236190262114e-05, + "loss": 0.11490440368652344, + "step": 1259 + }, + { + "epoch": 0.17557305093011913, + "grad_norm": 1.4560134410858154, + "learning_rate": 3.877298711159076e-05, + "loss": 0.10496711730957031, + "step": 1260 + }, + { + "epoch": 0.1757123946213335, + "grad_norm": 0.8728396892547607, + "learning_rate": 3.876973386199025e-05, + "loss": 0.0773162841796875, + "step": 1261 + }, + { + "epoch": 0.1758517383125479, + "grad_norm": 0.9272534251213074, + "learning_rate": 3.87664764421834e-05, + "loss": 0.1280803680419922, + "step": 1262 + }, + { + "epoch": 0.1759910820037623, + "grad_norm": 0.7222005128860474, + "learning_rate": 3.876321485289394e-05, + "loss": 0.10638713836669922, + "step": 1263 + }, + { + "epoch": 0.17613042569497667, + "grad_norm": 0.5347703099250793, + "learning_rate": 3.87599490948465e-05, + "loss": 0.0886383056640625, + "step": 1264 + }, + { + "epoch": 0.17626976938619104, + "grad_norm": 0.5841286778450012, + "learning_rate": 3.875667916876668e-05, + "loss": 0.09392166137695312, + "step": 1265 + }, + { + "epoch": 0.17640911307740542, + "grad_norm": 0.5347645878791809, + "learning_rate": 3.875340507538096e-05, + "loss": 0.08567237854003906, + "step": 1266 + }, + { + "epoch": 0.1765484567686198, + "grad_norm": 1.459162950515747, + "learning_rate": 3.875012681541678e-05, + "loss": 0.12564659118652344, + "step": 1267 + }, + { + "epoch": 0.17668780045983418, + "grad_norm": 0.5253599286079407, + "learning_rate": 3.87468443896025e-05, + "loss": 0.10811614990234375, + "step": 1268 + }, + { + "epoch": 0.17682714415104855, + "grad_norm": 0.5524057149887085, + "learning_rate": 3.8743557798667395e-05, + "loss": 0.09711456298828125, + "step": 1269 + }, + { + "epoch": 0.17696648784226293, + "grad_norm": 0.6123748421669006, + "learning_rate": 3.874026704334167e-05, + "loss": 0.123992919921875, + "step": 1270 + }, + { + "epoch": 0.1771058315334773, + "grad_norm": 0.6877507567405701, + "learning_rate": 3.873697212435645e-05, + "loss": 0.12399673461914062, + "step": 1271 + }, + { + "epoch": 0.17724517522469171, + "grad_norm": 0.4887964725494385, + "learning_rate": 3.87336730424438e-05, + "loss": 0.09934616088867188, + "step": 1272 + }, + { + "epoch": 0.1773845189159061, + "grad_norm": 0.4551629424095154, + "learning_rate": 3.87303697983367e-05, + "loss": 0.10813522338867188, + "step": 1273 + }, + { + "epoch": 0.17752386260712047, + "grad_norm": 0.6707220673561096, + "learning_rate": 3.872706239276904e-05, + "loss": 0.11825180053710938, + "step": 1274 + }, + { + "epoch": 0.17766320629833485, + "grad_norm": 0.49373602867126465, + "learning_rate": 3.8723750826475674e-05, + "loss": 0.11932563781738281, + "step": 1275 + }, + { + "epoch": 0.17780254998954922, + "grad_norm": 0.5918702483177185, + "learning_rate": 3.872043510019235e-05, + "loss": 0.10732269287109375, + "step": 1276 + }, + { + "epoch": 0.1779418936807636, + "grad_norm": 0.9433500170707703, + "learning_rate": 3.871711521465573e-05, + "loss": 0.11172676086425781, + "step": 1277 + }, + { + "epoch": 0.17808123737197798, + "grad_norm": 0.4509205222129822, + "learning_rate": 3.871379117060343e-05, + "loss": 0.08376693725585938, + "step": 1278 + }, + { + "epoch": 0.17822058106319236, + "grad_norm": 1.2193310260772705, + "learning_rate": 3.871046296877398e-05, + "loss": 0.09907913208007812, + "step": 1279 + }, + { + "epoch": 0.17835992475440673, + "grad_norm": 0.6063570380210876, + "learning_rate": 3.870713060990682e-05, + "loss": 0.09279632568359375, + "step": 1280 + }, + { + "epoch": 0.1784992684456211, + "grad_norm": 0.6265129446983337, + "learning_rate": 3.870379409474233e-05, + "loss": 0.11450481414794922, + "step": 1281 + }, + { + "epoch": 0.17863861213683552, + "grad_norm": 0.7718793749809265, + "learning_rate": 3.870045342402181e-05, + "loss": 0.09600830078125, + "step": 1282 + }, + { + "epoch": 0.1787779558280499, + "grad_norm": 0.7753949165344238, + "learning_rate": 3.8697108598487474e-05, + "loss": 0.08893775939941406, + "step": 1283 + }, + { + "epoch": 0.17891729951926427, + "grad_norm": 1.1020095348358154, + "learning_rate": 3.8693759618882475e-05, + "loss": 0.1143035888671875, + "step": 1284 + }, + { + "epoch": 0.17905664321047865, + "grad_norm": 0.5516060590744019, + "learning_rate": 3.8690406485950874e-05, + "loss": 0.09313583374023438, + "step": 1285 + }, + { + "epoch": 0.17919598690169303, + "grad_norm": 0.5935307145118713, + "learning_rate": 3.868704920043766e-05, + "loss": 0.10854339599609375, + "step": 1286 + }, + { + "epoch": 0.1793353305929074, + "grad_norm": 1.2384552955627441, + "learning_rate": 3.8683687763088745e-05, + "loss": 0.13130569458007812, + "step": 1287 + }, + { + "epoch": 0.17947467428412178, + "grad_norm": 0.7888645529747009, + "learning_rate": 3.868032217465097e-05, + "loss": 0.12990570068359375, + "step": 1288 + }, + { + "epoch": 0.17961401797533616, + "grad_norm": 0.9677367806434631, + "learning_rate": 3.867695243587207e-05, + "loss": 0.11485862731933594, + "step": 1289 + }, + { + "epoch": 0.17975336166655054, + "grad_norm": 0.6230425834655762, + "learning_rate": 3.8673578547500754e-05, + "loss": 0.11978530883789062, + "step": 1290 + }, + { + "epoch": 0.17989270535776491, + "grad_norm": 0.48344534635543823, + "learning_rate": 3.867020051028661e-05, + "loss": 0.10195541381835938, + "step": 1291 + }, + { + "epoch": 0.18003204904897932, + "grad_norm": 0.8580906987190247, + "learning_rate": 3.8666818324980165e-05, + "loss": 0.11065864562988281, + "step": 1292 + }, + { + "epoch": 0.1801713927401937, + "grad_norm": 0.6192455291748047, + "learning_rate": 3.866343199233285e-05, + "loss": 0.10961723327636719, + "step": 1293 + }, + { + "epoch": 0.18031073643140808, + "grad_norm": 0.6871436238288879, + "learning_rate": 3.866004151309704e-05, + "loss": 0.11669254302978516, + "step": 1294 + }, + { + "epoch": 0.18045008012262245, + "grad_norm": 0.5778213739395142, + "learning_rate": 3.8656646888026026e-05, + "loss": 0.101226806640625, + "step": 1295 + }, + { + "epoch": 0.18058942381383683, + "grad_norm": 0.719910740852356, + "learning_rate": 3.8653248117874015e-05, + "loss": 0.12615394592285156, + "step": 1296 + }, + { + "epoch": 0.1807287675050512, + "grad_norm": 0.42480331659317017, + "learning_rate": 3.8649845203396125e-05, + "loss": 0.08732414245605469, + "step": 1297 + }, + { + "epoch": 0.18086811119626559, + "grad_norm": 0.792408287525177, + "learning_rate": 3.8646438145348415e-05, + "loss": 0.12754249572753906, + "step": 1298 + }, + { + "epoch": 0.18100745488747996, + "grad_norm": 0.9549935460090637, + "learning_rate": 3.8643026944487856e-05, + "loss": 0.123931884765625, + "step": 1299 + }, + { + "epoch": 0.18114679857869434, + "grad_norm": 0.599617063999176, + "learning_rate": 3.8639611601572345e-05, + "loss": 0.09657859802246094, + "step": 1300 + }, + { + "epoch": 0.18128614226990872, + "grad_norm": 0.9264459609985352, + "learning_rate": 3.8636192117360676e-05, + "loss": 0.11392784118652344, + "step": 1301 + }, + { + "epoch": 0.18142548596112312, + "grad_norm": 0.43463006615638733, + "learning_rate": 3.8632768492612596e-05, + "loss": 0.0997772216796875, + "step": 1302 + }, + { + "epoch": 0.1815648296523375, + "grad_norm": 0.6216017603874207, + "learning_rate": 3.862934072808875e-05, + "loss": 0.10479545593261719, + "step": 1303 + }, + { + "epoch": 0.18170417334355188, + "grad_norm": 0.9465147852897644, + "learning_rate": 3.86259088245507e-05, + "loss": 0.11695098876953125, + "step": 1304 + }, + { + "epoch": 0.18184351703476626, + "grad_norm": 1.73384428024292, + "learning_rate": 3.8622472782760956e-05, + "loss": 0.14857101440429688, + "step": 1305 + }, + { + "epoch": 0.18198286072598063, + "grad_norm": 0.865997850894928, + "learning_rate": 3.861903260348291e-05, + "loss": 0.11359596252441406, + "step": 1306 + }, + { + "epoch": 0.182122204417195, + "grad_norm": 0.8787851929664612, + "learning_rate": 3.8615588287480906e-05, + "loss": 0.12094497680664062, + "step": 1307 + }, + { + "epoch": 0.1822615481084094, + "grad_norm": 0.4675975441932678, + "learning_rate": 3.861213983552018e-05, + "loss": 0.10600852966308594, + "step": 1308 + }, + { + "epoch": 0.18240089179962377, + "grad_norm": 0.4574449360370636, + "learning_rate": 3.860868724836691e-05, + "loss": 0.10350322723388672, + "step": 1309 + }, + { + "epoch": 0.18254023549083814, + "grad_norm": 0.4737798869609833, + "learning_rate": 3.860523052678818e-05, + "loss": 0.10074424743652344, + "step": 1310 + }, + { + "epoch": 0.18267957918205252, + "grad_norm": 2.117921829223633, + "learning_rate": 3.860176967155198e-05, + "loss": 0.10455513000488281, + "step": 1311 + }, + { + "epoch": 0.18281892287326693, + "grad_norm": 0.5849491357803345, + "learning_rate": 3.8598304683427257e-05, + "loss": 0.09949493408203125, + "step": 1312 + }, + { + "epoch": 0.1829582665644813, + "grad_norm": 0.41159746050834656, + "learning_rate": 3.859483556318384e-05, + "loss": 0.10650634765625, + "step": 1313 + }, + { + "epoch": 0.18309761025569568, + "grad_norm": 0.4763065278530121, + "learning_rate": 3.859136231159248e-05, + "loss": 0.09949111938476562, + "step": 1314 + }, + { + "epoch": 0.18323695394691006, + "grad_norm": 0.5769935846328735, + "learning_rate": 3.858788492942486e-05, + "loss": 0.12614822387695312, + "step": 1315 + }, + { + "epoch": 0.18337629763812444, + "grad_norm": 0.43045705556869507, + "learning_rate": 3.8584403417453586e-05, + "loss": 0.10144996643066406, + "step": 1316 + }, + { + "epoch": 0.1835156413293388, + "grad_norm": 0.8331438899040222, + "learning_rate": 3.858091777645216e-05, + "loss": 0.12700271606445312, + "step": 1317 + }, + { + "epoch": 0.1836549850205532, + "grad_norm": 0.8648833632469177, + "learning_rate": 3.857742800719501e-05, + "loss": 0.09358406066894531, + "step": 1318 + }, + { + "epoch": 0.18379432871176757, + "grad_norm": 0.5318050980567932, + "learning_rate": 3.857393411045749e-05, + "loss": 0.10337066650390625, + "step": 1319 + }, + { + "epoch": 0.18393367240298195, + "grad_norm": 0.5023824572563171, + "learning_rate": 3.8570436087015855e-05, + "loss": 0.10684585571289062, + "step": 1320 + }, + { + "epoch": 0.18407301609419632, + "grad_norm": 0.8683139085769653, + "learning_rate": 3.8566933937647294e-05, + "loss": 0.09477043151855469, + "step": 1321 + }, + { + "epoch": 0.18421235978541073, + "grad_norm": 0.7129829525947571, + "learning_rate": 3.856342766312991e-05, + "loss": 0.11628341674804688, + "step": 1322 + }, + { + "epoch": 0.1843517034766251, + "grad_norm": 0.6468017101287842, + "learning_rate": 3.85599172642427e-05, + "loss": 0.11782073974609375, + "step": 1323 + }, + { + "epoch": 0.18449104716783948, + "grad_norm": 0.9843611121177673, + "learning_rate": 3.855640274176561e-05, + "loss": 0.10113525390625, + "step": 1324 + }, + { + "epoch": 0.18463039085905386, + "grad_norm": 0.408173531293869, + "learning_rate": 3.8552884096479476e-05, + "loss": 0.10539627075195312, + "step": 1325 + }, + { + "epoch": 0.18476973455026824, + "grad_norm": 0.7548376321792603, + "learning_rate": 3.854936132916607e-05, + "loss": 0.10464859008789062, + "step": 1326 + }, + { + "epoch": 0.18490907824148262, + "grad_norm": 0.6204270124435425, + "learning_rate": 3.854583444060806e-05, + "loss": 0.10668182373046875, + "step": 1327 + }, + { + "epoch": 0.185048421932697, + "grad_norm": 1.0405303239822388, + "learning_rate": 3.854230343158906e-05, + "loss": 0.10680580139160156, + "step": 1328 + }, + { + "epoch": 0.18518776562391137, + "grad_norm": 0.7958316802978516, + "learning_rate": 3.8538768302893544e-05, + "loss": 0.09859085083007812, + "step": 1329 + }, + { + "epoch": 0.18532710931512575, + "grad_norm": 0.6874585747718811, + "learning_rate": 3.853522905530698e-05, + "loss": 0.10457420349121094, + "step": 1330 + }, + { + "epoch": 0.18546645300634013, + "grad_norm": 0.7485847473144531, + "learning_rate": 3.853168568961567e-05, + "loss": 0.104736328125, + "step": 1331 + }, + { + "epoch": 0.18560579669755453, + "grad_norm": 1.2033241987228394, + "learning_rate": 3.852813820660689e-05, + "loss": 0.12401962280273438, + "step": 1332 + }, + { + "epoch": 0.1857451403887689, + "grad_norm": 0.7698356509208679, + "learning_rate": 3.852458660706881e-05, + "loss": 0.10686302185058594, + "step": 1333 + }, + { + "epoch": 0.1858844840799833, + "grad_norm": 1.682673692703247, + "learning_rate": 3.85210308917905e-05, + "loss": 0.10576057434082031, + "step": 1334 + }, + { + "epoch": 0.18602382777119766, + "grad_norm": 0.5718144178390503, + "learning_rate": 3.8517471061561974e-05, + "loss": 0.09949493408203125, + "step": 1335 + }, + { + "epoch": 0.18616317146241204, + "grad_norm": 0.5549457669258118, + "learning_rate": 3.851390711717414e-05, + "loss": 0.08799362182617188, + "step": 1336 + }, + { + "epoch": 0.18630251515362642, + "grad_norm": 0.7408481240272522, + "learning_rate": 3.851033905941882e-05, + "loss": 0.07938480377197266, + "step": 1337 + }, + { + "epoch": 0.1864418588448408, + "grad_norm": 1.043675184249878, + "learning_rate": 3.850676688908877e-05, + "loss": 0.10807228088378906, + "step": 1338 + }, + { + "epoch": 0.18658120253605517, + "grad_norm": 0.7098798155784607, + "learning_rate": 3.8503190606977624e-05, + "loss": 0.08223152160644531, + "step": 1339 + }, + { + "epoch": 0.18672054622726955, + "grad_norm": 0.9535657167434692, + "learning_rate": 3.849961021387996e-05, + "loss": 0.111663818359375, + "step": 1340 + }, + { + "epoch": 0.18685988991848393, + "grad_norm": 0.580849826335907, + "learning_rate": 3.849602571059127e-05, + "loss": 0.11001968383789062, + "step": 1341 + }, + { + "epoch": 0.18699923360969833, + "grad_norm": 1.03390371799469, + "learning_rate": 3.849243709790793e-05, + "loss": 0.08719062805175781, + "step": 1342 + }, + { + "epoch": 0.1871385773009127, + "grad_norm": 0.6524010300636292, + "learning_rate": 3.848884437662725e-05, + "loss": 0.10087203979492188, + "step": 1343 + }, + { + "epoch": 0.1872779209921271, + "grad_norm": 0.812218189239502, + "learning_rate": 3.8485247547547465e-05, + "loss": 0.13932418823242188, + "step": 1344 + }, + { + "epoch": 0.18741726468334147, + "grad_norm": 0.7532755732536316, + "learning_rate": 3.8481646611467704e-05, + "loss": 0.10669708251953125, + "step": 1345 + }, + { + "epoch": 0.18755660837455584, + "grad_norm": 0.6991297006607056, + "learning_rate": 3.8478041569188e-05, + "loss": 0.09915351867675781, + "step": 1346 + }, + { + "epoch": 0.18769595206577022, + "grad_norm": 0.7532744407653809, + "learning_rate": 3.8474432421509324e-05, + "loss": 0.10355758666992188, + "step": 1347 + }, + { + "epoch": 0.1878352957569846, + "grad_norm": 0.5668098330497742, + "learning_rate": 3.847081916923355e-05, + "loss": 0.12118148803710938, + "step": 1348 + }, + { + "epoch": 0.18797463944819898, + "grad_norm": 0.9967095255851746, + "learning_rate": 3.846720181316344e-05, + "loss": 0.09897613525390625, + "step": 1349 + }, + { + "epoch": 0.18811398313941335, + "grad_norm": 1.1122440099716187, + "learning_rate": 3.846358035410271e-05, + "loss": 0.12298393249511719, + "step": 1350 + }, + { + "epoch": 0.18825332683062773, + "grad_norm": 0.5140758156776428, + "learning_rate": 3.845995479285595e-05, + "loss": 0.09121894836425781, + "step": 1351 + }, + { + "epoch": 0.18839267052184214, + "grad_norm": 0.851491391658783, + "learning_rate": 3.845632513022869e-05, + "loss": 0.11237335205078125, + "step": 1352 + }, + { + "epoch": 0.18853201421305651, + "grad_norm": 1.2109057903289795, + "learning_rate": 3.845269136702734e-05, + "loss": 0.11274909973144531, + "step": 1353 + }, + { + "epoch": 0.1886713579042709, + "grad_norm": 0.8473382592201233, + "learning_rate": 3.844905350405926e-05, + "loss": 0.12432479858398438, + "step": 1354 + }, + { + "epoch": 0.18881070159548527, + "grad_norm": 0.7801395058631897, + "learning_rate": 3.8445411542132684e-05, + "loss": 0.12720870971679688, + "step": 1355 + }, + { + "epoch": 0.18895004528669965, + "grad_norm": 0.6312745213508606, + "learning_rate": 3.8441765482056783e-05, + "loss": 0.09164047241210938, + "step": 1356 + }, + { + "epoch": 0.18908938897791402, + "grad_norm": 1.5245453119277954, + "learning_rate": 3.843811532464163e-05, + "loss": 0.1372699737548828, + "step": 1357 + }, + { + "epoch": 0.1892287326691284, + "grad_norm": 0.7641769647598267, + "learning_rate": 3.8434461070698194e-05, + "loss": 0.13161277770996094, + "step": 1358 + }, + { + "epoch": 0.18936807636034278, + "grad_norm": 0.8637055158615112, + "learning_rate": 3.843080272103837e-05, + "loss": 0.09564781188964844, + "step": 1359 + }, + { + "epoch": 0.18950742005155716, + "grad_norm": 0.49232181906700134, + "learning_rate": 3.842714027647497e-05, + "loss": 0.10339546203613281, + "step": 1360 + }, + { + "epoch": 0.18964676374277153, + "grad_norm": 0.5168125033378601, + "learning_rate": 3.8423473737821705e-05, + "loss": 0.10251808166503906, + "step": 1361 + }, + { + "epoch": 0.18978610743398594, + "grad_norm": 0.8825947046279907, + "learning_rate": 3.8419803105893175e-05, + "loss": 0.1053009033203125, + "step": 1362 + }, + { + "epoch": 0.18992545112520032, + "grad_norm": 0.9567702412605286, + "learning_rate": 3.841612838150494e-05, + "loss": 0.1142425537109375, + "step": 1363 + }, + { + "epoch": 0.1900647948164147, + "grad_norm": 0.5473109483718872, + "learning_rate": 3.8412449565473414e-05, + "loss": 0.11235809326171875, + "step": 1364 + }, + { + "epoch": 0.19020413850762907, + "grad_norm": 0.5483184456825256, + "learning_rate": 3.840876665861597e-05, + "loss": 0.11859130859375, + "step": 1365 + }, + { + "epoch": 0.19034348219884345, + "grad_norm": 0.6694830656051636, + "learning_rate": 3.840507966175085e-05, + "loss": 0.101898193359375, + "step": 1366 + }, + { + "epoch": 0.19048282589005783, + "grad_norm": 0.9363000392913818, + "learning_rate": 3.840138857569722e-05, + "loss": 0.10037994384765625, + "step": 1367 + }, + { + "epoch": 0.1906221695812722, + "grad_norm": 0.5914229154586792, + "learning_rate": 3.8397693401275165e-05, + "loss": 0.11611557006835938, + "step": 1368 + }, + { + "epoch": 0.19076151327248658, + "grad_norm": 0.43977612257003784, + "learning_rate": 3.8393994139305656e-05, + "loss": 0.09508514404296875, + "step": 1369 + }, + { + "epoch": 0.19090085696370096, + "grad_norm": 0.5565763711929321, + "learning_rate": 3.8390290790610595e-05, + "loss": 0.09963035583496094, + "step": 1370 + }, + { + "epoch": 0.19104020065491534, + "grad_norm": 0.4072251319885254, + "learning_rate": 3.838658335601278e-05, + "loss": 0.07805824279785156, + "step": 1371 + }, + { + "epoch": 0.19117954434612974, + "grad_norm": 1.0819729566574097, + "learning_rate": 3.838287183633591e-05, + "loss": 0.11242294311523438, + "step": 1372 + }, + { + "epoch": 0.19131888803734412, + "grad_norm": 0.6569169163703918, + "learning_rate": 3.837915623240462e-05, + "loss": 0.11273384094238281, + "step": 1373 + }, + { + "epoch": 0.1914582317285585, + "grad_norm": 0.8362433910369873, + "learning_rate": 3.837543654504441e-05, + "loss": 0.1107177734375, + "step": 1374 + }, + { + "epoch": 0.19159757541977288, + "grad_norm": 0.9019734263420105, + "learning_rate": 3.837171277508171e-05, + "loss": 0.10533523559570312, + "step": 1375 + }, + { + "epoch": 0.19173691911098725, + "grad_norm": 0.6509817242622375, + "learning_rate": 3.836798492334387e-05, + "loss": 0.10588645935058594, + "step": 1376 + }, + { + "epoch": 0.19187626280220163, + "grad_norm": 0.5858681797981262, + "learning_rate": 3.836425299065913e-05, + "loss": 0.09440040588378906, + "step": 1377 + }, + { + "epoch": 0.192015606493416, + "grad_norm": 0.36017751693725586, + "learning_rate": 3.836051697785664e-05, + "loss": 0.0909881591796875, + "step": 1378 + }, + { + "epoch": 0.19215495018463039, + "grad_norm": 0.46209853887557983, + "learning_rate": 3.8356776885766456e-05, + "loss": 0.1010746955871582, + "step": 1379 + }, + { + "epoch": 0.19229429387584476, + "grad_norm": 0.7017320990562439, + "learning_rate": 3.8353032715219534e-05, + "loss": 0.08271980285644531, + "step": 1380 + }, + { + "epoch": 0.19243363756705914, + "grad_norm": 0.525252103805542, + "learning_rate": 3.834928446704775e-05, + "loss": 0.09624671936035156, + "step": 1381 + }, + { + "epoch": 0.19257298125827352, + "grad_norm": 1.1317015886306763, + "learning_rate": 3.834553214208389e-05, + "loss": 0.10095500946044922, + "step": 1382 + }, + { + "epoch": 0.19271232494948792, + "grad_norm": 0.5466147661209106, + "learning_rate": 3.834177574116161e-05, + "loss": 0.08441352844238281, + "step": 1383 + }, + { + "epoch": 0.1928516686407023, + "grad_norm": 0.795098602771759, + "learning_rate": 3.833801526511552e-05, + "loss": 0.09138298034667969, + "step": 1384 + }, + { + "epoch": 0.19299101233191668, + "grad_norm": 0.4417608976364136, + "learning_rate": 3.83342507147811e-05, + "loss": 0.09862899780273438, + "step": 1385 + }, + { + "epoch": 0.19313035602313106, + "grad_norm": 0.7258963584899902, + "learning_rate": 3.833048209099474e-05, + "loss": 0.09025955200195312, + "step": 1386 + }, + { + "epoch": 0.19326969971434543, + "grad_norm": 0.5603727698326111, + "learning_rate": 3.832670939459376e-05, + "loss": 0.10831451416015625, + "step": 1387 + }, + { + "epoch": 0.1934090434055598, + "grad_norm": 0.46487268805503845, + "learning_rate": 3.832293262641636e-05, + "loss": 0.08048439025878906, + "step": 1388 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 0.7813740968704224, + "learning_rate": 3.8319151787301644e-05, + "loss": 0.12491607666015625, + "step": 1389 + }, + { + "epoch": 0.19368773078798857, + "grad_norm": 0.575092077255249, + "learning_rate": 3.831536687808964e-05, + "loss": 0.09209060668945312, + "step": 1390 + }, + { + "epoch": 0.19382707447920294, + "grad_norm": 0.4812125265598297, + "learning_rate": 3.831157789962126e-05, + "loss": 0.10500717163085938, + "step": 1391 + }, + { + "epoch": 0.19396641817041732, + "grad_norm": 0.4771767854690552, + "learning_rate": 3.830778485273833e-05, + "loss": 0.09692096710205078, + "step": 1392 + }, + { + "epoch": 0.19410576186163173, + "grad_norm": 1.0150847434997559, + "learning_rate": 3.830398773828358e-05, + "loss": 0.11176681518554688, + "step": 1393 + }, + { + "epoch": 0.1942451055528461, + "grad_norm": 0.5126836895942688, + "learning_rate": 3.830018655710064e-05, + "loss": 0.10516738891601562, + "step": 1394 + }, + { + "epoch": 0.19438444924406048, + "grad_norm": 0.5057573318481445, + "learning_rate": 3.829638131003405e-05, + "loss": 0.08671188354492188, + "step": 1395 + }, + { + "epoch": 0.19452379293527486, + "grad_norm": 1.1748703718185425, + "learning_rate": 3.829257199792925e-05, + "loss": 0.12128829956054688, + "step": 1396 + }, + { + "epoch": 0.19466313662648924, + "grad_norm": 0.4443107843399048, + "learning_rate": 3.828875862163258e-05, + "loss": 0.10485458374023438, + "step": 1397 + }, + { + "epoch": 0.1948024803177036, + "grad_norm": 0.4495733678340912, + "learning_rate": 3.828494118199127e-05, + "loss": 0.09980392456054688, + "step": 1398 + }, + { + "epoch": 0.194941824008918, + "grad_norm": 0.5061647295951843, + "learning_rate": 3.828111967985349e-05, + "loss": 0.10881423950195312, + "step": 1399 + }, + { + "epoch": 0.19508116770013237, + "grad_norm": 0.6497037410736084, + "learning_rate": 3.8277294116068285e-05, + "loss": 0.10570907592773438, + "step": 1400 + }, + { + "epoch": 0.19522051139134675, + "grad_norm": 1.2839761972427368, + "learning_rate": 3.8273464491485596e-05, + "loss": 0.12141227722167969, + "step": 1401 + }, + { + "epoch": 0.19535985508256112, + "grad_norm": 0.3905301094055176, + "learning_rate": 3.82696308069563e-05, + "loss": 0.10225486755371094, + "step": 1402 + }, + { + "epoch": 0.19549919877377553, + "grad_norm": 0.38540029525756836, + "learning_rate": 3.8265793063332135e-05, + "loss": 0.09874725341796875, + "step": 1403 + }, + { + "epoch": 0.1956385424649899, + "grad_norm": 1.2842803001403809, + "learning_rate": 3.826195126146576e-05, + "loss": 0.1134490966796875, + "step": 1404 + }, + { + "epoch": 0.19577788615620428, + "grad_norm": 0.7468305230140686, + "learning_rate": 3.8258105402210755e-05, + "loss": 0.10444259643554688, + "step": 1405 + }, + { + "epoch": 0.19591722984741866, + "grad_norm": 0.4661797881126404, + "learning_rate": 3.825425548642156e-05, + "loss": 0.09274864196777344, + "step": 1406 + }, + { + "epoch": 0.19605657353863304, + "grad_norm": 0.6055221557617188, + "learning_rate": 3.8250401514953557e-05, + "loss": 0.11462783813476562, + "step": 1407 + }, + { + "epoch": 0.19619591722984742, + "grad_norm": 0.4044515788555145, + "learning_rate": 3.824654348866299e-05, + "loss": 0.08596324920654297, + "step": 1408 + }, + { + "epoch": 0.1963352609210618, + "grad_norm": 0.7220860123634338, + "learning_rate": 3.824268140840704e-05, + "loss": 0.10818672180175781, + "step": 1409 + }, + { + "epoch": 0.19647460461227617, + "grad_norm": 1.518726110458374, + "learning_rate": 3.823881527504377e-05, + "loss": 0.1318035125732422, + "step": 1410 + }, + { + "epoch": 0.19661394830349055, + "grad_norm": 0.5223092436790466, + "learning_rate": 3.823494508943214e-05, + "loss": 0.1053466796875, + "step": 1411 + }, + { + "epoch": 0.19675329199470493, + "grad_norm": 0.7580105662345886, + "learning_rate": 3.8231070852432035e-05, + "loss": 0.12936782836914062, + "step": 1412 + }, + { + "epoch": 0.19689263568591933, + "grad_norm": 0.9086602330207825, + "learning_rate": 3.82271925649042e-05, + "loss": 0.09434127807617188, + "step": 1413 + }, + { + "epoch": 0.1970319793771337, + "grad_norm": 0.9804336428642273, + "learning_rate": 3.822331022771031e-05, + "loss": 0.12256526947021484, + "step": 1414 + }, + { + "epoch": 0.1971713230683481, + "grad_norm": 0.751820981502533, + "learning_rate": 3.8219423841712935e-05, + "loss": 0.06526374816894531, + "step": 1415 + }, + { + "epoch": 0.19731066675956246, + "grad_norm": 0.4351544976234436, + "learning_rate": 3.821553340777553e-05, + "loss": 0.09386825561523438, + "step": 1416 + }, + { + "epoch": 0.19745001045077684, + "grad_norm": 0.5709677934646606, + "learning_rate": 3.821163892676248e-05, + "loss": 0.1288738250732422, + "step": 1417 + }, + { + "epoch": 0.19758935414199122, + "grad_norm": 1.1607028245925903, + "learning_rate": 3.820774039953904e-05, + "loss": 0.10728836059570312, + "step": 1418 + }, + { + "epoch": 0.1977286978332056, + "grad_norm": 1.2197813987731934, + "learning_rate": 3.820383782697136e-05, + "loss": 0.10294342041015625, + "step": 1419 + }, + { + "epoch": 0.19786804152441997, + "grad_norm": 0.5867794156074524, + "learning_rate": 3.819993120992653e-05, + "loss": 0.10053825378417969, + "step": 1420 + }, + { + "epoch": 0.19800738521563435, + "grad_norm": 0.49797290563583374, + "learning_rate": 3.819602054927249e-05, + "loss": 0.09128570556640625, + "step": 1421 + }, + { + "epoch": 0.19814672890684873, + "grad_norm": 0.7152952551841736, + "learning_rate": 3.8192105845878106e-05, + "loss": 0.12041473388671875, + "step": 1422 + }, + { + "epoch": 0.19828607259806313, + "grad_norm": 0.6989514827728271, + "learning_rate": 3.818818710061314e-05, + "loss": 0.10884475708007812, + "step": 1423 + }, + { + "epoch": 0.1984254162892775, + "grad_norm": 1.5029706954956055, + "learning_rate": 3.818426431434824e-05, + "loss": 0.11463737487792969, + "step": 1424 + }, + { + "epoch": 0.1985647599804919, + "grad_norm": 0.8383270502090454, + "learning_rate": 3.818033748795497e-05, + "loss": 0.12098312377929688, + "step": 1425 + }, + { + "epoch": 0.19870410367170627, + "grad_norm": 0.47503140568733215, + "learning_rate": 3.817640662230576e-05, + "loss": 0.10045623779296875, + "step": 1426 + }, + { + "epoch": 0.19884344736292064, + "grad_norm": 0.5844559073448181, + "learning_rate": 3.8172471718273986e-05, + "loss": 0.08629608154296875, + "step": 1427 + }, + { + "epoch": 0.19898279105413502, + "grad_norm": 0.5713101625442505, + "learning_rate": 3.8168532776733874e-05, + "loss": 0.09910202026367188, + "step": 1428 + }, + { + "epoch": 0.1991221347453494, + "grad_norm": 0.40462547540664673, + "learning_rate": 3.816458979856058e-05, + "loss": 0.09914016723632812, + "step": 1429 + }, + { + "epoch": 0.19926147843656378, + "grad_norm": 0.43558788299560547, + "learning_rate": 3.816064278463013e-05, + "loss": 0.10738182067871094, + "step": 1430 + }, + { + "epoch": 0.19940082212777815, + "grad_norm": 0.4383837878704071, + "learning_rate": 3.815669173581947e-05, + "loss": 0.08968734741210938, + "step": 1431 + }, + { + "epoch": 0.19954016581899253, + "grad_norm": 0.611033022403717, + "learning_rate": 3.8152736653006434e-05, + "loss": 0.10982990264892578, + "step": 1432 + }, + { + "epoch": 0.19967950951020694, + "grad_norm": 0.8020353317260742, + "learning_rate": 3.8148777537069745e-05, + "loss": 0.09735298156738281, + "step": 1433 + }, + { + "epoch": 0.19981885320142131, + "grad_norm": 0.44346368312835693, + "learning_rate": 3.8144814388889034e-05, + "loss": 0.10519027709960938, + "step": 1434 + }, + { + "epoch": 0.1999581968926357, + "grad_norm": 0.592093825340271, + "learning_rate": 3.814084720934482e-05, + "loss": 0.10346031188964844, + "step": 1435 + }, + { + "epoch": 0.20009754058385007, + "grad_norm": 0.9371759295463562, + "learning_rate": 3.813687599931851e-05, + "loss": 0.10736274719238281, + "step": 1436 + }, + { + "epoch": 0.20023688427506445, + "grad_norm": 0.4826612174510956, + "learning_rate": 3.813290075969243e-05, + "loss": 0.09876251220703125, + "step": 1437 + }, + { + "epoch": 0.20037622796627882, + "grad_norm": 0.8127293586730957, + "learning_rate": 3.812892149134978e-05, + "loss": 0.0878143310546875, + "step": 1438 + }, + { + "epoch": 0.2005155716574932, + "grad_norm": 1.2162224054336548, + "learning_rate": 3.812493819517467e-05, + "loss": 0.138580322265625, + "step": 1439 + }, + { + "epoch": 0.20065491534870758, + "grad_norm": 0.5930866003036499, + "learning_rate": 3.812095087205209e-05, + "loss": 0.09885025024414062, + "step": 1440 + }, + { + "epoch": 0.20079425903992196, + "grad_norm": 0.42039749026298523, + "learning_rate": 3.811695952286793e-05, + "loss": 0.10192012786865234, + "step": 1441 + }, + { + "epoch": 0.20093360273113633, + "grad_norm": 0.5756534934043884, + "learning_rate": 3.8112964148508986e-05, + "loss": 0.10409736633300781, + "step": 1442 + }, + { + "epoch": 0.20107294642235074, + "grad_norm": 0.4790400266647339, + "learning_rate": 3.810896474986294e-05, + "loss": 0.08788490295410156, + "step": 1443 + }, + { + "epoch": 0.20121229011356512, + "grad_norm": 1.3881715536117554, + "learning_rate": 3.8104961327818354e-05, + "loss": 0.11792564392089844, + "step": 1444 + }, + { + "epoch": 0.2013516338047795, + "grad_norm": 0.6050581932067871, + "learning_rate": 3.8100953883264705e-05, + "loss": 0.10498046875, + "step": 1445 + }, + { + "epoch": 0.20149097749599387, + "grad_norm": 0.5192952156066895, + "learning_rate": 3.809694241709235e-05, + "loss": 0.10049724578857422, + "step": 1446 + }, + { + "epoch": 0.20163032118720825, + "grad_norm": 0.3357115089893341, + "learning_rate": 3.8092926930192555e-05, + "loss": 0.08324527740478516, + "step": 1447 + }, + { + "epoch": 0.20176966487842263, + "grad_norm": 0.4492969810962677, + "learning_rate": 3.8088907423457466e-05, + "loss": 0.07926368713378906, + "step": 1448 + }, + { + "epoch": 0.201909008569637, + "grad_norm": 0.839512825012207, + "learning_rate": 3.8084883897780126e-05, + "loss": 0.09444808959960938, + "step": 1449 + }, + { + "epoch": 0.20204835226085138, + "grad_norm": 1.1336843967437744, + "learning_rate": 3.808085635405446e-05, + "loss": 0.11614322662353516, + "step": 1450 + }, + { + "epoch": 0.20218769595206576, + "grad_norm": 0.9181559681892395, + "learning_rate": 3.807682479317531e-05, + "loss": 0.10262298583984375, + "step": 1451 + }, + { + "epoch": 0.20232703964328014, + "grad_norm": 0.7171592712402344, + "learning_rate": 3.80727892160384e-05, + "loss": 0.09939384460449219, + "step": 1452 + }, + { + "epoch": 0.20246638333449454, + "grad_norm": 1.1095881462097168, + "learning_rate": 3.806874962354033e-05, + "loss": 0.11771583557128906, + "step": 1453 + }, + { + "epoch": 0.20260572702570892, + "grad_norm": 0.46935680508613586, + "learning_rate": 3.806470601657861e-05, + "loss": 0.08944320678710938, + "step": 1454 + }, + { + "epoch": 0.2027450707169233, + "grad_norm": 0.6179680228233337, + "learning_rate": 3.806065839605163e-05, + "loss": 0.09772682189941406, + "step": 1455 + }, + { + "epoch": 0.20288441440813768, + "grad_norm": 0.6152550578117371, + "learning_rate": 3.805660676285869e-05, + "loss": 0.10584259033203125, + "step": 1456 + }, + { + "epoch": 0.20302375809935205, + "grad_norm": 0.34207937121391296, + "learning_rate": 3.805255111789997e-05, + "loss": 0.0764460563659668, + "step": 1457 + }, + { + "epoch": 0.20316310179056643, + "grad_norm": 0.9462080597877502, + "learning_rate": 3.804849146207654e-05, + "loss": 0.1347637176513672, + "step": 1458 + }, + { + "epoch": 0.2033024454817808, + "grad_norm": 0.778847873210907, + "learning_rate": 3.804442779629035e-05, + "loss": 0.08905029296875, + "step": 1459 + }, + { + "epoch": 0.20344178917299519, + "grad_norm": 0.4658353626728058, + "learning_rate": 3.804036012144428e-05, + "loss": 0.07747268676757812, + "step": 1460 + }, + { + "epoch": 0.20358113286420956, + "grad_norm": 0.6436386704444885, + "learning_rate": 3.8036288438442056e-05, + "loss": 0.09539413452148438, + "step": 1461 + }, + { + "epoch": 0.20372047655542394, + "grad_norm": 0.5851045846939087, + "learning_rate": 3.8032212748188306e-05, + "loss": 0.09652519226074219, + "step": 1462 + }, + { + "epoch": 0.20385982024663835, + "grad_norm": 0.6760302782058716, + "learning_rate": 3.802813305158857e-05, + "loss": 0.10011100769042969, + "step": 1463 + }, + { + "epoch": 0.20399916393785272, + "grad_norm": 0.47400158643722534, + "learning_rate": 3.802404934954926e-05, + "loss": 0.0924215316772461, + "step": 1464 + }, + { + "epoch": 0.2041385076290671, + "grad_norm": 0.6218384504318237, + "learning_rate": 3.801996164297769e-05, + "loss": 0.11347579956054688, + "step": 1465 + }, + { + "epoch": 0.20427785132028148, + "grad_norm": 0.447503000497818, + "learning_rate": 3.8015869932782034e-05, + "loss": 0.11229133605957031, + "step": 1466 + }, + { + "epoch": 0.20441719501149586, + "grad_norm": 0.5829729437828064, + "learning_rate": 3.801177421987139e-05, + "loss": 0.09628486633300781, + "step": 1467 + }, + { + "epoch": 0.20455653870271023, + "grad_norm": 0.5001473426818848, + "learning_rate": 3.800767450515574e-05, + "loss": 0.09979057312011719, + "step": 1468 + }, + { + "epoch": 0.2046958823939246, + "grad_norm": 0.5363202691078186, + "learning_rate": 3.800357078954593e-05, + "loss": 0.11067771911621094, + "step": 1469 + }, + { + "epoch": 0.204835226085139, + "grad_norm": 0.5093072652816772, + "learning_rate": 3.7999463073953715e-05, + "loss": 0.1068572998046875, + "step": 1470 + }, + { + "epoch": 0.20497456977635337, + "grad_norm": 0.7968482971191406, + "learning_rate": 3.7995351359291743e-05, + "loss": 0.11713600158691406, + "step": 1471 + }, + { + "epoch": 0.20511391346756774, + "grad_norm": 0.9157695174217224, + "learning_rate": 3.799123564647354e-05, + "loss": 0.11902618408203125, + "step": 1472 + }, + { + "epoch": 0.20525325715878215, + "grad_norm": 0.4888874292373657, + "learning_rate": 3.7987115936413526e-05, + "loss": 0.10666656494140625, + "step": 1473 + }, + { + "epoch": 0.20539260084999653, + "grad_norm": 1.1094471216201782, + "learning_rate": 3.7982992230027e-05, + "loss": 0.12027931213378906, + "step": 1474 + }, + { + "epoch": 0.2055319445412109, + "grad_norm": 0.4391915500164032, + "learning_rate": 3.797886452823016e-05, + "loss": 0.10680389404296875, + "step": 1475 + }, + { + "epoch": 0.20567128823242528, + "grad_norm": 0.8205001950263977, + "learning_rate": 3.797473283194009e-05, + "loss": 0.0938863754272461, + "step": 1476 + }, + { + "epoch": 0.20581063192363966, + "grad_norm": 0.6771225929260254, + "learning_rate": 3.797059714207475e-05, + "loss": 0.11816692352294922, + "step": 1477 + }, + { + "epoch": 0.20594997561485404, + "grad_norm": 0.4176645278930664, + "learning_rate": 3.7966457459553e-05, + "loss": 0.09126663208007812, + "step": 1478 + }, + { + "epoch": 0.2060893193060684, + "grad_norm": 0.48210620880126953, + "learning_rate": 3.796231378529458e-05, + "loss": 0.10072708129882812, + "step": 1479 + }, + { + "epoch": 0.2062286629972828, + "grad_norm": 0.6076430678367615, + "learning_rate": 3.795816612022014e-05, + "loss": 0.094696044921875, + "step": 1480 + }, + { + "epoch": 0.20636800668849717, + "grad_norm": 0.9187254309654236, + "learning_rate": 3.795401446525117e-05, + "loss": 0.1089468002319336, + "step": 1481 + }, + { + "epoch": 0.20650735037971155, + "grad_norm": 0.6093528866767883, + "learning_rate": 3.794985882131008e-05, + "loss": 0.11207962036132812, + "step": 1482 + }, + { + "epoch": 0.20664669407092595, + "grad_norm": 0.5483857989311218, + "learning_rate": 3.794569918932016e-05, + "loss": 0.09458351135253906, + "step": 1483 + }, + { + "epoch": 0.20678603776214033, + "grad_norm": 0.5667654871940613, + "learning_rate": 3.79415355702056e-05, + "loss": 0.09866523742675781, + "step": 1484 + }, + { + "epoch": 0.2069253814533547, + "grad_norm": 1.898030161857605, + "learning_rate": 3.793736796489143e-05, + "loss": 0.11981582641601562, + "step": 1485 + }, + { + "epoch": 0.20706472514456908, + "grad_norm": 0.5523898601531982, + "learning_rate": 3.7933196374303636e-05, + "loss": 0.102142333984375, + "step": 1486 + }, + { + "epoch": 0.20720406883578346, + "grad_norm": 0.5127153992652893, + "learning_rate": 3.792902079936902e-05, + "loss": 0.09889030456542969, + "step": 1487 + }, + { + "epoch": 0.20734341252699784, + "grad_norm": 0.5445490479469299, + "learning_rate": 3.79248412410153e-05, + "loss": 0.11187076568603516, + "step": 1488 + }, + { + "epoch": 0.20748275621821222, + "grad_norm": 0.4178209900856018, + "learning_rate": 3.79206577001711e-05, + "loss": 0.0865631103515625, + "step": 1489 + }, + { + "epoch": 0.2076220999094266, + "grad_norm": 0.6377855539321899, + "learning_rate": 3.791647017776589e-05, + "loss": 0.08406448364257812, + "step": 1490 + }, + { + "epoch": 0.20776144360064097, + "grad_norm": 1.036280870437622, + "learning_rate": 3.791227867473004e-05, + "loss": 0.09577369689941406, + "step": 1491 + }, + { + "epoch": 0.20790078729185535, + "grad_norm": 0.4287986159324646, + "learning_rate": 3.790808319199483e-05, + "loss": 0.090240478515625, + "step": 1492 + }, + { + "epoch": 0.20804013098306975, + "grad_norm": 0.2676747143268585, + "learning_rate": 3.790388373049236e-05, + "loss": 0.07974815368652344, + "step": 1493 + }, + { + "epoch": 0.20817947467428413, + "grad_norm": 0.921366810798645, + "learning_rate": 3.78996802911557e-05, + "loss": 0.11388969421386719, + "step": 1494 + }, + { + "epoch": 0.2083188183654985, + "grad_norm": 0.3873124122619629, + "learning_rate": 3.789547287491872e-05, + "loss": 0.07941341400146484, + "step": 1495 + }, + { + "epoch": 0.2084581620567129, + "grad_norm": 0.3247937560081482, + "learning_rate": 3.789126148271624e-05, + "loss": 0.08573150634765625, + "step": 1496 + }, + { + "epoch": 0.20859750574792726, + "grad_norm": 0.4503636956214905, + "learning_rate": 3.7887046115483914e-05, + "loss": 0.11032485961914062, + "step": 1497 + }, + { + "epoch": 0.20873684943914164, + "grad_norm": 0.6142569184303284, + "learning_rate": 3.788282677415831e-05, + "loss": 0.09271621704101562, + "step": 1498 + }, + { + "epoch": 0.20887619313035602, + "grad_norm": 0.7934049963951111, + "learning_rate": 3.787860345967687e-05, + "loss": 0.11320686340332031, + "step": 1499 + }, + { + "epoch": 0.2090155368215704, + "grad_norm": 0.6409229040145874, + "learning_rate": 3.787437617297792e-05, + "loss": 0.09863090515136719, + "step": 1500 + }, + { + "epoch": 0.20915488051278477, + "grad_norm": 0.5125098824501038, + "learning_rate": 3.787014491500066e-05, + "loss": 0.09951210021972656, + "step": 1501 + }, + { + "epoch": 0.20929422420399915, + "grad_norm": 0.6007881760597229, + "learning_rate": 3.786590968668518e-05, + "loss": 0.09958457946777344, + "step": 1502 + }, + { + "epoch": 0.20943356789521356, + "grad_norm": 0.9334321618080139, + "learning_rate": 3.7861670488972464e-05, + "loss": 0.0960693359375, + "step": 1503 + }, + { + "epoch": 0.20957291158642793, + "grad_norm": 0.6448788642883301, + "learning_rate": 3.7857427322804346e-05, + "loss": 0.10546875, + "step": 1504 + }, + { + "epoch": 0.2097122552776423, + "grad_norm": 0.5533633828163147, + "learning_rate": 3.785318018912357e-05, + "loss": 0.1108856201171875, + "step": 1505 + }, + { + "epoch": 0.2098515989688567, + "grad_norm": 1.4106656312942505, + "learning_rate": 3.784892908887375e-05, + "loss": 0.12276077270507812, + "step": 1506 + }, + { + "epoch": 0.20999094266007107, + "grad_norm": 0.6319714188575745, + "learning_rate": 3.7844674022999387e-05, + "loss": 0.11384391784667969, + "step": 1507 + }, + { + "epoch": 0.21013028635128544, + "grad_norm": 0.5909047722816467, + "learning_rate": 3.784041499244585e-05, + "loss": 0.1041259765625, + "step": 1508 + }, + { + "epoch": 0.21026963004249982, + "grad_norm": 0.8323530554771423, + "learning_rate": 3.783615199815941e-05, + "loss": 0.09430122375488281, + "step": 1509 + }, + { + "epoch": 0.2104089737337142, + "grad_norm": 0.7568151354789734, + "learning_rate": 3.78318850410872e-05, + "loss": 0.13238143920898438, + "step": 1510 + }, + { + "epoch": 0.21054831742492858, + "grad_norm": 0.4361835718154907, + "learning_rate": 3.782761412217725e-05, + "loss": 0.07916450500488281, + "step": 1511 + }, + { + "epoch": 0.21068766111614295, + "grad_norm": 0.7982330918312073, + "learning_rate": 3.7823339242378445e-05, + "loss": 0.12289047241210938, + "step": 1512 + }, + { + "epoch": 0.21082700480735736, + "grad_norm": 0.4291945695877075, + "learning_rate": 3.7819060402640577e-05, + "loss": 0.09645938873291016, + "step": 1513 + }, + { + "epoch": 0.21096634849857174, + "grad_norm": 0.7218261361122131, + "learning_rate": 3.7814777603914305e-05, + "loss": 0.08865547180175781, + "step": 1514 + }, + { + "epoch": 0.21110569218978611, + "grad_norm": 0.9422181844711304, + "learning_rate": 3.781049084715117e-05, + "loss": 0.11249351501464844, + "step": 1515 + }, + { + "epoch": 0.2112450358810005, + "grad_norm": 0.7408039569854736, + "learning_rate": 3.780620013330358e-05, + "loss": 0.13343048095703125, + "step": 1516 + }, + { + "epoch": 0.21138437957221487, + "grad_norm": 0.5889005661010742, + "learning_rate": 3.7801905463324855e-05, + "loss": 0.11684417724609375, + "step": 1517 + }, + { + "epoch": 0.21152372326342925, + "grad_norm": 1.0485424995422363, + "learning_rate": 3.7797606838169156e-05, + "loss": 0.10131454467773438, + "step": 1518 + }, + { + "epoch": 0.21166306695464362, + "grad_norm": 0.7445585131645203, + "learning_rate": 3.7793304258791544e-05, + "loss": 0.08873176574707031, + "step": 1519 + }, + { + "epoch": 0.211802410645858, + "grad_norm": 0.39414724707603455, + "learning_rate": 3.778899772614795e-05, + "loss": 0.07842063903808594, + "step": 1520 + }, + { + "epoch": 0.21194175433707238, + "grad_norm": 0.542391836643219, + "learning_rate": 3.7784687241195195e-05, + "loss": 0.11623191833496094, + "step": 1521 + }, + { + "epoch": 0.21208109802828676, + "grad_norm": 1.154749870300293, + "learning_rate": 3.778037280489096e-05, + "loss": 0.12650108337402344, + "step": 1522 + }, + { + "epoch": 0.21222044171950116, + "grad_norm": 0.5699454545974731, + "learning_rate": 3.777605441819383e-05, + "loss": 0.10379219055175781, + "step": 1523 + }, + { + "epoch": 0.21235978541071554, + "grad_norm": 0.5076906681060791, + "learning_rate": 3.777173208206323e-05, + "loss": 0.08121299743652344, + "step": 1524 + }, + { + "epoch": 0.21249912910192992, + "grad_norm": 1.3617066144943237, + "learning_rate": 3.776740579745951e-05, + "loss": 0.13848876953125, + "step": 1525 + }, + { + "epoch": 0.2126384727931443, + "grad_norm": 0.413850337266922, + "learning_rate": 3.776307556534385e-05, + "loss": 0.09504890441894531, + "step": 1526 + }, + { + "epoch": 0.21277781648435867, + "grad_norm": 0.6209184527397156, + "learning_rate": 3.775874138667834e-05, + "loss": 0.11367988586425781, + "step": 1527 + }, + { + "epoch": 0.21291716017557305, + "grad_norm": 0.4957193434238434, + "learning_rate": 3.775440326242593e-05, + "loss": 0.09942817687988281, + "step": 1528 + }, + { + "epoch": 0.21305650386678743, + "grad_norm": 0.41634637117385864, + "learning_rate": 3.775006119355047e-05, + "loss": 0.08196640014648438, + "step": 1529 + }, + { + "epoch": 0.2131958475580018, + "grad_norm": 0.6705635786056519, + "learning_rate": 3.7745715181016634e-05, + "loss": 0.11375999450683594, + "step": 1530 + }, + { + "epoch": 0.21333519124921618, + "grad_norm": 0.31035712361335754, + "learning_rate": 3.774136522579004e-05, + "loss": 0.09266281127929688, + "step": 1531 + }, + { + "epoch": 0.21347453494043056, + "grad_norm": 1.4831700325012207, + "learning_rate": 3.773701132883712e-05, + "loss": 0.11937904357910156, + "step": 1532 + }, + { + "epoch": 0.21361387863164497, + "grad_norm": 0.6732844114303589, + "learning_rate": 3.773265349112524e-05, + "loss": 0.09653854370117188, + "step": 1533 + }, + { + "epoch": 0.21375322232285934, + "grad_norm": 0.533657968044281, + "learning_rate": 3.772829171362259e-05, + "loss": 0.08931732177734375, + "step": 1534 + }, + { + "epoch": 0.21389256601407372, + "grad_norm": 0.7490890026092529, + "learning_rate": 3.772392599729827e-05, + "loss": 0.10683250427246094, + "step": 1535 + }, + { + "epoch": 0.2140319097052881, + "grad_norm": 0.41359615325927734, + "learning_rate": 3.7719556343122236e-05, + "loss": 0.0883331298828125, + "step": 1536 + }, + { + "epoch": 0.21417125339650248, + "grad_norm": 0.3703237473964691, + "learning_rate": 3.771518275206532e-05, + "loss": 0.09852409362792969, + "step": 1537 + }, + { + "epoch": 0.21431059708771685, + "grad_norm": 1.0867794752120972, + "learning_rate": 3.771080522509925e-05, + "loss": 0.10029315948486328, + "step": 1538 + }, + { + "epoch": 0.21444994077893123, + "grad_norm": 0.4383995831012726, + "learning_rate": 3.77064237631966e-05, + "loss": 0.09319686889648438, + "step": 1539 + }, + { + "epoch": 0.2145892844701456, + "grad_norm": 0.5781996250152588, + "learning_rate": 3.770203836733084e-05, + "loss": 0.10411453247070312, + "step": 1540 + }, + { + "epoch": 0.21472862816135999, + "grad_norm": 0.39326539635658264, + "learning_rate": 3.769764903847629e-05, + "loss": 0.09047603607177734, + "step": 1541 + }, + { + "epoch": 0.21486797185257436, + "grad_norm": 0.44336000084877014, + "learning_rate": 3.769325577760817e-05, + "loss": 0.09900093078613281, + "step": 1542 + }, + { + "epoch": 0.21500731554378877, + "grad_norm": 0.7135376334190369, + "learning_rate": 3.7688858585702564e-05, + "loss": 0.11348724365234375, + "step": 1543 + }, + { + "epoch": 0.21514665923500315, + "grad_norm": 0.5136505961418152, + "learning_rate": 3.768445746373642e-05, + "loss": 0.10715293884277344, + "step": 1544 + }, + { + "epoch": 0.21528600292621752, + "grad_norm": 0.35883358120918274, + "learning_rate": 3.768005241268757e-05, + "loss": 0.09140205383300781, + "step": 1545 + }, + { + "epoch": 0.2154253466174319, + "grad_norm": 0.5615813136100769, + "learning_rate": 3.7675643433534725e-05, + "loss": 0.11148834228515625, + "step": 1546 + }, + { + "epoch": 0.21556469030864628, + "grad_norm": 0.9249460101127625, + "learning_rate": 3.767123052725744e-05, + "loss": 0.08353233337402344, + "step": 1547 + }, + { + "epoch": 0.21570403399986066, + "grad_norm": 0.5396033525466919, + "learning_rate": 3.7666813694836176e-05, + "loss": 0.12052536010742188, + "step": 1548 + }, + { + "epoch": 0.21584337769107503, + "grad_norm": 0.5185617804527283, + "learning_rate": 3.7662392937252255e-05, + "loss": 0.08903312683105469, + "step": 1549 + }, + { + "epoch": 0.2159827213822894, + "grad_norm": 1.4435734748840332, + "learning_rate": 3.7657968255487854e-05, + "loss": 0.10304522514343262, + "step": 1550 + }, + { + "epoch": 0.2161220650735038, + "grad_norm": 1.3254905939102173, + "learning_rate": 3.765353965052605e-05, + "loss": 0.11086273193359375, + "step": 1551 + }, + { + "epoch": 0.21626140876471817, + "grad_norm": 0.6222955584526062, + "learning_rate": 3.764910712335077e-05, + "loss": 0.1282806396484375, + "step": 1552 + }, + { + "epoch": 0.21640075245593257, + "grad_norm": 0.668829619884491, + "learning_rate": 3.764467067494683e-05, + "loss": 0.0884857177734375, + "step": 1553 + }, + { + "epoch": 0.21654009614714695, + "grad_norm": 0.542068362236023, + "learning_rate": 3.7640230306299895e-05, + "loss": 0.10982131958007812, + "step": 1554 + }, + { + "epoch": 0.21667943983836133, + "grad_norm": 0.7278395295143127, + "learning_rate": 3.7635786018396524e-05, + "loss": 0.11594009399414062, + "step": 1555 + }, + { + "epoch": 0.2168187835295757, + "grad_norm": 0.7407974600791931, + "learning_rate": 3.763133781222412e-05, + "loss": 0.10821151733398438, + "step": 1556 + }, + { + "epoch": 0.21695812722079008, + "grad_norm": 0.6908783316612244, + "learning_rate": 3.762688568877099e-05, + "loss": 0.12094688415527344, + "step": 1557 + }, + { + "epoch": 0.21709747091200446, + "grad_norm": 0.46273699402809143, + "learning_rate": 3.762242964902629e-05, + "loss": 0.0906524658203125, + "step": 1558 + }, + { + "epoch": 0.21723681460321884, + "grad_norm": 0.4096841812133789, + "learning_rate": 3.761796969398005e-05, + "loss": 0.09876823425292969, + "step": 1559 + }, + { + "epoch": 0.2173761582944332, + "grad_norm": 0.47741949558258057, + "learning_rate": 3.761350582462317e-05, + "loss": 0.10111808776855469, + "step": 1560 + }, + { + "epoch": 0.2175155019856476, + "grad_norm": 0.5157760381698608, + "learning_rate": 3.760903804194742e-05, + "loss": 0.11137938499450684, + "step": 1561 + }, + { + "epoch": 0.21765484567686197, + "grad_norm": 0.49074995517730713, + "learning_rate": 3.7604566346945437e-05, + "loss": 0.09737968444824219, + "step": 1562 + }, + { + "epoch": 0.21779418936807637, + "grad_norm": 0.7621711492538452, + "learning_rate": 3.760009074061073e-05, + "loss": 0.12557601928710938, + "step": 1563 + }, + { + "epoch": 0.21793353305929075, + "grad_norm": 0.45143207907676697, + "learning_rate": 3.759561122393767e-05, + "loss": 0.12238693237304688, + "step": 1564 + }, + { + "epoch": 0.21807287675050513, + "grad_norm": 0.5304073691368103, + "learning_rate": 3.7591127797921523e-05, + "loss": 0.09487152099609375, + "step": 1565 + }, + { + "epoch": 0.2182122204417195, + "grad_norm": 0.7677126526832581, + "learning_rate": 3.7586640463558384e-05, + "loss": 0.1057424545288086, + "step": 1566 + }, + { + "epoch": 0.21835156413293388, + "grad_norm": 0.7530484199523926, + "learning_rate": 3.758214922184525e-05, + "loss": 0.10043907165527344, + "step": 1567 + }, + { + "epoch": 0.21849090782414826, + "grad_norm": 0.5468559265136719, + "learning_rate": 3.7577654073779956e-05, + "loss": 0.09519767761230469, + "step": 1568 + }, + { + "epoch": 0.21863025151536264, + "grad_norm": 0.7459896206855774, + "learning_rate": 3.757315502036124e-05, + "loss": 0.12157249450683594, + "step": 1569 + }, + { + "epoch": 0.21876959520657702, + "grad_norm": 0.6526173949241638, + "learning_rate": 3.756865206258868e-05, + "loss": 0.116180419921875, + "step": 1570 + }, + { + "epoch": 0.2189089388977914, + "grad_norm": 0.5466171503067017, + "learning_rate": 3.7564145201462726e-05, + "loss": 0.1005096435546875, + "step": 1571 + }, + { + "epoch": 0.21904828258900577, + "grad_norm": 0.5829871892929077, + "learning_rate": 3.755963443798471e-05, + "loss": 0.10846710205078125, + "step": 1572 + }, + { + "epoch": 0.21918762628022015, + "grad_norm": 0.5046905875205994, + "learning_rate": 3.7555119773156815e-05, + "loss": 0.08476066589355469, + "step": 1573 + }, + { + "epoch": 0.21932696997143455, + "grad_norm": 0.4624950587749481, + "learning_rate": 3.755060120798209e-05, + "loss": 0.09307670593261719, + "step": 1574 + }, + { + "epoch": 0.21946631366264893, + "grad_norm": 0.6069352626800537, + "learning_rate": 3.754607874346447e-05, + "loss": 0.1098480224609375, + "step": 1575 + }, + { + "epoch": 0.2196056573538633, + "grad_norm": 0.5805450677871704, + "learning_rate": 3.7541552380608734e-05, + "loss": 0.09745597839355469, + "step": 1576 + }, + { + "epoch": 0.2197450010450777, + "grad_norm": 1.4952571392059326, + "learning_rate": 3.753702212042054e-05, + "loss": 0.13129043579101562, + "step": 1577 + }, + { + "epoch": 0.21988434473629206, + "grad_norm": 0.6186228394508362, + "learning_rate": 3.753248796390641e-05, + "loss": 0.09225654602050781, + "step": 1578 + }, + { + "epoch": 0.22002368842750644, + "grad_norm": 0.847498893737793, + "learning_rate": 3.7527949912073725e-05, + "loss": 0.08650016784667969, + "step": 1579 + }, + { + "epoch": 0.22016303211872082, + "grad_norm": 1.0308774709701538, + "learning_rate": 3.752340796593074e-05, + "loss": 0.1167755126953125, + "step": 1580 + }, + { + "epoch": 0.2203023758099352, + "grad_norm": 0.7299877405166626, + "learning_rate": 3.751886212648657e-05, + "loss": 0.12188148498535156, + "step": 1581 + }, + { + "epoch": 0.22044171950114957, + "grad_norm": 0.6274526119232178, + "learning_rate": 3.75143123947512e-05, + "loss": 0.1077728271484375, + "step": 1582 + }, + { + "epoch": 0.22058106319236395, + "grad_norm": 0.6367173790931702, + "learning_rate": 3.7509758771735475e-05, + "loss": 0.09646987915039062, + "step": 1583 + }, + { + "epoch": 0.22072040688357836, + "grad_norm": 0.46828651428222656, + "learning_rate": 3.75052012584511e-05, + "loss": 0.08153152465820312, + "step": 1584 + }, + { + "epoch": 0.22085975057479273, + "grad_norm": 0.5937584638595581, + "learning_rate": 3.750063985591067e-05, + "loss": 0.11708450317382812, + "step": 1585 + }, + { + "epoch": 0.2209990942660071, + "grad_norm": 0.47414156794548035, + "learning_rate": 3.749607456512759e-05, + "loss": 0.10273551940917969, + "step": 1586 + }, + { + "epoch": 0.2211384379572215, + "grad_norm": 0.8978180289268494, + "learning_rate": 3.74915053871162e-05, + "loss": 0.09682655334472656, + "step": 1587 + }, + { + "epoch": 0.22127778164843587, + "grad_norm": 0.4668830335140228, + "learning_rate": 3.7486932322891646e-05, + "loss": 0.09329032897949219, + "step": 1588 + }, + { + "epoch": 0.22141712533965024, + "grad_norm": 0.4054740071296692, + "learning_rate": 3.748235537346996e-05, + "loss": 0.09599685668945312, + "step": 1589 + }, + { + "epoch": 0.22155646903086462, + "grad_norm": 0.587570309638977, + "learning_rate": 3.747777453986804e-05, + "loss": 0.09259986877441406, + "step": 1590 + }, + { + "epoch": 0.221695812722079, + "grad_norm": 0.775287389755249, + "learning_rate": 3.7473189823103645e-05, + "loss": 0.1469573974609375, + "step": 1591 + }, + { + "epoch": 0.22183515641329338, + "grad_norm": 0.4276748597621918, + "learning_rate": 3.746860122419539e-05, + "loss": 0.08135795593261719, + "step": 1592 + }, + { + "epoch": 0.22197450010450775, + "grad_norm": 0.5845743417739868, + "learning_rate": 3.746400874416276e-05, + "loss": 0.08270454406738281, + "step": 1593 + }, + { + "epoch": 0.22211384379572216, + "grad_norm": 0.6051739454269409, + "learning_rate": 3.745941238402609e-05, + "loss": 0.09854316711425781, + "step": 1594 + }, + { + "epoch": 0.22225318748693654, + "grad_norm": 0.7514309883117676, + "learning_rate": 3.74548121448066e-05, + "loss": 0.1097869873046875, + "step": 1595 + }, + { + "epoch": 0.22239253117815092, + "grad_norm": 0.6189656257629395, + "learning_rate": 3.745020802752635e-05, + "loss": 0.09442520141601562, + "step": 1596 + }, + { + "epoch": 0.2225318748693653, + "grad_norm": 1.0425351858139038, + "learning_rate": 3.744560003320827e-05, + "loss": 0.12424468994140625, + "step": 1597 + }, + { + "epoch": 0.22267121856057967, + "grad_norm": 0.400262713432312, + "learning_rate": 3.744098816287616e-05, + "loss": 0.09412765502929688, + "step": 1598 + }, + { + "epoch": 0.22281056225179405, + "grad_norm": 0.6136738061904907, + "learning_rate": 3.743637241755465e-05, + "loss": 0.10781288146972656, + "step": 1599 + }, + { + "epoch": 0.22294990594300843, + "grad_norm": 0.456719309091568, + "learning_rate": 3.743175279826928e-05, + "loss": 0.1080169677734375, + "step": 1600 + }, + { + "epoch": 0.2230892496342228, + "grad_norm": 0.9402472972869873, + "learning_rate": 3.7427129306046406e-05, + "loss": 0.13321685791015625, + "step": 1601 + }, + { + "epoch": 0.22322859332543718, + "grad_norm": 0.6541274785995483, + "learning_rate": 3.7422501941913274e-05, + "loss": 0.11515998840332031, + "step": 1602 + }, + { + "epoch": 0.22336793701665156, + "grad_norm": 0.8702244758605957, + "learning_rate": 3.7417870706897964e-05, + "loss": 0.10232162475585938, + "step": 1603 + }, + { + "epoch": 0.22350728070786596, + "grad_norm": 1.0242801904678345, + "learning_rate": 3.7413235602029445e-05, + "loss": 0.11695098876953125, + "step": 1604 + }, + { + "epoch": 0.22364662439908034, + "grad_norm": 1.3536174297332764, + "learning_rate": 3.740859662833753e-05, + "loss": 0.11833572387695312, + "step": 1605 + }, + { + "epoch": 0.22378596809029472, + "grad_norm": 0.8513897657394409, + "learning_rate": 3.7403953786852884e-05, + "loss": 0.12207317352294922, + "step": 1606 + }, + { + "epoch": 0.2239253117815091, + "grad_norm": 0.36120766401290894, + "learning_rate": 3.739930707860705e-05, + "loss": 0.091339111328125, + "step": 1607 + }, + { + "epoch": 0.22406465547272347, + "grad_norm": 1.2402814626693726, + "learning_rate": 3.739465650463241e-05, + "loss": 0.12421035766601562, + "step": 1608 + }, + { + "epoch": 0.22420399916393785, + "grad_norm": 1.0803292989730835, + "learning_rate": 3.739000206596222e-05, + "loss": 0.08250904083251953, + "step": 1609 + }, + { + "epoch": 0.22434334285515223, + "grad_norm": 1.3499658107757568, + "learning_rate": 3.7385343763630594e-05, + "loss": 0.11162853240966797, + "step": 1610 + }, + { + "epoch": 0.2244826865463666, + "grad_norm": 0.7377925515174866, + "learning_rate": 3.738068159867251e-05, + "loss": 0.09483909606933594, + "step": 1611 + }, + { + "epoch": 0.22462203023758098, + "grad_norm": 0.3731318414211273, + "learning_rate": 3.7376015572123766e-05, + "loss": 0.07771682739257812, + "step": 1612 + }, + { + "epoch": 0.22476137392879536, + "grad_norm": 0.35026803612709045, + "learning_rate": 3.737134568502107e-05, + "loss": 0.09385108947753906, + "step": 1613 + }, + { + "epoch": 0.22490071762000977, + "grad_norm": 0.7284929752349854, + "learning_rate": 3.7366671938401954e-05, + "loss": 0.12259483337402344, + "step": 1614 + }, + { + "epoch": 0.22504006131122414, + "grad_norm": 0.9944056272506714, + "learning_rate": 3.736199433330483e-05, + "loss": 0.09863090515136719, + "step": 1615 + }, + { + "epoch": 0.22517940500243852, + "grad_norm": 0.3973233997821808, + "learning_rate": 3.735731287076893e-05, + "loss": 0.07460594177246094, + "step": 1616 + }, + { + "epoch": 0.2253187486936529, + "grad_norm": 0.7379327416419983, + "learning_rate": 3.73526275518344e-05, + "loss": 0.11617088317871094, + "step": 1617 + }, + { + "epoch": 0.22545809238486728, + "grad_norm": 0.47175443172454834, + "learning_rate": 3.734793837754219e-05, + "loss": 0.09778213500976562, + "step": 1618 + }, + { + "epoch": 0.22559743607608165, + "grad_norm": 0.7335260510444641, + "learning_rate": 3.734324534893413e-05, + "loss": 0.10044479370117188, + "step": 1619 + }, + { + "epoch": 0.22573677976729603, + "grad_norm": 0.7034990191459656, + "learning_rate": 3.733854846705291e-05, + "loss": 0.08196353912353516, + "step": 1620 + }, + { + "epoch": 0.2258761234585104, + "grad_norm": 0.9056631922721863, + "learning_rate": 3.733384773294207e-05, + "loss": 0.09162521362304688, + "step": 1621 + }, + { + "epoch": 0.22601546714972479, + "grad_norm": 0.8530195951461792, + "learning_rate": 3.7329143147645994e-05, + "loss": 0.09783172607421875, + "step": 1622 + }, + { + "epoch": 0.22615481084093916, + "grad_norm": 0.5817391872406006, + "learning_rate": 3.732443471220994e-05, + "loss": 0.09804916381835938, + "step": 1623 + }, + { + "epoch": 0.22629415453215357, + "grad_norm": 0.7240291833877563, + "learning_rate": 3.731972242768002e-05, + "loss": 0.10846710205078125, + "step": 1624 + }, + { + "epoch": 0.22643349822336795, + "grad_norm": 0.7770500779151917, + "learning_rate": 3.73150062951032e-05, + "loss": 0.09083747863769531, + "step": 1625 + }, + { + "epoch": 0.22657284191458232, + "grad_norm": 0.9783400297164917, + "learning_rate": 3.731028631552728e-05, + "loss": 0.08011627197265625, + "step": 1626 + }, + { + "epoch": 0.2267121856057967, + "grad_norm": 0.4757509231567383, + "learning_rate": 3.7305562490000944e-05, + "loss": 0.09305191040039062, + "step": 1627 + }, + { + "epoch": 0.22685152929701108, + "grad_norm": 0.4568759500980377, + "learning_rate": 3.730083481957372e-05, + "loss": 0.09119606018066406, + "step": 1628 + }, + { + "epoch": 0.22699087298822546, + "grad_norm": 0.6910880208015442, + "learning_rate": 3.729610330529598e-05, + "loss": 0.11754608154296875, + "step": 1629 + }, + { + "epoch": 0.22713021667943983, + "grad_norm": 0.6427490711212158, + "learning_rate": 3.7291367948218964e-05, + "loss": 0.08552360534667969, + "step": 1630 + }, + { + "epoch": 0.2272695603706542, + "grad_norm": 0.7387151718139648, + "learning_rate": 3.7286628749394754e-05, + "loss": 0.0776519775390625, + "step": 1631 + }, + { + "epoch": 0.2274089040618686, + "grad_norm": 1.0126993656158447, + "learning_rate": 3.72818857098763e-05, + "loss": 0.10918426513671875, + "step": 1632 + }, + { + "epoch": 0.22754824775308297, + "grad_norm": 0.6434133648872375, + "learning_rate": 3.727713883071739e-05, + "loss": 0.09882354736328125, + "step": 1633 + }, + { + "epoch": 0.22768759144429737, + "grad_norm": 1.075708031654358, + "learning_rate": 3.727238811297268e-05, + "loss": 0.13988304138183594, + "step": 1634 + }, + { + "epoch": 0.22782693513551175, + "grad_norm": 0.8172954320907593, + "learning_rate": 3.7267633557697666e-05, + "loss": 0.09395217895507812, + "step": 1635 + }, + { + "epoch": 0.22796627882672613, + "grad_norm": 1.775506615638733, + "learning_rate": 3.72628751659487e-05, + "loss": 0.12343406677246094, + "step": 1636 + }, + { + "epoch": 0.2281056225179405, + "grad_norm": 1.0438849925994873, + "learning_rate": 3.725811293878299e-05, + "loss": 0.09866905212402344, + "step": 1637 + }, + { + "epoch": 0.22824496620915488, + "grad_norm": 0.7218796014785767, + "learning_rate": 3.72533468772586e-05, + "loss": 0.11874198913574219, + "step": 1638 + }, + { + "epoch": 0.22838430990036926, + "grad_norm": 0.7579420208930969, + "learning_rate": 3.724857698243443e-05, + "loss": 0.10306549072265625, + "step": 1639 + }, + { + "epoch": 0.22852365359158364, + "grad_norm": 0.928203284740448, + "learning_rate": 3.724380325537024e-05, + "loss": 0.1248779296875, + "step": 1640 + }, + { + "epoch": 0.22866299728279801, + "grad_norm": 0.5558241605758667, + "learning_rate": 3.723902569712666e-05, + "loss": 0.08880615234375, + "step": 1641 + }, + { + "epoch": 0.2288023409740124, + "grad_norm": 0.5449173450469971, + "learning_rate": 3.7234244308765136e-05, + "loss": 0.10785865783691406, + "step": 1642 + }, + { + "epoch": 0.22894168466522677, + "grad_norm": 0.6129655241966248, + "learning_rate": 3.7229459091348e-05, + "loss": 0.08777046203613281, + "step": 1643 + }, + { + "epoch": 0.22908102835644117, + "grad_norm": 0.45306411385536194, + "learning_rate": 3.7224670045938406e-05, + "loss": 0.07717418670654297, + "step": 1644 + }, + { + "epoch": 0.22922037204765555, + "grad_norm": 0.5785459280014038, + "learning_rate": 3.721987717360037e-05, + "loss": 0.09938240051269531, + "step": 1645 + }, + { + "epoch": 0.22935971573886993, + "grad_norm": 0.5226876139640808, + "learning_rate": 3.721508047539877e-05, + "loss": 0.12410354614257812, + "step": 1646 + }, + { + "epoch": 0.2294990594300843, + "grad_norm": 0.4744981527328491, + "learning_rate": 3.72102799523993e-05, + "loss": 0.09265518188476562, + "step": 1647 + }, + { + "epoch": 0.22963840312129868, + "grad_norm": 0.9877319931983948, + "learning_rate": 3.720547560566855e-05, + "loss": 0.11077880859375, + "step": 1648 + }, + { + "epoch": 0.22977774681251306, + "grad_norm": 0.44293642044067383, + "learning_rate": 3.720066743627393e-05, + "loss": 0.09561920166015625, + "step": 1649 + }, + { + "epoch": 0.22991709050372744, + "grad_norm": 0.5888404846191406, + "learning_rate": 3.719585544528371e-05, + "loss": 0.11298370361328125, + "step": 1650 + }, + { + "epoch": 0.23005643419494182, + "grad_norm": 0.6731222867965698, + "learning_rate": 3.719103963376699e-05, + "loss": 0.09373760223388672, + "step": 1651 + }, + { + "epoch": 0.2301957778861562, + "grad_norm": 0.4871363043785095, + "learning_rate": 3.718622000279374e-05, + "loss": 0.07995223999023438, + "step": 1652 + }, + { + "epoch": 0.23033512157737057, + "grad_norm": 0.5834523439407349, + "learning_rate": 3.718139655343477e-05, + "loss": 0.095672607421875, + "step": 1653 + }, + { + "epoch": 0.23047446526858498, + "grad_norm": 0.7848942279815674, + "learning_rate": 3.717656928676175e-05, + "loss": 0.10361480712890625, + "step": 1654 + }, + { + "epoch": 0.23061380895979935, + "grad_norm": 0.3163059949874878, + "learning_rate": 3.7171738203847185e-05, + "loss": 0.08546257019042969, + "step": 1655 + }, + { + "epoch": 0.23075315265101373, + "grad_norm": 0.46318134665489197, + "learning_rate": 3.7166903305764426e-05, + "loss": 0.1290435791015625, + "step": 1656 + }, + { + "epoch": 0.2308924963422281, + "grad_norm": 0.7216953635215759, + "learning_rate": 3.716206459358768e-05, + "loss": 0.10788726806640625, + "step": 1657 + }, + { + "epoch": 0.2310318400334425, + "grad_norm": 1.1976927518844604, + "learning_rate": 3.7157222068392e-05, + "loss": 0.09344863891601562, + "step": 1658 + }, + { + "epoch": 0.23117118372465686, + "grad_norm": 0.8235940933227539, + "learning_rate": 3.715237573125328e-05, + "loss": 0.09079170227050781, + "step": 1659 + }, + { + "epoch": 0.23131052741587124, + "grad_norm": 0.5633846521377563, + "learning_rate": 3.7147525583248264e-05, + "loss": 0.12070465087890625, + "step": 1660 + }, + { + "epoch": 0.23144987110708562, + "grad_norm": 0.43856897950172424, + "learning_rate": 3.714267162545455e-05, + "loss": 0.08922004699707031, + "step": 1661 + }, + { + "epoch": 0.2315892147983, + "grad_norm": 0.9055614471435547, + "learning_rate": 3.7137813858950576e-05, + "loss": 0.10529518127441406, + "step": 1662 + }, + { + "epoch": 0.23172855848951437, + "grad_norm": 0.7457897067070007, + "learning_rate": 3.713295228481563e-05, + "loss": 0.09060478210449219, + "step": 1663 + }, + { + "epoch": 0.23186790218072878, + "grad_norm": 0.41292959451675415, + "learning_rate": 3.712808690412983e-05, + "loss": 0.10589218139648438, + "step": 1664 + }, + { + "epoch": 0.23200724587194316, + "grad_norm": 0.49808940291404724, + "learning_rate": 3.7123217717974166e-05, + "loss": 0.09931373596191406, + "step": 1665 + }, + { + "epoch": 0.23214658956315753, + "grad_norm": 0.6659296751022339, + "learning_rate": 3.711834472743045e-05, + "loss": 0.10638809204101562, + "step": 1666 + }, + { + "epoch": 0.2322859332543719, + "grad_norm": 0.5282164812088013, + "learning_rate": 3.7113467933581364e-05, + "loss": 0.08391761779785156, + "step": 1667 + }, + { + "epoch": 0.2324252769455863, + "grad_norm": 0.51567542552948, + "learning_rate": 3.7108587337510405e-05, + "loss": 0.12395668029785156, + "step": 1668 + }, + { + "epoch": 0.23256462063680067, + "grad_norm": 0.6609163880348206, + "learning_rate": 3.7103702940301934e-05, + "loss": 0.10086441040039062, + "step": 1669 + }, + { + "epoch": 0.23270396432801504, + "grad_norm": 0.4839893877506256, + "learning_rate": 3.709881474304115e-05, + "loss": 0.08846664428710938, + "step": 1670 + }, + { + "epoch": 0.23284330801922942, + "grad_norm": 0.7399137616157532, + "learning_rate": 3.7093922746814104e-05, + "loss": 0.09350776672363281, + "step": 1671 + }, + { + "epoch": 0.2329826517104438, + "grad_norm": 0.43581441044807434, + "learning_rate": 3.7089026952707695e-05, + "loss": 0.07612991333007812, + "step": 1672 + }, + { + "epoch": 0.23312199540165818, + "grad_norm": 1.4833694696426392, + "learning_rate": 3.7084127361809636e-05, + "loss": 0.13666725158691406, + "step": 1673 + }, + { + "epoch": 0.23326133909287258, + "grad_norm": 0.775842547416687, + "learning_rate": 3.707922397520852e-05, + "loss": 0.08961868286132812, + "step": 1674 + }, + { + "epoch": 0.23340068278408696, + "grad_norm": 0.44912031292915344, + "learning_rate": 3.707431679399375e-05, + "loss": 0.10228347778320312, + "step": 1675 + }, + { + "epoch": 0.23354002647530134, + "grad_norm": 0.7949982285499573, + "learning_rate": 3.7069405819255615e-05, + "loss": 0.09578895568847656, + "step": 1676 + }, + { + "epoch": 0.23367937016651572, + "grad_norm": 0.4954523742198944, + "learning_rate": 3.706449105208521e-05, + "loss": 0.10465431213378906, + "step": 1677 + }, + { + "epoch": 0.2338187138577301, + "grad_norm": 0.7204354405403137, + "learning_rate": 3.705957249357447e-05, + "loss": 0.11700439453125, + "step": 1678 + }, + { + "epoch": 0.23395805754894447, + "grad_norm": 0.9714399576187134, + "learning_rate": 3.70546501448162e-05, + "loss": 0.09612083435058594, + "step": 1679 + }, + { + "epoch": 0.23409740124015885, + "grad_norm": 0.5849028825759888, + "learning_rate": 3.704972400690404e-05, + "loss": 0.10639572143554688, + "step": 1680 + }, + { + "epoch": 0.23423674493137323, + "grad_norm": 0.47931769490242004, + "learning_rate": 3.704479408093245e-05, + "loss": 0.10466384887695312, + "step": 1681 + }, + { + "epoch": 0.2343760886225876, + "grad_norm": 0.630955159664154, + "learning_rate": 3.703986036799676e-05, + "loss": 0.0942525863647461, + "step": 1682 + }, + { + "epoch": 0.23451543231380198, + "grad_norm": 0.3718426525592804, + "learning_rate": 3.7034922869193125e-05, + "loss": 0.0866384506225586, + "step": 1683 + }, + { + "epoch": 0.23465477600501639, + "grad_norm": 1.1566215753555298, + "learning_rate": 3.702998158561854e-05, + "loss": 0.10633468627929688, + "step": 1684 + }, + { + "epoch": 0.23479411969623076, + "grad_norm": 0.43583405017852783, + "learning_rate": 3.7025036518370846e-05, + "loss": 0.10125350952148438, + "step": 1685 + }, + { + "epoch": 0.23493346338744514, + "grad_norm": 0.7629043459892273, + "learning_rate": 3.7020087668548725e-05, + "loss": 0.10546493530273438, + "step": 1686 + }, + { + "epoch": 0.23507280707865952, + "grad_norm": 1.1714704036712646, + "learning_rate": 3.7015135037251696e-05, + "loss": 0.11434364318847656, + "step": 1687 + }, + { + "epoch": 0.2352121507698739, + "grad_norm": 0.5457972288131714, + "learning_rate": 3.7010178625580134e-05, + "loss": 0.10201835632324219, + "step": 1688 + }, + { + "epoch": 0.23535149446108827, + "grad_norm": 1.0871883630752563, + "learning_rate": 3.700521843463522e-05, + "loss": 0.10319328308105469, + "step": 1689 + }, + { + "epoch": 0.23549083815230265, + "grad_norm": 1.47628915309906, + "learning_rate": 3.7000254465519014e-05, + "loss": 0.12137794494628906, + "step": 1690 + }, + { + "epoch": 0.23563018184351703, + "grad_norm": 0.44007188081741333, + "learning_rate": 3.6995286719334385e-05, + "loss": 0.11427497863769531, + "step": 1691 + }, + { + "epoch": 0.2357695255347314, + "grad_norm": 0.554021954536438, + "learning_rate": 3.6990315197185054e-05, + "loss": 0.109100341796875, + "step": 1692 + }, + { + "epoch": 0.23590886922594578, + "grad_norm": 0.48087939620018005, + "learning_rate": 3.6985339900175584e-05, + "loss": 0.09446907043457031, + "step": 1693 + }, + { + "epoch": 0.2360482129171602, + "grad_norm": 0.5541826486587524, + "learning_rate": 3.698036082941137e-05, + "loss": 0.08913135528564453, + "step": 1694 + }, + { + "epoch": 0.23618755660837457, + "grad_norm": 0.42419350147247314, + "learning_rate": 3.6975377985998645e-05, + "loss": 0.08716869354248047, + "step": 1695 + }, + { + "epoch": 0.23632690029958894, + "grad_norm": 0.9003869891166687, + "learning_rate": 3.697039137104449e-05, + "loss": 0.10712909698486328, + "step": 1696 + }, + { + "epoch": 0.23646624399080332, + "grad_norm": 0.49845653772354126, + "learning_rate": 3.696540098565681e-05, + "loss": 0.1186370849609375, + "step": 1697 + }, + { + "epoch": 0.2366055876820177, + "grad_norm": 0.7263861894607544, + "learning_rate": 3.696040683094436e-05, + "loss": 0.11147689819335938, + "step": 1698 + }, + { + "epoch": 0.23674493137323208, + "grad_norm": 0.7232884168624878, + "learning_rate": 3.695540890801672e-05, + "loss": 0.1344165802001953, + "step": 1699 + }, + { + "epoch": 0.23688427506444645, + "grad_norm": 0.42477747797966003, + "learning_rate": 3.6950407217984326e-05, + "loss": 0.09159660339355469, + "step": 1700 + }, + { + "epoch": 0.23702361875566083, + "grad_norm": 0.4649583697319031, + "learning_rate": 3.694540176195843e-05, + "loss": 0.10343170166015625, + "step": 1701 + }, + { + "epoch": 0.2371629624468752, + "grad_norm": 0.5597682595252991, + "learning_rate": 3.694039254105113e-05, + "loss": 0.08608055114746094, + "step": 1702 + }, + { + "epoch": 0.23730230613808959, + "grad_norm": 0.4700224995613098, + "learning_rate": 3.693537955637537e-05, + "loss": 0.10228347778320312, + "step": 1703 + }, + { + "epoch": 0.237441649829304, + "grad_norm": 0.39851701259613037, + "learning_rate": 3.6930362809044906e-05, + "loss": 0.1092987060546875, + "step": 1704 + }, + { + "epoch": 0.23758099352051837, + "grad_norm": 0.7022625803947449, + "learning_rate": 3.692534230017436e-05, + "loss": 0.11268806457519531, + "step": 1705 + }, + { + "epoch": 0.23772033721173275, + "grad_norm": 0.8913640975952148, + "learning_rate": 3.692031803087916e-05, + "loss": 0.10760307312011719, + "step": 1706 + }, + { + "epoch": 0.23785968090294712, + "grad_norm": 0.951581597328186, + "learning_rate": 3.691529000227559e-05, + "loss": 0.10941123962402344, + "step": 1707 + }, + { + "epoch": 0.2379990245941615, + "grad_norm": 0.5266606211662292, + "learning_rate": 3.691025821548077e-05, + "loss": 0.09762382507324219, + "step": 1708 + }, + { + "epoch": 0.23813836828537588, + "grad_norm": 0.47937697172164917, + "learning_rate": 3.6905222671612634e-05, + "loss": 0.10945320129394531, + "step": 1709 + }, + { + "epoch": 0.23827771197659026, + "grad_norm": 0.6750760078430176, + "learning_rate": 3.6900183371789984e-05, + "loss": 0.09175872802734375, + "step": 1710 + }, + { + "epoch": 0.23841705566780463, + "grad_norm": 0.5860791206359863, + "learning_rate": 3.689514031713242e-05, + "loss": 0.07972431182861328, + "step": 1711 + }, + { + "epoch": 0.238556399359019, + "grad_norm": 0.60374516248703, + "learning_rate": 3.68900935087604e-05, + "loss": 0.08646583557128906, + "step": 1712 + }, + { + "epoch": 0.2386957430502334, + "grad_norm": 0.9443480372428894, + "learning_rate": 3.688504294779521e-05, + "loss": 0.09433937072753906, + "step": 1713 + }, + { + "epoch": 0.2388350867414478, + "grad_norm": 1.1857385635375977, + "learning_rate": 3.687998863535897e-05, + "loss": 0.10815048217773438, + "step": 1714 + }, + { + "epoch": 0.23897443043266217, + "grad_norm": 0.498077392578125, + "learning_rate": 3.687493057257464e-05, + "loss": 0.10036087036132812, + "step": 1715 + }, + { + "epoch": 0.23911377412387655, + "grad_norm": 0.5361073017120361, + "learning_rate": 3.686986876056599e-05, + "loss": 0.08026313781738281, + "step": 1716 + }, + { + "epoch": 0.23925311781509093, + "grad_norm": 0.6346088647842407, + "learning_rate": 3.6864803200457646e-05, + "loss": 0.09618282318115234, + "step": 1717 + }, + { + "epoch": 0.2393924615063053, + "grad_norm": 0.6180944442749023, + "learning_rate": 3.685973389337506e-05, + "loss": 0.09784126281738281, + "step": 1718 + }, + { + "epoch": 0.23953180519751968, + "grad_norm": 0.4588444232940674, + "learning_rate": 3.6854660840444524e-05, + "loss": 0.11002159118652344, + "step": 1719 + }, + { + "epoch": 0.23967114888873406, + "grad_norm": 0.6767001152038574, + "learning_rate": 3.6849584042793145e-05, + "loss": 0.11021804809570312, + "step": 1720 + }, + { + "epoch": 0.23981049257994844, + "grad_norm": 0.4093320965766907, + "learning_rate": 3.6844503501548866e-05, + "loss": 0.08037567138671875, + "step": 1721 + }, + { + "epoch": 0.23994983627116281, + "grad_norm": 0.5722176432609558, + "learning_rate": 3.6839419217840486e-05, + "loss": 0.07763481140136719, + "step": 1722 + }, + { + "epoch": 0.2400891799623772, + "grad_norm": 1.7258340120315552, + "learning_rate": 3.6834331192797606e-05, + "loss": 0.13074302673339844, + "step": 1723 + }, + { + "epoch": 0.2402285236535916, + "grad_norm": 0.828822135925293, + "learning_rate": 3.682923942755066e-05, + "loss": 0.11242294311523438, + "step": 1724 + }, + { + "epoch": 0.24036786734480597, + "grad_norm": 0.8287203311920166, + "learning_rate": 3.6824143923230936e-05, + "loss": 0.11042976379394531, + "step": 1725 + }, + { + "epoch": 0.24050721103602035, + "grad_norm": 0.528779149055481, + "learning_rate": 3.681904468097054e-05, + "loss": 0.10056304931640625, + "step": 1726 + }, + { + "epoch": 0.24064655472723473, + "grad_norm": 0.5056732892990112, + "learning_rate": 3.681394170190239e-05, + "loss": 0.09480667114257812, + "step": 1727 + }, + { + "epoch": 0.2407858984184491, + "grad_norm": 0.7188267111778259, + "learning_rate": 3.6808834987160276e-05, + "loss": 0.09262847900390625, + "step": 1728 + }, + { + "epoch": 0.24092524210966348, + "grad_norm": 0.6547688245773315, + "learning_rate": 3.680372453787877e-05, + "loss": 0.10419464111328125, + "step": 1729 + }, + { + "epoch": 0.24106458580087786, + "grad_norm": 0.4625684916973114, + "learning_rate": 3.679861035519331e-05, + "loss": 0.0964508056640625, + "step": 1730 + }, + { + "epoch": 0.24120392949209224, + "grad_norm": 0.5336496829986572, + "learning_rate": 3.679349244024015e-05, + "loss": 0.107879638671875, + "step": 1731 + }, + { + "epoch": 0.24134327318330662, + "grad_norm": 0.7282295227050781, + "learning_rate": 3.6788370794156366e-05, + "loss": 0.09824180603027344, + "step": 1732 + }, + { + "epoch": 0.241482616874521, + "grad_norm": 0.7245709300041199, + "learning_rate": 3.678324541807988e-05, + "loss": 0.11816024780273438, + "step": 1733 + }, + { + "epoch": 0.2416219605657354, + "grad_norm": 0.7150017619132996, + "learning_rate": 3.677811631314943e-05, + "loss": 0.10387039184570312, + "step": 1734 + }, + { + "epoch": 0.24176130425694978, + "grad_norm": 0.4022858440876007, + "learning_rate": 3.677298348050459e-05, + "loss": 0.10344314575195312, + "step": 1735 + }, + { + "epoch": 0.24190064794816415, + "grad_norm": 0.5362487435340881, + "learning_rate": 3.676784692128575e-05, + "loss": 0.10602760314941406, + "step": 1736 + }, + { + "epoch": 0.24203999163937853, + "grad_norm": 0.7230885028839111, + "learning_rate": 3.676270663663414e-05, + "loss": 0.09670066833496094, + "step": 1737 + }, + { + "epoch": 0.2421793353305929, + "grad_norm": 0.35811546444892883, + "learning_rate": 3.675756262769182e-05, + "loss": 0.08979034423828125, + "step": 1738 + }, + { + "epoch": 0.2423186790218073, + "grad_norm": 0.8890015482902527, + "learning_rate": 3.6752414895601656e-05, + "loss": 0.12512969970703125, + "step": 1739 + }, + { + "epoch": 0.24245802271302166, + "grad_norm": 0.41328006982803345, + "learning_rate": 3.674726344150737e-05, + "loss": 0.08889007568359375, + "step": 1740 + }, + { + "epoch": 0.24259736640423604, + "grad_norm": 0.6189169883728027, + "learning_rate": 3.67421082665535e-05, + "loss": 0.10482597351074219, + "step": 1741 + }, + { + "epoch": 0.24273671009545042, + "grad_norm": 0.3896315395832062, + "learning_rate": 3.6736949371885395e-05, + "loss": 0.08650493621826172, + "step": 1742 + }, + { + "epoch": 0.2428760537866648, + "grad_norm": 0.6923957467079163, + "learning_rate": 3.6731786758649255e-05, + "loss": 0.11436843872070312, + "step": 1743 + }, + { + "epoch": 0.2430153974778792, + "grad_norm": 0.6273401379585266, + "learning_rate": 3.672662042799209e-05, + "loss": 0.11363029479980469, + "step": 1744 + }, + { + "epoch": 0.24315474116909358, + "grad_norm": 0.4818691909313202, + "learning_rate": 3.672145038106174e-05, + "loss": 0.0845174789428711, + "step": 1745 + }, + { + "epoch": 0.24329408486030796, + "grad_norm": 0.5232200622558594, + "learning_rate": 3.6716276619006874e-05, + "loss": 0.1065673828125, + "step": 1746 + }, + { + "epoch": 0.24343342855152234, + "grad_norm": 1.0783401727676392, + "learning_rate": 3.671109914297698e-05, + "loss": 0.1108551025390625, + "step": 1747 + }, + { + "epoch": 0.2435727722427367, + "grad_norm": 0.5938056111335754, + "learning_rate": 3.670591795412238e-05, + "loss": 0.09338760375976562, + "step": 1748 + }, + { + "epoch": 0.2437121159339511, + "grad_norm": 0.43114081025123596, + "learning_rate": 3.670073305359421e-05, + "loss": 0.09356689453125, + "step": 1749 + }, + { + "epoch": 0.24385145962516547, + "grad_norm": 0.4258286952972412, + "learning_rate": 3.669554444254444e-05, + "loss": 0.08612251281738281, + "step": 1750 + }, + { + "epoch": 0.24399080331637985, + "grad_norm": 0.42957577109336853, + "learning_rate": 3.6690352122125867e-05, + "loss": 0.09829521179199219, + "step": 1751 + }, + { + "epoch": 0.24413014700759422, + "grad_norm": 0.5969796180725098, + "learning_rate": 3.668515609349209e-05, + "loss": 0.11530685424804688, + "step": 1752 + }, + { + "epoch": 0.2442694906988086, + "grad_norm": 1.0219353437423706, + "learning_rate": 3.667995635779756e-05, + "loss": 0.12413215637207031, + "step": 1753 + }, + { + "epoch": 0.244408834390023, + "grad_norm": 1.0241777896881104, + "learning_rate": 3.667475291619754e-05, + "loss": 0.10028076171875, + "step": 1754 + }, + { + "epoch": 0.24454817808123738, + "grad_norm": 1.0465726852416992, + "learning_rate": 3.6669545769848115e-05, + "loss": 0.10177040100097656, + "step": 1755 + }, + { + "epoch": 0.24468752177245176, + "grad_norm": 0.8138828873634338, + "learning_rate": 3.666433491990619e-05, + "loss": 0.08757591247558594, + "step": 1756 + }, + { + "epoch": 0.24482686546366614, + "grad_norm": 0.7597835659980774, + "learning_rate": 3.66591203675295e-05, + "loss": 0.09989738464355469, + "step": 1757 + }, + { + "epoch": 0.24496620915488052, + "grad_norm": 0.5970317721366882, + "learning_rate": 3.665390211387659e-05, + "loss": 0.09417343139648438, + "step": 1758 + }, + { + "epoch": 0.2451055528460949, + "grad_norm": 0.4115969240665436, + "learning_rate": 3.664868016010686e-05, + "loss": 0.08739280700683594, + "step": 1759 + }, + { + "epoch": 0.24524489653730927, + "grad_norm": 0.8301898241043091, + "learning_rate": 3.664345450738048e-05, + "loss": 0.09954071044921875, + "step": 1760 + }, + { + "epoch": 0.24538424022852365, + "grad_norm": 0.6844363808631897, + "learning_rate": 3.6638225156858494e-05, + "loss": 0.09521102905273438, + "step": 1761 + }, + { + "epoch": 0.24552358391973803, + "grad_norm": 0.9552773237228394, + "learning_rate": 3.663299210970273e-05, + "loss": 0.08619880676269531, + "step": 1762 + }, + { + "epoch": 0.2456629276109524, + "grad_norm": 0.676528811454773, + "learning_rate": 3.662775536707586e-05, + "loss": 0.0936737060546875, + "step": 1763 + }, + { + "epoch": 0.2458022713021668, + "grad_norm": 0.89321368932724, + "learning_rate": 3.662251493014137e-05, + "loss": 0.09593391418457031, + "step": 1764 + }, + { + "epoch": 0.24594161499338119, + "grad_norm": 0.7969486117362976, + "learning_rate": 3.661727080006356e-05, + "loss": 0.09990310668945312, + "step": 1765 + }, + { + "epoch": 0.24608095868459556, + "grad_norm": 0.6530003547668457, + "learning_rate": 3.6612022978007546e-05, + "loss": 0.10227584838867188, + "step": 1766 + }, + { + "epoch": 0.24622030237580994, + "grad_norm": 1.284582257270813, + "learning_rate": 3.66067714651393e-05, + "loss": 0.10499763488769531, + "step": 1767 + }, + { + "epoch": 0.24635964606702432, + "grad_norm": 0.7088553309440613, + "learning_rate": 3.6601516262625556e-05, + "loss": 0.09611892700195312, + "step": 1768 + }, + { + "epoch": 0.2464989897582387, + "grad_norm": 1.1697510480880737, + "learning_rate": 3.6596257371633926e-05, + "loss": 0.11255264282226562, + "step": 1769 + }, + { + "epoch": 0.24663833344945307, + "grad_norm": 0.9965795278549194, + "learning_rate": 3.659099479333281e-05, + "loss": 0.10837554931640625, + "step": 1770 + }, + { + "epoch": 0.24677767714066745, + "grad_norm": 0.6686175465583801, + "learning_rate": 3.658572852889143e-05, + "loss": 0.10995864868164062, + "step": 1771 + }, + { + "epoch": 0.24691702083188183, + "grad_norm": 0.5130788087844849, + "learning_rate": 3.658045857947983e-05, + "loss": 0.10387039184570312, + "step": 1772 + }, + { + "epoch": 0.2470563645230962, + "grad_norm": 2.6440367698669434, + "learning_rate": 3.657518494626887e-05, + "loss": 0.1539134979248047, + "step": 1773 + }, + { + "epoch": 0.24719570821431058, + "grad_norm": 1.336533784866333, + "learning_rate": 3.6569907630430234e-05, + "loss": 0.105438232421875, + "step": 1774 + }, + { + "epoch": 0.247335051905525, + "grad_norm": 1.6590379476547241, + "learning_rate": 3.6564626633136416e-05, + "loss": 0.10179328918457031, + "step": 1775 + }, + { + "epoch": 0.24747439559673937, + "grad_norm": 1.103308081626892, + "learning_rate": 3.6559341955560744e-05, + "loss": 0.10371017456054688, + "step": 1776 + }, + { + "epoch": 0.24761373928795374, + "grad_norm": 0.8181685209274292, + "learning_rate": 3.655405359887734e-05, + "loss": 0.11313629150390625, + "step": 1777 + }, + { + "epoch": 0.24775308297916812, + "grad_norm": 0.44933387637138367, + "learning_rate": 3.654876156426116e-05, + "loss": 0.11289596557617188, + "step": 1778 + }, + { + "epoch": 0.2478924266703825, + "grad_norm": 1.0407267808914185, + "learning_rate": 3.6543465852887975e-05, + "loss": 0.10938835144042969, + "step": 1779 + }, + { + "epoch": 0.24803177036159688, + "grad_norm": 1.8754897117614746, + "learning_rate": 3.6538166465934375e-05, + "loss": 0.12480354309082031, + "step": 1780 + }, + { + "epoch": 0.24817111405281125, + "grad_norm": 2.08943247795105, + "learning_rate": 3.653286340457776e-05, + "loss": 0.11827850341796875, + "step": 1781 + }, + { + "epoch": 0.24831045774402563, + "grad_norm": 1.1671751737594604, + "learning_rate": 3.6527556669996345e-05, + "loss": 0.08552932739257812, + "step": 1782 + }, + { + "epoch": 0.24844980143524, + "grad_norm": 0.558492124080658, + "learning_rate": 3.6522246263369174e-05, + "loss": 0.11203765869140625, + "step": 1783 + }, + { + "epoch": 0.24858914512645439, + "grad_norm": 0.4015926420688629, + "learning_rate": 3.6516932185876085e-05, + "loss": 0.09124183654785156, + "step": 1784 + }, + { + "epoch": 0.2487284888176688, + "grad_norm": 0.5842437148094177, + "learning_rate": 3.651161443869776e-05, + "loss": 0.10857009887695312, + "step": 1785 + }, + { + "epoch": 0.24886783250888317, + "grad_norm": 1.0265312194824219, + "learning_rate": 3.650629302301567e-05, + "loss": 0.10873985290527344, + "step": 1786 + }, + { + "epoch": 0.24900717620009755, + "grad_norm": 0.5663455128669739, + "learning_rate": 3.650096794001211e-05, + "loss": 0.12546539306640625, + "step": 1787 + }, + { + "epoch": 0.24914651989131192, + "grad_norm": 0.9780675172805786, + "learning_rate": 3.6495639190870204e-05, + "loss": 0.11851119995117188, + "step": 1788 + }, + { + "epoch": 0.2492858635825263, + "grad_norm": 0.5085411071777344, + "learning_rate": 3.649030677677387e-05, + "loss": 0.09145164489746094, + "step": 1789 + }, + { + "epoch": 0.24942520727374068, + "grad_norm": 0.553114116191864, + "learning_rate": 3.648497069890785e-05, + "loss": 0.11277484893798828, + "step": 1790 + }, + { + "epoch": 0.24956455096495506, + "grad_norm": 1.0006221532821655, + "learning_rate": 3.6479630958457696e-05, + "loss": 0.10270309448242188, + "step": 1791 + }, + { + "epoch": 0.24970389465616943, + "grad_norm": 0.4301810562610626, + "learning_rate": 3.647428755660978e-05, + "loss": 0.10884857177734375, + "step": 1792 + }, + { + "epoch": 0.2498432383473838, + "grad_norm": 0.7301721572875977, + "learning_rate": 3.646894049455129e-05, + "loss": 0.1099853515625, + "step": 1793 + }, + { + "epoch": 0.2499825820385982, + "grad_norm": 0.45305392146110535, + "learning_rate": 3.646358977347021e-05, + "loss": 0.07990646362304688, + "step": 1794 + }, + { + "epoch": 0.25012192572981257, + "grad_norm": 0.6776914596557617, + "learning_rate": 3.6458235394555345e-05, + "loss": 0.08172988891601562, + "step": 1795 + }, + { + "epoch": 0.25026126942102694, + "grad_norm": 1.0397921800613403, + "learning_rate": 3.6452877358996336e-05, + "loss": 0.09016609191894531, + "step": 1796 + }, + { + "epoch": 0.2504006131122413, + "grad_norm": 0.47686317563056946, + "learning_rate": 3.64475156679836e-05, + "loss": 0.09930419921875, + "step": 1797 + }, + { + "epoch": 0.2505399568034557, + "grad_norm": 0.7150285840034485, + "learning_rate": 3.644215032270838e-05, + "loss": 0.1056976318359375, + "step": 1798 + }, + { + "epoch": 0.2506793004946701, + "grad_norm": 0.40345680713653564, + "learning_rate": 3.643678132436274e-05, + "loss": 0.09219741821289062, + "step": 1799 + }, + { + "epoch": 0.2508186441858845, + "grad_norm": 1.310410976409912, + "learning_rate": 3.643140867413956e-05, + "loss": 0.10166740417480469, + "step": 1800 + }, + { + "epoch": 0.2509579878770989, + "grad_norm": 0.47272422909736633, + "learning_rate": 3.642603237323249e-05, + "loss": 0.08368682861328125, + "step": 1801 + }, + { + "epoch": 0.25109733156831326, + "grad_norm": 0.7056936025619507, + "learning_rate": 3.6420652422836046e-05, + "loss": 0.0861968994140625, + "step": 1802 + }, + { + "epoch": 0.25123667525952764, + "grad_norm": 1.4007391929626465, + "learning_rate": 3.641526882414553e-05, + "loss": 0.13833999633789062, + "step": 1803 + }, + { + "epoch": 0.251376018950742, + "grad_norm": 0.3816663324832916, + "learning_rate": 3.640988157835704e-05, + "loss": 0.07926368713378906, + "step": 1804 + }, + { + "epoch": 0.2515153626419564, + "grad_norm": 0.8746573328971863, + "learning_rate": 3.640449068666751e-05, + "loss": 0.09640121459960938, + "step": 1805 + }, + { + "epoch": 0.2516547063331708, + "grad_norm": 0.7847357392311096, + "learning_rate": 3.639909615027468e-05, + "loss": 0.09247970581054688, + "step": 1806 + }, + { + "epoch": 0.25179405002438515, + "grad_norm": 0.6216973066329956, + "learning_rate": 3.6393697970377074e-05, + "loss": 0.10019302368164062, + "step": 1807 + }, + { + "epoch": 0.25193339371559953, + "grad_norm": 0.5845248699188232, + "learning_rate": 3.638829614817405e-05, + "loss": 0.101226806640625, + "step": 1808 + }, + { + "epoch": 0.2520727374068139, + "grad_norm": 0.8722448348999023, + "learning_rate": 3.638289068486577e-05, + "loss": 0.0997467041015625, + "step": 1809 + }, + { + "epoch": 0.2522120810980283, + "grad_norm": 1.8739145994186401, + "learning_rate": 3.6377481581653225e-05, + "loss": 0.1434326171875, + "step": 1810 + }, + { + "epoch": 0.25235142478924266, + "grad_norm": 0.6896849870681763, + "learning_rate": 3.637206883973816e-05, + "loss": 0.1361103057861328, + "step": 1811 + }, + { + "epoch": 0.25249076848045704, + "grad_norm": 0.6950340867042542, + "learning_rate": 3.6366652460323186e-05, + "loss": 0.106231689453125, + "step": 1812 + }, + { + "epoch": 0.2526301121716714, + "grad_norm": 0.6778542995452881, + "learning_rate": 3.6361232444611695e-05, + "loss": 0.08740615844726562, + "step": 1813 + }, + { + "epoch": 0.2527694558628858, + "grad_norm": 0.6352048516273499, + "learning_rate": 3.635580879380788e-05, + "loss": 0.11494255065917969, + "step": 1814 + }, + { + "epoch": 0.25290879955410017, + "grad_norm": 0.48777931928634644, + "learning_rate": 3.635038150911677e-05, + "loss": 0.110809326171875, + "step": 1815 + }, + { + "epoch": 0.25304814324531455, + "grad_norm": 0.6533340811729431, + "learning_rate": 3.634495059174417e-05, + "loss": 0.1202545166015625, + "step": 1816 + }, + { + "epoch": 0.2531874869365289, + "grad_norm": 0.5263735055923462, + "learning_rate": 3.633951604289671e-05, + "loss": 0.11395835876464844, + "step": 1817 + }, + { + "epoch": 0.2533268306277433, + "grad_norm": 0.6557647585868835, + "learning_rate": 3.633407786378182e-05, + "loss": 0.10971450805664062, + "step": 1818 + }, + { + "epoch": 0.2534661743189577, + "grad_norm": 0.6357787251472473, + "learning_rate": 3.632863605560775e-05, + "loss": 0.10358238220214844, + "step": 1819 + }, + { + "epoch": 0.2536055180101721, + "grad_norm": 0.7651012539863586, + "learning_rate": 3.632319061958353e-05, + "loss": 0.13370132446289062, + "step": 1820 + }, + { + "epoch": 0.2537448617013865, + "grad_norm": 0.4658437669277191, + "learning_rate": 3.631774155691902e-05, + "loss": 0.11388969421386719, + "step": 1821 + }, + { + "epoch": 0.25388420539260087, + "grad_norm": 1.0176137685775757, + "learning_rate": 3.631228886882488e-05, + "loss": 0.10197257995605469, + "step": 1822 + }, + { + "epoch": 0.25402354908381525, + "grad_norm": 0.499526709318161, + "learning_rate": 3.630683255651256e-05, + "loss": 0.07716178894042969, + "step": 1823 + }, + { + "epoch": 0.2541628927750296, + "grad_norm": 0.8059961199760437, + "learning_rate": 3.6301372621194354e-05, + "loss": 0.11515140533447266, + "step": 1824 + }, + { + "epoch": 0.254302236466244, + "grad_norm": 0.5141969323158264, + "learning_rate": 3.6295909064083305e-05, + "loss": 0.08847713470458984, + "step": 1825 + }, + { + "epoch": 0.2544415801574584, + "grad_norm": 0.6818549633026123, + "learning_rate": 3.629044188639331e-05, + "loss": 0.11722183227539062, + "step": 1826 + }, + { + "epoch": 0.25458092384867276, + "grad_norm": 0.4321526885032654, + "learning_rate": 3.628497108933904e-05, + "loss": 0.08853530883789062, + "step": 1827 + }, + { + "epoch": 0.25472026753988714, + "grad_norm": 0.3915084898471832, + "learning_rate": 3.6279496674135985e-05, + "loss": 0.11795616149902344, + "step": 1828 + }, + { + "epoch": 0.2548596112311015, + "grad_norm": 0.5687150955200195, + "learning_rate": 3.6274018642000445e-05, + "loss": 0.0863199234008789, + "step": 1829 + }, + { + "epoch": 0.2549989549223159, + "grad_norm": 1.1218314170837402, + "learning_rate": 3.62685369941495e-05, + "loss": 0.10189056396484375, + "step": 1830 + }, + { + "epoch": 0.25513829861353027, + "grad_norm": 0.7089283466339111, + "learning_rate": 3.626305173180105e-05, + "loss": 0.11428070068359375, + "step": 1831 + }, + { + "epoch": 0.25527764230474465, + "grad_norm": 0.39349886775016785, + "learning_rate": 3.62575628561738e-05, + "loss": 0.09242630004882812, + "step": 1832 + }, + { + "epoch": 0.255416985995959, + "grad_norm": 0.6966400146484375, + "learning_rate": 3.6252070368487246e-05, + "loss": 0.10071754455566406, + "step": 1833 + }, + { + "epoch": 0.2555563296871734, + "grad_norm": 0.4759010374546051, + "learning_rate": 3.62465742699617e-05, + "loss": 0.09823989868164062, + "step": 1834 + }, + { + "epoch": 0.2556956733783878, + "grad_norm": 0.49466606974601746, + "learning_rate": 3.624107456181826e-05, + "loss": 0.12621307373046875, + "step": 1835 + }, + { + "epoch": 0.25583501706960216, + "grad_norm": 0.42093783617019653, + "learning_rate": 3.623557124527885e-05, + "loss": 0.10466384887695312, + "step": 1836 + }, + { + "epoch": 0.25597436076081653, + "grad_norm": 0.40918153524398804, + "learning_rate": 3.623006432156618e-05, + "loss": 0.09048080444335938, + "step": 1837 + }, + { + "epoch": 0.2561137044520309, + "grad_norm": 0.7708006501197815, + "learning_rate": 3.6224553791903744e-05, + "loss": 0.10151481628417969, + "step": 1838 + }, + { + "epoch": 0.2562530481432453, + "grad_norm": 0.7478400468826294, + "learning_rate": 3.621903965751588e-05, + "loss": 0.0864410400390625, + "step": 1839 + }, + { + "epoch": 0.2563923918344597, + "grad_norm": 1.47235107421875, + "learning_rate": 3.6213521919627675e-05, + "loss": 0.12742233276367188, + "step": 1840 + }, + { + "epoch": 0.2565317355256741, + "grad_norm": 0.4859006106853485, + "learning_rate": 3.6208000579465075e-05, + "loss": 0.08725738525390625, + "step": 1841 + }, + { + "epoch": 0.2566710792168885, + "grad_norm": 0.47042012214660645, + "learning_rate": 3.620247563825477e-05, + "loss": 0.09413719177246094, + "step": 1842 + }, + { + "epoch": 0.25681042290810285, + "grad_norm": 0.3612685799598694, + "learning_rate": 3.619694709722429e-05, + "loss": 0.08603668212890625, + "step": 1843 + }, + { + "epoch": 0.25694976659931723, + "grad_norm": 0.4445644021034241, + "learning_rate": 3.619141495760196e-05, + "loss": 0.08366584777832031, + "step": 1844 + }, + { + "epoch": 0.2570891102905316, + "grad_norm": 0.43157249689102173, + "learning_rate": 3.618587922061687e-05, + "loss": 0.10392379760742188, + "step": 1845 + }, + { + "epoch": 0.257228453981746, + "grad_norm": 0.44415998458862305, + "learning_rate": 3.6180339887498953e-05, + "loss": 0.09296035766601562, + "step": 1846 + }, + { + "epoch": 0.25736779767296036, + "grad_norm": 0.9077130556106567, + "learning_rate": 3.617479695947891e-05, + "loss": 0.10321235656738281, + "step": 1847 + }, + { + "epoch": 0.25750714136417474, + "grad_norm": 0.6162601709365845, + "learning_rate": 3.616925043778826e-05, + "loss": 0.10783958435058594, + "step": 1848 + }, + { + "epoch": 0.2576464850553891, + "grad_norm": 0.6186292767524719, + "learning_rate": 3.6163700323659327e-05, + "loss": 0.10366630554199219, + "step": 1849 + }, + { + "epoch": 0.2577858287466035, + "grad_norm": 0.6848590970039368, + "learning_rate": 3.615814661832519e-05, + "loss": 0.10402107238769531, + "step": 1850 + }, + { + "epoch": 0.2579251724378179, + "grad_norm": 0.7558383941650391, + "learning_rate": 3.6152589323019775e-05, + "loss": 0.11045265197753906, + "step": 1851 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.31549352407455444, + "learning_rate": 3.614702843897779e-05, + "loss": 0.09236907958984375, + "step": 1852 + }, + { + "epoch": 0.25820385982024663, + "grad_norm": 0.6392289996147156, + "learning_rate": 3.6141463967434715e-05, + "loss": 0.10296249389648438, + "step": 1853 + }, + { + "epoch": 0.258343203511461, + "grad_norm": 0.8658183217048645, + "learning_rate": 3.613589590962687e-05, + "loss": 0.11019134521484375, + "step": 1854 + }, + { + "epoch": 0.2584825472026754, + "grad_norm": 0.9094892740249634, + "learning_rate": 3.6130324266791344e-05, + "loss": 0.07885551452636719, + "step": 1855 + }, + { + "epoch": 0.25862189089388976, + "grad_norm": 1.2814197540283203, + "learning_rate": 3.612474904016602e-05, + "loss": 0.11929512023925781, + "step": 1856 + }, + { + "epoch": 0.25876123458510414, + "grad_norm": 0.8910754919052124, + "learning_rate": 3.61191702309896e-05, + "loss": 0.09092330932617188, + "step": 1857 + }, + { + "epoch": 0.2589005782763185, + "grad_norm": 0.4348459243774414, + "learning_rate": 3.611358784050157e-05, + "loss": 0.08975791931152344, + "step": 1858 + }, + { + "epoch": 0.2590399219675329, + "grad_norm": 1.5552594661712646, + "learning_rate": 3.610800186994219e-05, + "loss": 0.10889434814453125, + "step": 1859 + }, + { + "epoch": 0.2591792656587473, + "grad_norm": 0.9772698283195496, + "learning_rate": 3.6102412320552546e-05, + "loss": 0.11496353149414062, + "step": 1860 + }, + { + "epoch": 0.2593186093499617, + "grad_norm": 0.5303655862808228, + "learning_rate": 3.609681919357451e-05, + "loss": 0.102020263671875, + "step": 1861 + }, + { + "epoch": 0.2594579530411761, + "grad_norm": 0.5193725228309631, + "learning_rate": 3.609122249025075e-05, + "loss": 0.09510231018066406, + "step": 1862 + }, + { + "epoch": 0.25959729673239046, + "grad_norm": 0.6260876655578613, + "learning_rate": 3.608562221182472e-05, + "loss": 0.10037803649902344, + "step": 1863 + }, + { + "epoch": 0.25973664042360484, + "grad_norm": 0.978704571723938, + "learning_rate": 3.608001835954067e-05, + "loss": 0.11463546752929688, + "step": 1864 + }, + { + "epoch": 0.2598759841148192, + "grad_norm": 0.4534684717655182, + "learning_rate": 3.607441093464366e-05, + "loss": 0.08794975280761719, + "step": 1865 + }, + { + "epoch": 0.2600153278060336, + "grad_norm": 0.7365133762359619, + "learning_rate": 3.606879993837952e-05, + "loss": 0.08163642883300781, + "step": 1866 + }, + { + "epoch": 0.26015467149724797, + "grad_norm": 0.558170735836029, + "learning_rate": 3.60631853719949e-05, + "loss": 0.07211112976074219, + "step": 1867 + }, + { + "epoch": 0.26029401518846235, + "grad_norm": 0.367588609457016, + "learning_rate": 3.6057567236737206e-05, + "loss": 0.0885772705078125, + "step": 1868 + }, + { + "epoch": 0.2604333588796767, + "grad_norm": 0.37926650047302246, + "learning_rate": 3.605194553385468e-05, + "loss": 0.08649635314941406, + "step": 1869 + }, + { + "epoch": 0.2605727025708911, + "grad_norm": 0.36213475465774536, + "learning_rate": 3.6046320264596324e-05, + "loss": 0.08827018737792969, + "step": 1870 + }, + { + "epoch": 0.2607120462621055, + "grad_norm": 0.7543594837188721, + "learning_rate": 3.6040691430211955e-05, + "loss": 0.13199996948242188, + "step": 1871 + }, + { + "epoch": 0.26085138995331986, + "grad_norm": 0.42086297273635864, + "learning_rate": 3.603505903195217e-05, + "loss": 0.07292556762695312, + "step": 1872 + }, + { + "epoch": 0.26099073364453423, + "grad_norm": 0.47602251172065735, + "learning_rate": 3.602942307106834e-05, + "loss": 0.10824012756347656, + "step": 1873 + }, + { + "epoch": 0.2611300773357486, + "grad_norm": 0.4548465609550476, + "learning_rate": 3.602378354881267e-05, + "loss": 0.12086677551269531, + "step": 1874 + }, + { + "epoch": 0.261269421026963, + "grad_norm": 0.6505460143089294, + "learning_rate": 3.601814046643813e-05, + "loss": 0.10890960693359375, + "step": 1875 + }, + { + "epoch": 0.26140876471817737, + "grad_norm": 0.6262309551239014, + "learning_rate": 3.6012493825198466e-05, + "loss": 0.11978530883789062, + "step": 1876 + }, + { + "epoch": 0.26154810840939174, + "grad_norm": 0.5677730441093445, + "learning_rate": 3.600684362634826e-05, + "loss": 0.1271839141845703, + "step": 1877 + }, + { + "epoch": 0.2616874521006061, + "grad_norm": 0.7307996153831482, + "learning_rate": 3.600118987114283e-05, + "loss": 0.07589530944824219, + "step": 1878 + }, + { + "epoch": 0.2618267957918205, + "grad_norm": 0.6673225164413452, + "learning_rate": 3.599553256083833e-05, + "loss": 0.11626815795898438, + "step": 1879 + }, + { + "epoch": 0.26196613948303493, + "grad_norm": 0.5087465047836304, + "learning_rate": 3.598987169669168e-05, + "loss": 0.0963897705078125, + "step": 1880 + }, + { + "epoch": 0.2621054831742493, + "grad_norm": 0.5479161143302917, + "learning_rate": 3.598420727996059e-05, + "loss": 0.10629081726074219, + "step": 1881 + }, + { + "epoch": 0.2622448268654637, + "grad_norm": 0.543440580368042, + "learning_rate": 3.597853931190357e-05, + "loss": 0.07996559143066406, + "step": 1882 + }, + { + "epoch": 0.26238417055667806, + "grad_norm": 0.7755604386329651, + "learning_rate": 3.597286779377991e-05, + "loss": 0.09710311889648438, + "step": 1883 + }, + { + "epoch": 0.26252351424789244, + "grad_norm": 0.3710983395576477, + "learning_rate": 3.5967192726849694e-05, + "loss": 0.07789897918701172, + "step": 1884 + }, + { + "epoch": 0.2626628579391068, + "grad_norm": 0.6431487798690796, + "learning_rate": 3.59615141123738e-05, + "loss": 0.08535194396972656, + "step": 1885 + }, + { + "epoch": 0.2628022016303212, + "grad_norm": 0.7078412771224976, + "learning_rate": 3.5955831951613866e-05, + "loss": 0.10577392578125, + "step": 1886 + }, + { + "epoch": 0.2629415453215356, + "grad_norm": 0.5746040940284729, + "learning_rate": 3.595014624583235e-05, + "loss": 0.10319328308105469, + "step": 1887 + }, + { + "epoch": 0.26308088901274995, + "grad_norm": 0.6844614148139954, + "learning_rate": 3.5944456996292486e-05, + "loss": 0.09510040283203125, + "step": 1888 + }, + { + "epoch": 0.26322023270396433, + "grad_norm": 0.4684138596057892, + "learning_rate": 3.5938764204258306e-05, + "loss": 0.07483673095703125, + "step": 1889 + }, + { + "epoch": 0.2633595763951787, + "grad_norm": 0.5711475610733032, + "learning_rate": 3.59330678709946e-05, + "loss": 0.09819412231445312, + "step": 1890 + }, + { + "epoch": 0.2634989200863931, + "grad_norm": 0.8515329360961914, + "learning_rate": 3.5927367997766974e-05, + "loss": 0.10117530822753906, + "step": 1891 + }, + { + "epoch": 0.26363826377760746, + "grad_norm": 0.34845271706581116, + "learning_rate": 3.592166458584181e-05, + "loss": 0.077728271484375, + "step": 1892 + }, + { + "epoch": 0.26377760746882184, + "grad_norm": 0.5066868662834167, + "learning_rate": 3.591595763648626e-05, + "loss": 0.10128211975097656, + "step": 1893 + }, + { + "epoch": 0.2639169511600362, + "grad_norm": 0.45643994212150574, + "learning_rate": 3.59102471509683e-05, + "loss": 0.07625961303710938, + "step": 1894 + }, + { + "epoch": 0.2640562948512506, + "grad_norm": 0.5069481730461121, + "learning_rate": 3.590453313055666e-05, + "loss": 0.09384918212890625, + "step": 1895 + }, + { + "epoch": 0.264195638542465, + "grad_norm": 0.7848615050315857, + "learning_rate": 3.589881557652087e-05, + "loss": 0.10451126098632812, + "step": 1896 + }, + { + "epoch": 0.26433498223367935, + "grad_norm": 1.1117900609970093, + "learning_rate": 3.5893094490131224e-05, + "loss": 0.1281719207763672, + "step": 1897 + }, + { + "epoch": 0.2644743259248937, + "grad_norm": 0.5647560954093933, + "learning_rate": 3.588736987265884e-05, + "loss": 0.11759567260742188, + "step": 1898 + }, + { + "epoch": 0.2646136696161081, + "grad_norm": 0.5302659869194031, + "learning_rate": 3.588164172537557e-05, + "loss": 0.12061882019042969, + "step": 1899 + }, + { + "epoch": 0.26475301330732254, + "grad_norm": 0.6906512975692749, + "learning_rate": 3.58759100495541e-05, + "loss": 0.08638763427734375, + "step": 1900 + }, + { + "epoch": 0.2648923569985369, + "grad_norm": 0.812835693359375, + "learning_rate": 3.587017484646787e-05, + "loss": 0.12525367736816406, + "step": 1901 + }, + { + "epoch": 0.2650317006897513, + "grad_norm": 1.537558913230896, + "learning_rate": 3.586443611739111e-05, + "loss": 0.122161865234375, + "step": 1902 + }, + { + "epoch": 0.26517104438096567, + "grad_norm": 0.6174204349517822, + "learning_rate": 3.585869386359884e-05, + "loss": 0.098236083984375, + "step": 1903 + }, + { + "epoch": 0.26531038807218005, + "grad_norm": 0.4166198968887329, + "learning_rate": 3.5852948086366855e-05, + "loss": 0.0991058349609375, + "step": 1904 + }, + { + "epoch": 0.2654497317633944, + "grad_norm": 0.4836024343967438, + "learning_rate": 3.584719878697173e-05, + "loss": 0.08657264709472656, + "step": 1905 + }, + { + "epoch": 0.2655890754546088, + "grad_norm": 0.787804365158081, + "learning_rate": 3.5841445966690834e-05, + "loss": 0.09790420532226562, + "step": 1906 + }, + { + "epoch": 0.2657284191458232, + "grad_norm": 0.45492324233055115, + "learning_rate": 3.583568962680231e-05, + "loss": 0.10796165466308594, + "step": 1907 + }, + { + "epoch": 0.26586776283703756, + "grad_norm": 0.5630793571472168, + "learning_rate": 3.5829929768585086e-05, + "loss": 0.09400749206542969, + "step": 1908 + }, + { + "epoch": 0.26600710652825194, + "grad_norm": 0.4574260413646698, + "learning_rate": 3.582416639331886e-05, + "loss": 0.10048484802246094, + "step": 1909 + }, + { + "epoch": 0.2661464502194663, + "grad_norm": 0.6810020804405212, + "learning_rate": 3.5818399502284154e-05, + "loss": 0.12036705017089844, + "step": 1910 + }, + { + "epoch": 0.2662857939106807, + "grad_norm": 0.7037299275398254, + "learning_rate": 3.581262909676221e-05, + "loss": 0.13618087768554688, + "step": 1911 + }, + { + "epoch": 0.26642513760189507, + "grad_norm": 0.4725121855735779, + "learning_rate": 3.5806855178035085e-05, + "loss": 0.08306312561035156, + "step": 1912 + }, + { + "epoch": 0.26656448129310945, + "grad_norm": 0.33683472871780396, + "learning_rate": 3.580107774738562e-05, + "loss": 0.07567405700683594, + "step": 1913 + }, + { + "epoch": 0.2667038249843238, + "grad_norm": 0.37130436301231384, + "learning_rate": 3.579529680609742e-05, + "loss": 0.09502410888671875, + "step": 1914 + }, + { + "epoch": 0.2668431686755382, + "grad_norm": 0.9863664507865906, + "learning_rate": 3.578951235545489e-05, + "loss": 0.12209320068359375, + "step": 1915 + }, + { + "epoch": 0.2669825123667526, + "grad_norm": 0.8099647164344788, + "learning_rate": 3.578372439674319e-05, + "loss": 0.11366653442382812, + "step": 1916 + }, + { + "epoch": 0.26712185605796696, + "grad_norm": 0.7024099230766296, + "learning_rate": 3.577793293124828e-05, + "loss": 0.1267242431640625, + "step": 1917 + }, + { + "epoch": 0.26726119974918133, + "grad_norm": 0.3783480226993561, + "learning_rate": 3.577213796025689e-05, + "loss": 0.08746147155761719, + "step": 1918 + }, + { + "epoch": 0.2674005434403957, + "grad_norm": 0.552897036075592, + "learning_rate": 3.5766339485056524e-05, + "loss": 0.08239173889160156, + "step": 1919 + }, + { + "epoch": 0.26753988713161014, + "grad_norm": 1.2164027690887451, + "learning_rate": 3.5760537506935475e-05, + "loss": 0.10395622253417969, + "step": 1920 + }, + { + "epoch": 0.2676792308228245, + "grad_norm": 1.0615475177764893, + "learning_rate": 3.575473202718282e-05, + "loss": 0.10221385955810547, + "step": 1921 + }, + { + "epoch": 0.2678185745140389, + "grad_norm": 0.4322379529476166, + "learning_rate": 3.574892304708839e-05, + "loss": 0.09405517578125, + "step": 1922 + }, + { + "epoch": 0.2679579182052533, + "grad_norm": 0.7143469452857971, + "learning_rate": 3.5743110567942815e-05, + "loss": 0.1167144775390625, + "step": 1923 + }, + { + "epoch": 0.26809726189646765, + "grad_norm": 0.5683518648147583, + "learning_rate": 3.573729459103749e-05, + "loss": 0.10382843017578125, + "step": 1924 + }, + { + "epoch": 0.26823660558768203, + "grad_norm": 0.7539183497428894, + "learning_rate": 3.573147511766459e-05, + "loss": 0.08302497863769531, + "step": 1925 + }, + { + "epoch": 0.2683759492788964, + "grad_norm": 0.9067785143852234, + "learning_rate": 3.5725652149117085e-05, + "loss": 0.08611869812011719, + "step": 1926 + }, + { + "epoch": 0.2685152929701108, + "grad_norm": 0.5335357785224915, + "learning_rate": 3.571982568668869e-05, + "loss": 0.09127044677734375, + "step": 1927 + }, + { + "epoch": 0.26865463666132516, + "grad_norm": 0.4851275086402893, + "learning_rate": 3.571399573167392e-05, + "loss": 0.10611915588378906, + "step": 1928 + }, + { + "epoch": 0.26879398035253954, + "grad_norm": 0.8437263369560242, + "learning_rate": 3.570816228536806e-05, + "loss": 0.10076522827148438, + "step": 1929 + }, + { + "epoch": 0.2689333240437539, + "grad_norm": 0.45848551392555237, + "learning_rate": 3.570232534906716e-05, + "loss": 0.0850982666015625, + "step": 1930 + }, + { + "epoch": 0.2690726677349683, + "grad_norm": 0.5492698550224304, + "learning_rate": 3.569648492406805e-05, + "loss": 0.11025714874267578, + "step": 1931 + }, + { + "epoch": 0.2692120114261827, + "grad_norm": 0.4794093072414398, + "learning_rate": 3.569064101166835e-05, + "loss": 0.10953903198242188, + "step": 1932 + }, + { + "epoch": 0.26935135511739705, + "grad_norm": 0.5979156494140625, + "learning_rate": 3.568479361316644e-05, + "loss": 0.09929084777832031, + "step": 1933 + }, + { + "epoch": 0.26949069880861143, + "grad_norm": 0.5717805027961731, + "learning_rate": 3.567894272986149e-05, + "loss": 0.08685874938964844, + "step": 1934 + }, + { + "epoch": 0.2696300424998258, + "grad_norm": 0.39813151955604553, + "learning_rate": 3.567308836305341e-05, + "loss": 0.09638214111328125, + "step": 1935 + }, + { + "epoch": 0.2697693861910402, + "grad_norm": 0.3979974389076233, + "learning_rate": 3.566723051404292e-05, + "loss": 0.09121894836425781, + "step": 1936 + }, + { + "epoch": 0.26990872988225456, + "grad_norm": 0.5000281929969788, + "learning_rate": 3.56613691841315e-05, + "loss": 0.09685206413269043, + "step": 1937 + }, + { + "epoch": 0.27004807357346894, + "grad_norm": 0.4396647810935974, + "learning_rate": 3.5655504374621404e-05, + "loss": 0.1090097427368164, + "step": 1938 + }, + { + "epoch": 0.2701874172646833, + "grad_norm": 1.0587968826293945, + "learning_rate": 3.5649636086815656e-05, + "loss": 0.1168060302734375, + "step": 1939 + }, + { + "epoch": 0.27032676095589775, + "grad_norm": 0.7763506770133972, + "learning_rate": 3.5643764322018054e-05, + "loss": 0.10301494598388672, + "step": 1940 + }, + { + "epoch": 0.2704661046471121, + "grad_norm": 1.1256859302520752, + "learning_rate": 3.563788908153317e-05, + "loss": 0.11363029479980469, + "step": 1941 + }, + { + "epoch": 0.2706054483383265, + "grad_norm": 0.6987976431846619, + "learning_rate": 3.563201036666636e-05, + "loss": 0.07673072814941406, + "step": 1942 + }, + { + "epoch": 0.2707447920295409, + "grad_norm": 0.9623551368713379, + "learning_rate": 3.562612817872373e-05, + "loss": 0.106781005859375, + "step": 1943 + }, + { + "epoch": 0.27088413572075526, + "grad_norm": 0.5149175524711609, + "learning_rate": 3.5620242519012164e-05, + "loss": 0.09737777709960938, + "step": 1944 + }, + { + "epoch": 0.27102347941196964, + "grad_norm": 0.6787305474281311, + "learning_rate": 3.561435338883933e-05, + "loss": 0.08876991271972656, + "step": 1945 + }, + { + "epoch": 0.271162823103184, + "grad_norm": 0.6240215301513672, + "learning_rate": 3.560846078951366e-05, + "loss": 0.09912300109863281, + "step": 1946 + }, + { + "epoch": 0.2713021667943984, + "grad_norm": 0.567781925201416, + "learning_rate": 3.560256472234434e-05, + "loss": 0.10831451416015625, + "step": 1947 + }, + { + "epoch": 0.27144151048561277, + "grad_norm": 0.48684918880462646, + "learning_rate": 3.559666518864136e-05, + "loss": 0.09077072143554688, + "step": 1948 + }, + { + "epoch": 0.27158085417682715, + "grad_norm": 0.50290447473526, + "learning_rate": 3.5590762189715445e-05, + "loss": 0.09780502319335938, + "step": 1949 + }, + { + "epoch": 0.2717201978680415, + "grad_norm": 1.0155134201049805, + "learning_rate": 3.558485572687812e-05, + "loss": 0.11310768127441406, + "step": 1950 + }, + { + "epoch": 0.2718595415592559, + "grad_norm": 0.3519640564918518, + "learning_rate": 3.557894580144166e-05, + "loss": 0.09203529357910156, + "step": 1951 + }, + { + "epoch": 0.2719988852504703, + "grad_norm": 1.2010060548782349, + "learning_rate": 3.5573032414719116e-05, + "loss": 0.12823104858398438, + "step": 1952 + }, + { + "epoch": 0.27213822894168466, + "grad_norm": 0.6177572011947632, + "learning_rate": 3.556711556802431e-05, + "loss": 0.11001014709472656, + "step": 1953 + }, + { + "epoch": 0.27227757263289903, + "grad_norm": 0.5329762101173401, + "learning_rate": 3.556119526267182e-05, + "loss": 0.1001291275024414, + "step": 1954 + }, + { + "epoch": 0.2724169163241134, + "grad_norm": 0.4254553020000458, + "learning_rate": 3.5555271499977015e-05, + "loss": 0.08673095703125, + "step": 1955 + }, + { + "epoch": 0.2725562600153278, + "grad_norm": 0.36570432782173157, + "learning_rate": 3.554934428125602e-05, + "loss": 0.08226966857910156, + "step": 1956 + }, + { + "epoch": 0.27269560370654217, + "grad_norm": 0.45432525873184204, + "learning_rate": 3.554341360782572e-05, + "loss": 0.10203933715820312, + "step": 1957 + }, + { + "epoch": 0.27283494739775654, + "grad_norm": 0.6957197189331055, + "learning_rate": 3.553747948100378e-05, + "loss": 0.11005592346191406, + "step": 1958 + }, + { + "epoch": 0.2729742910889709, + "grad_norm": 0.5369907021522522, + "learning_rate": 3.5531541902108624e-05, + "loss": 0.08141326904296875, + "step": 1959 + }, + { + "epoch": 0.27311363478018535, + "grad_norm": 0.6957015991210938, + "learning_rate": 3.5525600872459444e-05, + "loss": 0.115142822265625, + "step": 1960 + }, + { + "epoch": 0.27325297847139973, + "grad_norm": 0.3878958523273468, + "learning_rate": 3.551965639337621e-05, + "loss": 0.09321403503417969, + "step": 1961 + }, + { + "epoch": 0.2733923221626141, + "grad_norm": 0.548028826713562, + "learning_rate": 3.5513708466179647e-05, + "loss": 0.10700035095214844, + "step": 1962 + }, + { + "epoch": 0.2735316658538285, + "grad_norm": 0.6845682263374329, + "learning_rate": 3.550775709219125e-05, + "loss": 0.11349296569824219, + "step": 1963 + }, + { + "epoch": 0.27367100954504286, + "grad_norm": 0.43343374133110046, + "learning_rate": 3.550180227273327e-05, + "loss": 0.09042930603027344, + "step": 1964 + }, + { + "epoch": 0.27381035323625724, + "grad_norm": 0.4719146490097046, + "learning_rate": 3.549584400912874e-05, + "loss": 0.09910392761230469, + "step": 1965 + }, + { + "epoch": 0.2739496969274716, + "grad_norm": 0.44602110981941223, + "learning_rate": 3.5489882302701445e-05, + "loss": 0.107147216796875, + "step": 1966 + }, + { + "epoch": 0.274089040618686, + "grad_norm": 0.4495578706264496, + "learning_rate": 3.548391715477594e-05, + "loss": 0.09388160705566406, + "step": 1967 + }, + { + "epoch": 0.2742283843099004, + "grad_norm": 0.6246823668479919, + "learning_rate": 3.547794856667756e-05, + "loss": 0.08355712890625, + "step": 1968 + }, + { + "epoch": 0.27436772800111475, + "grad_norm": 0.6473162174224854, + "learning_rate": 3.547197653973236e-05, + "loss": 0.10063934326171875, + "step": 1969 + }, + { + "epoch": 0.27450707169232913, + "grad_norm": 0.5739568471908569, + "learning_rate": 3.546600107526721e-05, + "loss": 0.09838485717773438, + "step": 1970 + }, + { + "epoch": 0.2746464153835435, + "grad_norm": 0.6276091933250427, + "learning_rate": 3.546002217460971e-05, + "loss": 0.08431434631347656, + "step": 1971 + }, + { + "epoch": 0.2747857590747579, + "grad_norm": 0.25285014510154724, + "learning_rate": 3.5454039839088256e-05, + "loss": 0.06070137023925781, + "step": 1972 + }, + { + "epoch": 0.27492510276597226, + "grad_norm": 0.5521366596221924, + "learning_rate": 3.544805407003196e-05, + "loss": 0.12274932861328125, + "step": 1973 + }, + { + "epoch": 0.27506444645718664, + "grad_norm": 0.6475420594215393, + "learning_rate": 3.544206486877073e-05, + "loss": 0.09153175354003906, + "step": 1974 + }, + { + "epoch": 0.275203790148401, + "grad_norm": 0.7204529047012329, + "learning_rate": 3.543607223663524e-05, + "loss": 0.09910392761230469, + "step": 1975 + }, + { + "epoch": 0.2753431338396154, + "grad_norm": 0.4995768368244171, + "learning_rate": 3.543007617495692e-05, + "loss": 0.10692596435546875, + "step": 1976 + }, + { + "epoch": 0.2754824775308298, + "grad_norm": 0.4869649410247803, + "learning_rate": 3.5424076685067935e-05, + "loss": 0.0844268798828125, + "step": 1977 + }, + { + "epoch": 0.27562182122204415, + "grad_norm": 0.4389885663986206, + "learning_rate": 3.5418073768301254e-05, + "loss": 0.10396385192871094, + "step": 1978 + }, + { + "epoch": 0.2757611649132585, + "grad_norm": 0.4725323021411896, + "learning_rate": 3.5412067425990585e-05, + "loss": 0.10226821899414062, + "step": 1979 + }, + { + "epoch": 0.2759005086044729, + "grad_norm": 0.5594927072525024, + "learning_rate": 3.54060576594704e-05, + "loss": 0.11007308959960938, + "step": 1980 + }, + { + "epoch": 0.27603985229568734, + "grad_norm": 0.7999679446220398, + "learning_rate": 3.540004447007592e-05, + "loss": 0.12648582458496094, + "step": 1981 + }, + { + "epoch": 0.2761791959869017, + "grad_norm": 0.3891359567642212, + "learning_rate": 3.5394027859143154e-05, + "loss": 0.08658790588378906, + "step": 1982 + }, + { + "epoch": 0.2763185396781161, + "grad_norm": 0.4532182812690735, + "learning_rate": 3.5388007828008845e-05, + "loss": 0.1101226806640625, + "step": 1983 + }, + { + "epoch": 0.27645788336933047, + "grad_norm": 0.5506977438926697, + "learning_rate": 3.5381984378010513e-05, + "loss": 0.10869407653808594, + "step": 1984 + }, + { + "epoch": 0.27659722706054485, + "grad_norm": 0.38897454738616943, + "learning_rate": 3.5375957510486426e-05, + "loss": 0.09346389770507812, + "step": 1985 + }, + { + "epoch": 0.2767365707517592, + "grad_norm": 0.5302215218544006, + "learning_rate": 3.5369927226775625e-05, + "loss": 0.09136009216308594, + "step": 1986 + }, + { + "epoch": 0.2768759144429736, + "grad_norm": 0.46857786178588867, + "learning_rate": 3.536389352821789e-05, + "loss": 0.09157443046569824, + "step": 1987 + }, + { + "epoch": 0.277015258134188, + "grad_norm": 0.7509437203407288, + "learning_rate": 3.535785641615378e-05, + "loss": 0.11061859130859375, + "step": 1988 + }, + { + "epoch": 0.27715460182540236, + "grad_norm": 0.5455876588821411, + "learning_rate": 3.53518158919246e-05, + "loss": 0.08901786804199219, + "step": 1989 + }, + { + "epoch": 0.27729394551661674, + "grad_norm": 0.36311376094818115, + "learning_rate": 3.5345771956872416e-05, + "loss": 0.08413887023925781, + "step": 1990 + }, + { + "epoch": 0.2774332892078311, + "grad_norm": 0.46132463216781616, + "learning_rate": 3.5339724612340055e-05, + "loss": 0.0963141918182373, + "step": 1991 + }, + { + "epoch": 0.2775726328990455, + "grad_norm": 0.4650948643684387, + "learning_rate": 3.5333673859671095e-05, + "loss": 0.09014511108398438, + "step": 1992 + }, + { + "epoch": 0.27771197659025987, + "grad_norm": 0.7308070063591003, + "learning_rate": 3.532761970020987e-05, + "loss": 0.13817787170410156, + "step": 1993 + }, + { + "epoch": 0.27785132028147425, + "grad_norm": 0.3516034185886383, + "learning_rate": 3.532156213530149e-05, + "loss": 0.08388710021972656, + "step": 1994 + }, + { + "epoch": 0.2779906639726886, + "grad_norm": 0.5816835165023804, + "learning_rate": 3.5315501166291806e-05, + "loss": 0.11684036254882812, + "step": 1995 + }, + { + "epoch": 0.278130007663903, + "grad_norm": 0.8744456768035889, + "learning_rate": 3.530943679452742e-05, + "loss": 0.09442520141601562, + "step": 1996 + }, + { + "epoch": 0.2782693513551174, + "grad_norm": 0.8287582397460938, + "learning_rate": 3.530336902135569e-05, + "loss": 0.11166000366210938, + "step": 1997 + }, + { + "epoch": 0.27840869504633176, + "grad_norm": 0.6182478666305542, + "learning_rate": 3.5297297848124756e-05, + "loss": 0.09850120544433594, + "step": 1998 + }, + { + "epoch": 0.27854803873754613, + "grad_norm": 0.8368113040924072, + "learning_rate": 3.5291223276183476e-05, + "loss": 0.16281700134277344, + "step": 1999 + }, + { + "epoch": 0.2786873824287605, + "grad_norm": 0.7414789795875549, + "learning_rate": 3.528514530688149e-05, + "loss": 0.09772491455078125, + "step": 2000 + }, + { + "epoch": 0.27882672611997494, + "grad_norm": 1.5253937244415283, + "learning_rate": 3.527906394156919e-05, + "loss": 0.10630989074707031, + "step": 2001 + }, + { + "epoch": 0.2789660698111893, + "grad_norm": 1.1796684265136719, + "learning_rate": 3.52729791815977e-05, + "loss": 0.10336112976074219, + "step": 2002 + }, + { + "epoch": 0.2791054135024037, + "grad_norm": 0.6950538754463196, + "learning_rate": 3.526689102831892e-05, + "loss": 0.108489990234375, + "step": 2003 + }, + { + "epoch": 0.2792447571936181, + "grad_norm": 0.42869114875793457, + "learning_rate": 3.526079948308551e-05, + "loss": 0.11172103881835938, + "step": 2004 + }, + { + "epoch": 0.27938410088483245, + "grad_norm": 0.3711647689342499, + "learning_rate": 3.525470454725087e-05, + "loss": 0.07848358154296875, + "step": 2005 + }, + { + "epoch": 0.27952344457604683, + "grad_norm": 0.9821681976318359, + "learning_rate": 3.524860622216914e-05, + "loss": 0.09612178802490234, + "step": 2006 + }, + { + "epoch": 0.2796627882672612, + "grad_norm": 1.6618419885635376, + "learning_rate": 3.524250450919524e-05, + "loss": 0.12527084350585938, + "step": 2007 + }, + { + "epoch": 0.2798021319584756, + "grad_norm": 0.5700966119766235, + "learning_rate": 3.523639940968484e-05, + "loss": 0.08934211730957031, + "step": 2008 + }, + { + "epoch": 0.27994147564968996, + "grad_norm": 0.695976972579956, + "learning_rate": 3.5230290924994334e-05, + "loss": 0.08948516845703125, + "step": 2009 + }, + { + "epoch": 0.28008081934090434, + "grad_norm": 0.5557721257209778, + "learning_rate": 3.5224179056480906e-05, + "loss": 0.087921142578125, + "step": 2010 + }, + { + "epoch": 0.2802201630321187, + "grad_norm": 0.7302590012550354, + "learning_rate": 3.521806380550246e-05, + "loss": 0.11031341552734375, + "step": 2011 + }, + { + "epoch": 0.2803595067233331, + "grad_norm": 0.5596675872802734, + "learning_rate": 3.5211945173417674e-05, + "loss": 0.11163711547851562, + "step": 2012 + }, + { + "epoch": 0.2804988504145475, + "grad_norm": 0.43095651268959045, + "learning_rate": 3.520582316158596e-05, + "loss": 0.09783172607421875, + "step": 2013 + }, + { + "epoch": 0.28063819410576185, + "grad_norm": 0.5426381230354309, + "learning_rate": 3.5199697771367494e-05, + "loss": 0.08706855773925781, + "step": 2014 + }, + { + "epoch": 0.28077753779697623, + "grad_norm": 0.5186313986778259, + "learning_rate": 3.5193569004123204e-05, + "loss": 0.09665107727050781, + "step": 2015 + }, + { + "epoch": 0.2809168814881906, + "grad_norm": 0.6567549109458923, + "learning_rate": 3.518743686121475e-05, + "loss": 0.09626007080078125, + "step": 2016 + }, + { + "epoch": 0.281056225179405, + "grad_norm": 0.4034946858882904, + "learning_rate": 3.5181301344004574e-05, + "loss": 0.09074783325195312, + "step": 2017 + }, + { + "epoch": 0.28119556887061936, + "grad_norm": 0.4889029264450073, + "learning_rate": 3.517516245385582e-05, + "loss": 0.1105804443359375, + "step": 2018 + }, + { + "epoch": 0.28133491256183374, + "grad_norm": 0.5822237133979797, + "learning_rate": 3.5169020192132425e-05, + "loss": 0.1280345916748047, + "step": 2019 + }, + { + "epoch": 0.2814742562530481, + "grad_norm": 0.6812788844108582, + "learning_rate": 3.516287456019907e-05, + "loss": 0.10105705261230469, + "step": 2020 + }, + { + "epoch": 0.28161359994426255, + "grad_norm": 0.5145977139472961, + "learning_rate": 3.515672555942115e-05, + "loss": 0.106170654296875, + "step": 2021 + }, + { + "epoch": 0.2817529436354769, + "grad_norm": 0.48416802287101746, + "learning_rate": 3.5150573191164855e-05, + "loss": 0.12720108032226562, + "step": 2022 + }, + { + "epoch": 0.2818922873266913, + "grad_norm": 0.5762851238250732, + "learning_rate": 3.514441745679708e-05, + "loss": 0.08830833435058594, + "step": 2023 + }, + { + "epoch": 0.2820316310179057, + "grad_norm": 0.4591979384422302, + "learning_rate": 3.5138258357685494e-05, + "loss": 0.08657264709472656, + "step": 2024 + }, + { + "epoch": 0.28217097470912006, + "grad_norm": 0.5983343124389648, + "learning_rate": 3.513209589519853e-05, + "loss": 0.10584259033203125, + "step": 2025 + }, + { + "epoch": 0.28231031840033444, + "grad_norm": 0.6214262843132019, + "learning_rate": 3.512593007070532e-05, + "loss": 0.08835601806640625, + "step": 2026 + }, + { + "epoch": 0.2824496620915488, + "grad_norm": 0.4884583353996277, + "learning_rate": 3.5119760885575785e-05, + "loss": 0.10404586791992188, + "step": 2027 + }, + { + "epoch": 0.2825890057827632, + "grad_norm": 0.5546515583992004, + "learning_rate": 3.5113588341180564e-05, + "loss": 0.11568641662597656, + "step": 2028 + }, + { + "epoch": 0.28272834947397757, + "grad_norm": 0.336719810962677, + "learning_rate": 3.510741243889106e-05, + "loss": 0.07889175415039062, + "step": 2029 + }, + { + "epoch": 0.28286769316519195, + "grad_norm": 0.35256800055503845, + "learning_rate": 3.510123318007943e-05, + "loss": 0.08304786682128906, + "step": 2030 + }, + { + "epoch": 0.2830070368564063, + "grad_norm": 0.5280969142913818, + "learning_rate": 3.509505056611855e-05, + "loss": 0.111297607421875, + "step": 2031 + }, + { + "epoch": 0.2831463805476207, + "grad_norm": 0.6875507235527039, + "learning_rate": 3.508886459838206e-05, + "loss": 0.11219978332519531, + "step": 2032 + }, + { + "epoch": 0.2832857242388351, + "grad_norm": 0.49009379744529724, + "learning_rate": 3.508267527824434e-05, + "loss": 0.08899307250976562, + "step": 2033 + }, + { + "epoch": 0.28342506793004946, + "grad_norm": 0.43936610221862793, + "learning_rate": 3.5076482607080513e-05, + "loss": 0.10557937622070312, + "step": 2034 + }, + { + "epoch": 0.28356441162126383, + "grad_norm": 0.40643271803855896, + "learning_rate": 3.507028658626646e-05, + "loss": 0.08600234985351562, + "step": 2035 + }, + { + "epoch": 0.2837037553124782, + "grad_norm": 0.8471086621284485, + "learning_rate": 3.5064087217178787e-05, + "loss": 0.1038818359375, + "step": 2036 + }, + { + "epoch": 0.2838430990036926, + "grad_norm": 0.5158953666687012, + "learning_rate": 3.505788450119485e-05, + "loss": 0.09281444549560547, + "step": 2037 + }, + { + "epoch": 0.28398244269490697, + "grad_norm": 0.7604515552520752, + "learning_rate": 3.505167843969276e-05, + "loss": 0.0981130599975586, + "step": 2038 + }, + { + "epoch": 0.28412178638612134, + "grad_norm": 0.5028108954429626, + "learning_rate": 3.504546903405135e-05, + "loss": 0.0921783447265625, + "step": 2039 + }, + { + "epoch": 0.2842611300773357, + "grad_norm": 0.618361234664917, + "learning_rate": 3.5039256285650214e-05, + "loss": 0.10581398010253906, + "step": 2040 + }, + { + "epoch": 0.28440047376855015, + "grad_norm": 0.37025171518325806, + "learning_rate": 3.5033040195869685e-05, + "loss": 0.0891265869140625, + "step": 2041 + }, + { + "epoch": 0.28453981745976453, + "grad_norm": 0.32601094245910645, + "learning_rate": 3.502682076609084e-05, + "loss": 0.08760452270507812, + "step": 2042 + }, + { + "epoch": 0.2846791611509789, + "grad_norm": 0.3449068069458008, + "learning_rate": 3.5020597997695484e-05, + "loss": 0.08321189880371094, + "step": 2043 + }, + { + "epoch": 0.2848185048421933, + "grad_norm": 0.3402368426322937, + "learning_rate": 3.501437189206618e-05, + "loss": 0.08222579956054688, + "step": 2044 + }, + { + "epoch": 0.28495784853340766, + "grad_norm": 1.1123073101043701, + "learning_rate": 3.5008142450586226e-05, + "loss": 0.1349658966064453, + "step": 2045 + }, + { + "epoch": 0.28509719222462204, + "grad_norm": 0.5089871287345886, + "learning_rate": 3.500190967463966e-05, + "loss": 0.10527610778808594, + "step": 2046 + }, + { + "epoch": 0.2852365359158364, + "grad_norm": 0.38736042380332947, + "learning_rate": 3.4995673565611265e-05, + "loss": 0.09382820129394531, + "step": 2047 + }, + { + "epoch": 0.2853758796070508, + "grad_norm": 0.2725287675857544, + "learning_rate": 3.498943412488656e-05, + "loss": 0.07949447631835938, + "step": 2048 + }, + { + "epoch": 0.2855152232982652, + "grad_norm": 0.6003334522247314, + "learning_rate": 3.4983191353851804e-05, + "loss": 0.09588050842285156, + "step": 2049 + }, + { + "epoch": 0.28565456698947955, + "grad_norm": 0.5046861171722412, + "learning_rate": 3.4976945253894e-05, + "loss": 0.11311149597167969, + "step": 2050 + }, + { + "epoch": 0.28579391068069393, + "grad_norm": 0.7701734900474548, + "learning_rate": 3.49706958264009e-05, + "loss": 0.10278129577636719, + "step": 2051 + }, + { + "epoch": 0.2859332543719083, + "grad_norm": 0.796151876449585, + "learning_rate": 3.496444307276097e-05, + "loss": 0.10177803039550781, + "step": 2052 + }, + { + "epoch": 0.2860725980631227, + "grad_norm": 0.38831818103790283, + "learning_rate": 3.495818699436343e-05, + "loss": 0.08589553833007812, + "step": 2053 + }, + { + "epoch": 0.28621194175433706, + "grad_norm": 0.761687159538269, + "learning_rate": 3.495192759259824e-05, + "loss": 0.09610939025878906, + "step": 2054 + }, + { + "epoch": 0.28635128544555144, + "grad_norm": 0.7137998342514038, + "learning_rate": 3.49456648688561e-05, + "loss": 0.1042938232421875, + "step": 2055 + }, + { + "epoch": 0.2864906291367658, + "grad_norm": 0.36243367195129395, + "learning_rate": 3.493939882452845e-05, + "loss": 0.08701515197753906, + "step": 2056 + }, + { + "epoch": 0.2866299728279802, + "grad_norm": 0.6012636423110962, + "learning_rate": 3.493312946100743e-05, + "loss": 0.09810638427734375, + "step": 2057 + }, + { + "epoch": 0.2867693165191946, + "grad_norm": 0.38831913471221924, + "learning_rate": 3.4926856779685993e-05, + "loss": 0.07594728469848633, + "step": 2058 + }, + { + "epoch": 0.28690866021040895, + "grad_norm": 0.4729180932044983, + "learning_rate": 3.492058078195776e-05, + "loss": 0.11568260192871094, + "step": 2059 + }, + { + "epoch": 0.2870480039016233, + "grad_norm": 0.49349096417427063, + "learning_rate": 3.491430146921712e-05, + "loss": 0.10857200622558594, + "step": 2060 + }, + { + "epoch": 0.28718734759283776, + "grad_norm": 0.6250549554824829, + "learning_rate": 3.49080188428592e-05, + "loss": 0.09729862213134766, + "step": 2061 + }, + { + "epoch": 0.28732669128405214, + "grad_norm": 0.736370861530304, + "learning_rate": 3.490173290427984e-05, + "loss": 0.1114501953125, + "step": 2062 + }, + { + "epoch": 0.2874660349752665, + "grad_norm": 0.46386000514030457, + "learning_rate": 3.489544365487564e-05, + "loss": 0.10046958923339844, + "step": 2063 + }, + { + "epoch": 0.2876053786664809, + "grad_norm": 0.43344372510910034, + "learning_rate": 3.488915109604393e-05, + "loss": 0.0726470947265625, + "step": 2064 + }, + { + "epoch": 0.28774472235769527, + "grad_norm": 0.6358215808868408, + "learning_rate": 3.488285522918277e-05, + "loss": 0.103057861328125, + "step": 2065 + }, + { + "epoch": 0.28788406604890965, + "grad_norm": 0.5364714860916138, + "learning_rate": 3.487655605569096e-05, + "loss": 0.08786964416503906, + "step": 2066 + }, + { + "epoch": 0.288023409740124, + "grad_norm": 0.7962645292282104, + "learning_rate": 3.487025357696804e-05, + "loss": 0.11811447143554688, + "step": 2067 + }, + { + "epoch": 0.2881627534313384, + "grad_norm": 0.5017166137695312, + "learning_rate": 3.486394779441426e-05, + "loss": 0.09339427947998047, + "step": 2068 + }, + { + "epoch": 0.2883020971225528, + "grad_norm": 0.7446008324623108, + "learning_rate": 3.485763870943064e-05, + "loss": 0.11197662353515625, + "step": 2069 + }, + { + "epoch": 0.28844144081376716, + "grad_norm": 0.7377222180366516, + "learning_rate": 3.48513263234189e-05, + "loss": 0.08985710144042969, + "step": 2070 + }, + { + "epoch": 0.28858078450498154, + "grad_norm": 0.7972334027290344, + "learning_rate": 3.484501063778151e-05, + "loss": 0.10547256469726562, + "step": 2071 + }, + { + "epoch": 0.2887201281961959, + "grad_norm": 0.9523621797561646, + "learning_rate": 3.483869165392167e-05, + "loss": 0.10811138153076172, + "step": 2072 + }, + { + "epoch": 0.2888594718874103, + "grad_norm": 0.6885198354721069, + "learning_rate": 3.483236937324332e-05, + "loss": 0.11858749389648438, + "step": 2073 + }, + { + "epoch": 0.28899881557862467, + "grad_norm": 0.5483176112174988, + "learning_rate": 3.482604379715113e-05, + "loss": 0.11445236206054688, + "step": 2074 + }, + { + "epoch": 0.28913815926983905, + "grad_norm": 0.504965603351593, + "learning_rate": 3.481971492705048e-05, + "loss": 0.09706497192382812, + "step": 2075 + }, + { + "epoch": 0.2892775029610534, + "grad_norm": 1.1115397214889526, + "learning_rate": 3.481338276434753e-05, + "loss": 0.08595085144042969, + "step": 2076 + }, + { + "epoch": 0.2894168466522678, + "grad_norm": 1.0511475801467896, + "learning_rate": 3.480704731044911e-05, + "loss": 0.10981178283691406, + "step": 2077 + }, + { + "epoch": 0.2895561903434822, + "grad_norm": 0.6764277815818787, + "learning_rate": 3.480070856676283e-05, + "loss": 0.09339714050292969, + "step": 2078 + }, + { + "epoch": 0.28969553403469656, + "grad_norm": 0.5880546569824219, + "learning_rate": 3.479436653469702e-05, + "loss": 0.10503578186035156, + "step": 2079 + }, + { + "epoch": 0.28983487772591093, + "grad_norm": 0.608733594417572, + "learning_rate": 3.478802121566073e-05, + "loss": 0.10369873046875, + "step": 2080 + }, + { + "epoch": 0.28997422141712537, + "grad_norm": 1.259231448173523, + "learning_rate": 3.478167261106373e-05, + "loss": 0.11305427551269531, + "step": 2081 + }, + { + "epoch": 0.29011356510833974, + "grad_norm": 0.908088207244873, + "learning_rate": 3.4775320722316555e-05, + "loss": 0.09795951843261719, + "step": 2082 + }, + { + "epoch": 0.2902529087995541, + "grad_norm": 0.3898991048336029, + "learning_rate": 3.476896555083044e-05, + "loss": 0.10217571258544922, + "step": 2083 + }, + { + "epoch": 0.2903922524907685, + "grad_norm": 0.8726428747177124, + "learning_rate": 3.476260709801736e-05, + "loss": 0.1063995361328125, + "step": 2084 + }, + { + "epoch": 0.2905315961819829, + "grad_norm": 0.44480207562446594, + "learning_rate": 3.475624536529002e-05, + "loss": 0.08466339111328125, + "step": 2085 + }, + { + "epoch": 0.29067093987319725, + "grad_norm": 0.45753002166748047, + "learning_rate": 3.4749880354061855e-05, + "loss": 0.09235954284667969, + "step": 2086 + }, + { + "epoch": 0.29081028356441163, + "grad_norm": 0.3739868700504303, + "learning_rate": 3.474351206574701e-05, + "loss": 0.10792922973632812, + "step": 2087 + }, + { + "epoch": 0.290949627255626, + "grad_norm": 0.4036974310874939, + "learning_rate": 3.4737140501760396e-05, + "loss": 0.0898284912109375, + "step": 2088 + }, + { + "epoch": 0.2910889709468404, + "grad_norm": 0.2917064428329468, + "learning_rate": 3.473076566351761e-05, + "loss": 0.06950759887695312, + "step": 2089 + }, + { + "epoch": 0.29122831463805476, + "grad_norm": 0.42970845103263855, + "learning_rate": 3.4724387552435004e-05, + "loss": 0.07973670959472656, + "step": 2090 + }, + { + "epoch": 0.29136765832926914, + "grad_norm": 0.5566427707672119, + "learning_rate": 3.471800616992965e-05, + "loss": 0.10890007019042969, + "step": 2091 + }, + { + "epoch": 0.2915070020204835, + "grad_norm": 0.44000962376594543, + "learning_rate": 3.471162151741934e-05, + "loss": 0.1005401611328125, + "step": 2092 + }, + { + "epoch": 0.2916463457116979, + "grad_norm": 0.5636531710624695, + "learning_rate": 3.47052335963226e-05, + "loss": 0.11909866333007812, + "step": 2093 + }, + { + "epoch": 0.2917856894029123, + "grad_norm": 0.4694679379463196, + "learning_rate": 3.469884240805869e-05, + "loss": 0.10953330993652344, + "step": 2094 + }, + { + "epoch": 0.29192503309412665, + "grad_norm": 0.5053476691246033, + "learning_rate": 3.4692447954047566e-05, + "loss": 0.09275245666503906, + "step": 2095 + }, + { + "epoch": 0.29206437678534103, + "grad_norm": 0.9431514739990234, + "learning_rate": 3.468605023570993e-05, + "loss": 0.1019124984741211, + "step": 2096 + }, + { + "epoch": 0.2922037204765554, + "grad_norm": 0.6847701668739319, + "learning_rate": 3.4679649254467244e-05, + "loss": 0.11291885375976562, + "step": 2097 + }, + { + "epoch": 0.2923430641677698, + "grad_norm": 0.6601539254188538, + "learning_rate": 3.467324501174163e-05, + "loss": 0.08331871032714844, + "step": 2098 + }, + { + "epoch": 0.29248240785898416, + "grad_norm": 0.7209383845329285, + "learning_rate": 3.466683750895596e-05, + "loss": 0.09385108947753906, + "step": 2099 + }, + { + "epoch": 0.29262175155019854, + "grad_norm": 0.5217559337615967, + "learning_rate": 3.4660426747533846e-05, + "loss": 0.0844721794128418, + "step": 2100 + }, + { + "epoch": 0.29276109524141297, + "grad_norm": 0.8308572769165039, + "learning_rate": 3.4654012728899624e-05, + "loss": 0.08640098571777344, + "step": 2101 + }, + { + "epoch": 0.29290043893262735, + "grad_norm": 1.1069687604904175, + "learning_rate": 3.464759545447832e-05, + "loss": 0.10311603546142578, + "step": 2102 + }, + { + "epoch": 0.2930397826238417, + "grad_norm": 0.519899845123291, + "learning_rate": 3.4641174925695716e-05, + "loss": 0.1273822784423828, + "step": 2103 + }, + { + "epoch": 0.2931791263150561, + "grad_norm": 0.5660131573677063, + "learning_rate": 3.4634751143978317e-05, + "loss": 0.09707832336425781, + "step": 2104 + }, + { + "epoch": 0.2933184700062705, + "grad_norm": 1.256427526473999, + "learning_rate": 3.4628324110753326e-05, + "loss": 0.11171722412109375, + "step": 2105 + }, + { + "epoch": 0.29345781369748486, + "grad_norm": 1.0843735933303833, + "learning_rate": 3.462189382744869e-05, + "loss": 0.09132957458496094, + "step": 2106 + }, + { + "epoch": 0.29359715738869924, + "grad_norm": 0.3993794322013855, + "learning_rate": 3.461546029549306e-05, + "loss": 0.08511924743652344, + "step": 2107 + }, + { + "epoch": 0.2937365010799136, + "grad_norm": 0.4280039072036743, + "learning_rate": 3.4609023516315834e-05, + "loss": 0.09238815307617188, + "step": 2108 + }, + { + "epoch": 0.293875844771128, + "grad_norm": 0.43211379647254944, + "learning_rate": 3.4602583491347116e-05, + "loss": 0.09300422668457031, + "step": 2109 + }, + { + "epoch": 0.29401518846234237, + "grad_norm": 0.4341447353363037, + "learning_rate": 3.4596140222017725e-05, + "loss": 0.09497356414794922, + "step": 2110 + }, + { + "epoch": 0.29415453215355675, + "grad_norm": 0.6074653267860413, + "learning_rate": 3.4589693709759216e-05, + "loss": 0.08537673950195312, + "step": 2111 + }, + { + "epoch": 0.2942938758447711, + "grad_norm": 0.5405095815658569, + "learning_rate": 3.4583243956003847e-05, + "loss": 0.09031105041503906, + "step": 2112 + }, + { + "epoch": 0.2944332195359855, + "grad_norm": 0.4934329092502594, + "learning_rate": 3.457679096218461e-05, + "loss": 0.10712432861328125, + "step": 2113 + }, + { + "epoch": 0.2945725632271999, + "grad_norm": 0.4705880880355835, + "learning_rate": 3.457033472973523e-05, + "loss": 0.09687614440917969, + "step": 2114 + }, + { + "epoch": 0.29471190691841426, + "grad_norm": 0.7594464421272278, + "learning_rate": 3.4563875260090114e-05, + "loss": 0.1034698486328125, + "step": 2115 + }, + { + "epoch": 0.29485125060962863, + "grad_norm": 0.5543537139892578, + "learning_rate": 3.45574125546844e-05, + "loss": 0.10367584228515625, + "step": 2116 + }, + { + "epoch": 0.294990594300843, + "grad_norm": 0.6267674565315247, + "learning_rate": 3.4550946614953984e-05, + "loss": 0.10161018371582031, + "step": 2117 + }, + { + "epoch": 0.2951299379920574, + "grad_norm": 0.5528331398963928, + "learning_rate": 3.454447744233543e-05, + "loss": 0.0946512222290039, + "step": 2118 + }, + { + "epoch": 0.29526928168327177, + "grad_norm": 0.4160483777523041, + "learning_rate": 3.453800503826604e-05, + "loss": 0.08843612670898438, + "step": 2119 + }, + { + "epoch": 0.29540862537448614, + "grad_norm": 0.6785407066345215, + "learning_rate": 3.453152940418384e-05, + "loss": 0.08759498596191406, + "step": 2120 + }, + { + "epoch": 0.2955479690657006, + "grad_norm": 0.32645702362060547, + "learning_rate": 3.4525050541527566e-05, + "loss": 0.0837860107421875, + "step": 2121 + }, + { + "epoch": 0.29568731275691496, + "grad_norm": 0.5568139553070068, + "learning_rate": 3.4518568451736675e-05, + "loss": 0.0971975326538086, + "step": 2122 + }, + { + "epoch": 0.29582665644812933, + "grad_norm": 0.5456815361976624, + "learning_rate": 3.4512083136251346e-05, + "loss": 0.07890892028808594, + "step": 2123 + }, + { + "epoch": 0.2959660001393437, + "grad_norm": 0.8517391681671143, + "learning_rate": 3.450559459651245e-05, + "loss": 0.10771560668945312, + "step": 2124 + }, + { + "epoch": 0.2961053438305581, + "grad_norm": 0.3687277138233185, + "learning_rate": 3.449910283396161e-05, + "loss": 0.08002376556396484, + "step": 2125 + }, + { + "epoch": 0.29624468752177247, + "grad_norm": 0.48090848326683044, + "learning_rate": 3.4492607850041136e-05, + "loss": 0.09629344940185547, + "step": 2126 + }, + { + "epoch": 0.29638403121298684, + "grad_norm": 0.5203213095664978, + "learning_rate": 3.448610964619407e-05, + "loss": 0.09990882873535156, + "step": 2127 + }, + { + "epoch": 0.2965233749042012, + "grad_norm": 0.8020710349082947, + "learning_rate": 3.447960822386417e-05, + "loss": 0.12642669677734375, + "step": 2128 + }, + { + "epoch": 0.2966627185954156, + "grad_norm": 0.6456871628761292, + "learning_rate": 3.4473103584495894e-05, + "loss": 0.11972618103027344, + "step": 2129 + }, + { + "epoch": 0.29680206228663, + "grad_norm": 0.8542104959487915, + "learning_rate": 3.446659572953443e-05, + "loss": 0.11702156066894531, + "step": 2130 + }, + { + "epoch": 0.29694140597784435, + "grad_norm": 0.4246699810028076, + "learning_rate": 3.446008466042566e-05, + "loss": 0.09244537353515625, + "step": 2131 + }, + { + "epoch": 0.29708074966905873, + "grad_norm": 0.6083512902259827, + "learning_rate": 3.445357037861622e-05, + "loss": 0.09647369384765625, + "step": 2132 + }, + { + "epoch": 0.2972200933602731, + "grad_norm": 0.6889880299568176, + "learning_rate": 3.4447052885553424e-05, + "loss": 0.07866287231445312, + "step": 2133 + }, + { + "epoch": 0.2973594370514875, + "grad_norm": 1.0585412979125977, + "learning_rate": 3.44405321826853e-05, + "loss": 0.09650421142578125, + "step": 2134 + }, + { + "epoch": 0.29749878074270186, + "grad_norm": 1.0656801462173462, + "learning_rate": 3.443400827146062e-05, + "loss": 0.08791160583496094, + "step": 2135 + }, + { + "epoch": 0.29763812443391624, + "grad_norm": 0.4876661002635956, + "learning_rate": 3.442748115332882e-05, + "loss": 0.10219287872314453, + "step": 2136 + }, + { + "epoch": 0.2977774681251306, + "grad_norm": 0.7717230916023254, + "learning_rate": 3.442095082974011e-05, + "loss": 0.10959625244140625, + "step": 2137 + }, + { + "epoch": 0.297916811816345, + "grad_norm": 0.5631849765777588, + "learning_rate": 3.441441730214535e-05, + "loss": 0.09054756164550781, + "step": 2138 + }, + { + "epoch": 0.2980561555075594, + "grad_norm": 0.5076965093612671, + "learning_rate": 3.440788057199616e-05, + "loss": 0.08554267883300781, + "step": 2139 + }, + { + "epoch": 0.29819549919877375, + "grad_norm": 0.6352832913398743, + "learning_rate": 3.440134064074483e-05, + "loss": 0.11734390258789062, + "step": 2140 + }, + { + "epoch": 0.2983348428899882, + "grad_norm": 0.46394649147987366, + "learning_rate": 3.4394797509844415e-05, + "loss": 0.08931350708007812, + "step": 2141 + }, + { + "epoch": 0.29847418658120256, + "grad_norm": 0.4499259889125824, + "learning_rate": 3.438825118074863e-05, + "loss": 0.07714366912841797, + "step": 2142 + }, + { + "epoch": 0.29861353027241694, + "grad_norm": 1.2976733446121216, + "learning_rate": 3.4381701654911915e-05, + "loss": 0.11816072463989258, + "step": 2143 + }, + { + "epoch": 0.2987528739636313, + "grad_norm": 0.4784453213214874, + "learning_rate": 3.437514893378943e-05, + "loss": 0.090240478515625, + "step": 2144 + }, + { + "epoch": 0.2988922176548457, + "grad_norm": 0.3874991536140442, + "learning_rate": 3.4368593018837046e-05, + "loss": 0.07796955108642578, + "step": 2145 + }, + { + "epoch": 0.29903156134606007, + "grad_norm": 0.7354505062103271, + "learning_rate": 3.4362033911511336e-05, + "loss": 0.09737014770507812, + "step": 2146 + }, + { + "epoch": 0.29917090503727445, + "grad_norm": 0.9315333366394043, + "learning_rate": 3.435547161326958e-05, + "loss": 0.14058685302734375, + "step": 2147 + }, + { + "epoch": 0.2993102487284888, + "grad_norm": 0.6048086285591125, + "learning_rate": 3.434890612556977e-05, + "loss": 0.12213325500488281, + "step": 2148 + }, + { + "epoch": 0.2994495924197032, + "grad_norm": 0.5281503796577454, + "learning_rate": 3.434233744987061e-05, + "loss": 0.10611534118652344, + "step": 2149 + }, + { + "epoch": 0.2995889361109176, + "grad_norm": 0.6552187204360962, + "learning_rate": 3.433576558763151e-05, + "loss": 0.1031036376953125, + "step": 2150 + }, + { + "epoch": 0.29972827980213196, + "grad_norm": 0.37553954124450684, + "learning_rate": 3.4329190540312596e-05, + "loss": 0.09434318542480469, + "step": 2151 + }, + { + "epoch": 0.29986762349334634, + "grad_norm": 0.35598284006118774, + "learning_rate": 3.432261230937468e-05, + "loss": 0.08730506896972656, + "step": 2152 + }, + { + "epoch": 0.3000069671845607, + "grad_norm": 0.6854313015937805, + "learning_rate": 3.431603089627929e-05, + "loss": 0.12860488891601562, + "step": 2153 + }, + { + "epoch": 0.3001463108757751, + "grad_norm": 0.3890502154827118, + "learning_rate": 3.4309446302488686e-05, + "loss": 0.07787322998046875, + "step": 2154 + }, + { + "epoch": 0.30028565456698947, + "grad_norm": 0.528264045715332, + "learning_rate": 3.4302858529465806e-05, + "loss": 0.10934829711914062, + "step": 2155 + }, + { + "epoch": 0.30042499825820385, + "grad_norm": 0.7069416046142578, + "learning_rate": 3.429626757867429e-05, + "loss": 0.10016059875488281, + "step": 2156 + }, + { + "epoch": 0.3005643419494182, + "grad_norm": 0.8496620059013367, + "learning_rate": 3.428967345157852e-05, + "loss": 0.12720489501953125, + "step": 2157 + }, + { + "epoch": 0.3007036856406326, + "grad_norm": 1.0899163484573364, + "learning_rate": 3.428307614964354e-05, + "loss": 0.10907268524169922, + "step": 2158 + }, + { + "epoch": 0.300843029331847, + "grad_norm": 0.7079445719718933, + "learning_rate": 3.427647567433512e-05, + "loss": 0.13139724731445312, + "step": 2159 + }, + { + "epoch": 0.30098237302306136, + "grad_norm": 1.1602897644042969, + "learning_rate": 3.426987202711976e-05, + "loss": 0.11451148986816406, + "step": 2160 + }, + { + "epoch": 0.3011217167142758, + "grad_norm": 0.877894401550293, + "learning_rate": 3.4263265209464606e-05, + "loss": 0.09314918518066406, + "step": 2161 + }, + { + "epoch": 0.30126106040549017, + "grad_norm": 0.36089757084846497, + "learning_rate": 3.4256655222837574e-05, + "loss": 0.09607696533203125, + "step": 2162 + }, + { + "epoch": 0.30140040409670454, + "grad_norm": 0.339236319065094, + "learning_rate": 3.425004206870723e-05, + "loss": 0.08096504211425781, + "step": 2163 + }, + { + "epoch": 0.3015397477879189, + "grad_norm": 0.9875641465187073, + "learning_rate": 3.424342574854286e-05, + "loss": 0.11868095397949219, + "step": 2164 + }, + { + "epoch": 0.3016790914791333, + "grad_norm": 0.4389139413833618, + "learning_rate": 3.423680626381449e-05, + "loss": 0.09980392456054688, + "step": 2165 + }, + { + "epoch": 0.3018184351703477, + "grad_norm": 0.5438355803489685, + "learning_rate": 3.423018361599279e-05, + "loss": 0.0950164794921875, + "step": 2166 + }, + { + "epoch": 0.30195777886156205, + "grad_norm": 0.6282327175140381, + "learning_rate": 3.4223557806549175e-05, + "loss": 0.12180137634277344, + "step": 2167 + }, + { + "epoch": 0.30209712255277643, + "grad_norm": 0.5196643471717834, + "learning_rate": 3.421692883695574e-05, + "loss": 0.10793685913085938, + "step": 2168 + }, + { + "epoch": 0.3022364662439908, + "grad_norm": 0.6626913547515869, + "learning_rate": 3.4210296708685303e-05, + "loss": 0.10032272338867188, + "step": 2169 + }, + { + "epoch": 0.3023758099352052, + "grad_norm": 0.537963330745697, + "learning_rate": 3.420366142321136e-05, + "loss": 0.08997631072998047, + "step": 2170 + }, + { + "epoch": 0.30251515362641956, + "grad_norm": 0.7282382845878601, + "learning_rate": 3.419702298200812e-05, + "loss": 0.10723495483398438, + "step": 2171 + }, + { + "epoch": 0.30265449731763394, + "grad_norm": 0.4967740476131439, + "learning_rate": 3.41903813865505e-05, + "loss": 0.1079864501953125, + "step": 2172 + }, + { + "epoch": 0.3027938410088483, + "grad_norm": 0.6178718209266663, + "learning_rate": 3.418373663831411e-05, + "loss": 0.08711433410644531, + "step": 2173 + }, + { + "epoch": 0.3029331847000627, + "grad_norm": 0.3703649640083313, + "learning_rate": 3.4177088738775254e-05, + "loss": 0.10692405700683594, + "step": 2174 + }, + { + "epoch": 0.3030725283912771, + "grad_norm": 0.6796919703483582, + "learning_rate": 3.417043768941095e-05, + "loss": 0.10854244232177734, + "step": 2175 + }, + { + "epoch": 0.30321187208249145, + "grad_norm": 1.0287684202194214, + "learning_rate": 3.416378349169891e-05, + "loss": 0.11612701416015625, + "step": 2176 + }, + { + "epoch": 0.30335121577370583, + "grad_norm": 0.5455131530761719, + "learning_rate": 3.415712614711755e-05, + "loss": 0.09951019287109375, + "step": 2177 + }, + { + "epoch": 0.3034905594649202, + "grad_norm": 0.5129433274269104, + "learning_rate": 3.4150465657145964e-05, + "loss": 0.08732986450195312, + "step": 2178 + }, + { + "epoch": 0.3036299031561346, + "grad_norm": 0.28851577639579773, + "learning_rate": 3.414380202326397e-05, + "loss": 0.0754079818725586, + "step": 2179 + }, + { + "epoch": 0.30376924684734896, + "grad_norm": 0.573017418384552, + "learning_rate": 3.413713524695208e-05, + "loss": 0.1007080078125, + "step": 2180 + }, + { + "epoch": 0.30390859053856334, + "grad_norm": 0.46001359820365906, + "learning_rate": 3.413046532969149e-05, + "loss": 0.08643531799316406, + "step": 2181 + }, + { + "epoch": 0.30404793422977777, + "grad_norm": 0.3033980131149292, + "learning_rate": 3.412379227296411e-05, + "loss": 0.07881641387939453, + "step": 2182 + }, + { + "epoch": 0.30418727792099215, + "grad_norm": 0.6827101111412048, + "learning_rate": 3.411711607825253e-05, + "loss": 0.09555244445800781, + "step": 2183 + }, + { + "epoch": 0.3043266216122065, + "grad_norm": 0.4287656843662262, + "learning_rate": 3.411043674704007e-05, + "loss": 0.0908355712890625, + "step": 2184 + }, + { + "epoch": 0.3044659653034209, + "grad_norm": 0.6100982427597046, + "learning_rate": 3.4103754280810705e-05, + "loss": 0.0872344970703125, + "step": 2185 + }, + { + "epoch": 0.3046053089946353, + "grad_norm": 0.8008341789245605, + "learning_rate": 3.409706868104913e-05, + "loss": 0.11306571960449219, + "step": 2186 + }, + { + "epoch": 0.30474465268584966, + "grad_norm": 0.3981601893901825, + "learning_rate": 3.409037994924074e-05, + "loss": 0.076080322265625, + "step": 2187 + }, + { + "epoch": 0.30488399637706404, + "grad_norm": 0.7504094243049622, + "learning_rate": 3.408368808687161e-05, + "loss": 0.10343360900878906, + "step": 2188 + }, + { + "epoch": 0.3050233400682784, + "grad_norm": 0.529603123664856, + "learning_rate": 3.407699309542853e-05, + "loss": 0.10666656494140625, + "step": 2189 + }, + { + "epoch": 0.3051626837594928, + "grad_norm": 0.47010594606399536, + "learning_rate": 3.407029497639896e-05, + "loss": 0.09266853332519531, + "step": 2190 + }, + { + "epoch": 0.30530202745070717, + "grad_norm": 0.5007533431053162, + "learning_rate": 3.406359373127108e-05, + "loss": 0.11820220947265625, + "step": 2191 + }, + { + "epoch": 0.30544137114192155, + "grad_norm": 0.41564491391181946, + "learning_rate": 3.405688936153375e-05, + "loss": 0.07934093475341797, + "step": 2192 + }, + { + "epoch": 0.3055807148331359, + "grad_norm": 0.6206364631652832, + "learning_rate": 3.405018186867653e-05, + "loss": 0.10615730285644531, + "step": 2193 + }, + { + "epoch": 0.3057200585243503, + "grad_norm": 0.3721495270729065, + "learning_rate": 3.404347125418967e-05, + "loss": 0.08855628967285156, + "step": 2194 + }, + { + "epoch": 0.3058594022155647, + "grad_norm": 0.49193429946899414, + "learning_rate": 3.4036757519564116e-05, + "loss": 0.09521484375, + "step": 2195 + }, + { + "epoch": 0.30599874590677906, + "grad_norm": 0.48759907484054565, + "learning_rate": 3.40300406662915e-05, + "loss": 0.10703277587890625, + "step": 2196 + }, + { + "epoch": 0.30613808959799343, + "grad_norm": 0.49241793155670166, + "learning_rate": 3.402332069586416e-05, + "loss": 0.10277938842773438, + "step": 2197 + }, + { + "epoch": 0.3062774332892078, + "grad_norm": 0.42401450872421265, + "learning_rate": 3.401659760977513e-05, + "loss": 0.0828399658203125, + "step": 2198 + }, + { + "epoch": 0.3064167769804222, + "grad_norm": 0.3375469446182251, + "learning_rate": 3.4009871409518104e-05, + "loss": 0.08403301239013672, + "step": 2199 + }, + { + "epoch": 0.30655612067163657, + "grad_norm": 0.5821214914321899, + "learning_rate": 3.40031420965875e-05, + "loss": 0.08715248107910156, + "step": 2200 + }, + { + "epoch": 0.30669546436285094, + "grad_norm": 0.5236865878105164, + "learning_rate": 3.399640967247843e-05, + "loss": 0.08608055114746094, + "step": 2201 + }, + { + "epoch": 0.3068348080540654, + "grad_norm": 0.5465147495269775, + "learning_rate": 3.398967413868666e-05, + "loss": 0.08088493347167969, + "step": 2202 + }, + { + "epoch": 0.30697415174527976, + "grad_norm": 0.6144598126411438, + "learning_rate": 3.3982935496708704e-05, + "loss": 0.09711265563964844, + "step": 2203 + }, + { + "epoch": 0.30711349543649413, + "grad_norm": 0.6020606160163879, + "learning_rate": 3.397619374804171e-05, + "loss": 0.10264015197753906, + "step": 2204 + }, + { + "epoch": 0.3072528391277085, + "grad_norm": 0.4844193756580353, + "learning_rate": 3.3969448894183536e-05, + "loss": 0.08401107788085938, + "step": 2205 + }, + { + "epoch": 0.3073921828189229, + "grad_norm": 1.0908540487289429, + "learning_rate": 3.396270093663276e-05, + "loss": 0.09431648254394531, + "step": 2206 + }, + { + "epoch": 0.30753152651013727, + "grad_norm": 1.044950246810913, + "learning_rate": 3.39559498768886e-05, + "loss": 0.1266326904296875, + "step": 2207 + }, + { + "epoch": 0.30767087020135164, + "grad_norm": 0.4585292935371399, + "learning_rate": 3.3949195716451004e-05, + "loss": 0.10058021545410156, + "step": 2208 + }, + { + "epoch": 0.307810213892566, + "grad_norm": 0.5305322408676147, + "learning_rate": 3.394243845682058e-05, + "loss": 0.09962844848632812, + "step": 2209 + }, + { + "epoch": 0.3079495575837804, + "grad_norm": 0.49544110894203186, + "learning_rate": 3.3935678099498644e-05, + "loss": 0.08480262756347656, + "step": 2210 + }, + { + "epoch": 0.3080889012749948, + "grad_norm": 0.7254745364189148, + "learning_rate": 3.392891464598719e-05, + "loss": 0.10778236389160156, + "step": 2211 + }, + { + "epoch": 0.30822824496620915, + "grad_norm": 0.9944416880607605, + "learning_rate": 3.3922148097788906e-05, + "loss": 0.10288619995117188, + "step": 2212 + }, + { + "epoch": 0.30836758865742353, + "grad_norm": 0.38243958353996277, + "learning_rate": 3.3915378456407167e-05, + "loss": 0.08947944641113281, + "step": 2213 + }, + { + "epoch": 0.3085069323486379, + "grad_norm": 0.4126301109790802, + "learning_rate": 3.390860572334602e-05, + "loss": 0.08646392822265625, + "step": 2214 + }, + { + "epoch": 0.3086462760398523, + "grad_norm": 0.520907998085022, + "learning_rate": 3.390182990011022e-05, + "loss": 0.0814208984375, + "step": 2215 + }, + { + "epoch": 0.30878561973106666, + "grad_norm": 0.6573888659477234, + "learning_rate": 3.389505098820521e-05, + "loss": 0.10614967346191406, + "step": 2216 + }, + { + "epoch": 0.30892496342228104, + "grad_norm": 0.9226203560829163, + "learning_rate": 3.388826898913709e-05, + "loss": 0.1023716926574707, + "step": 2217 + }, + { + "epoch": 0.3090643071134954, + "grad_norm": 0.49294745922088623, + "learning_rate": 3.3881483904412685e-05, + "loss": 0.09208297729492188, + "step": 2218 + }, + { + "epoch": 0.3092036508047098, + "grad_norm": 0.8828102946281433, + "learning_rate": 3.3874695735539467e-05, + "loss": 0.0971832275390625, + "step": 2219 + }, + { + "epoch": 0.3093429944959242, + "grad_norm": 0.526613175868988, + "learning_rate": 3.3867904484025626e-05, + "loss": 0.10667037963867188, + "step": 2220 + }, + { + "epoch": 0.30948233818713855, + "grad_norm": 0.5357423424720764, + "learning_rate": 3.3861110151380015e-05, + "loss": 0.09980392456054688, + "step": 2221 + }, + { + "epoch": 0.309621681878353, + "grad_norm": 0.6962130665779114, + "learning_rate": 3.3854312739112186e-05, + "loss": 0.08608627319335938, + "step": 2222 + }, + { + "epoch": 0.30976102556956736, + "grad_norm": 0.3724520802497864, + "learning_rate": 3.384751224873237e-05, + "loss": 0.09062576293945312, + "step": 2223 + }, + { + "epoch": 0.30990036926078174, + "grad_norm": 0.48805883526802063, + "learning_rate": 3.384070868175146e-05, + "loss": 0.09583759307861328, + "step": 2224 + }, + { + "epoch": 0.3100397129519961, + "grad_norm": 1.1224833726882935, + "learning_rate": 3.383390203968109e-05, + "loss": 0.09821128845214844, + "step": 2225 + }, + { + "epoch": 0.3101790566432105, + "grad_norm": 0.8186205625534058, + "learning_rate": 3.38270923240335e-05, + "loss": 0.09426498413085938, + "step": 2226 + }, + { + "epoch": 0.31031840033442487, + "grad_norm": 0.8999630212783813, + "learning_rate": 3.382027953632169e-05, + "loss": 0.10291671752929688, + "step": 2227 + }, + { + "epoch": 0.31045774402563925, + "grad_norm": 1.2767413854599, + "learning_rate": 3.381346367805928e-05, + "loss": 0.11797904968261719, + "step": 2228 + }, + { + "epoch": 0.3105970877168536, + "grad_norm": 0.5676521062850952, + "learning_rate": 3.3806644750760615e-05, + "loss": 0.07837295532226562, + "step": 2229 + }, + { + "epoch": 0.310736431408068, + "grad_norm": 0.40331146121025085, + "learning_rate": 3.3799822755940694e-05, + "loss": 0.09167289733886719, + "step": 2230 + }, + { + "epoch": 0.3108757750992824, + "grad_norm": 0.3617352843284607, + "learning_rate": 3.379299769511521e-05, + "loss": 0.09654045104980469, + "step": 2231 + }, + { + "epoch": 0.31101511879049676, + "grad_norm": 1.1232472658157349, + "learning_rate": 3.3786169569800534e-05, + "loss": 0.1339282989501953, + "step": 2232 + }, + { + "epoch": 0.31115446248171114, + "grad_norm": 0.6361211538314819, + "learning_rate": 3.377933838151374e-05, + "loss": 0.09234046936035156, + "step": 2233 + }, + { + "epoch": 0.3112938061729255, + "grad_norm": 0.5229412913322449, + "learning_rate": 3.377250413177253e-05, + "loss": 0.09511375427246094, + "step": 2234 + }, + { + "epoch": 0.3114331498641399, + "grad_norm": 0.5465513467788696, + "learning_rate": 3.3765666822095336e-05, + "loss": 0.10082626342773438, + "step": 2235 + }, + { + "epoch": 0.31157249355535427, + "grad_norm": 0.5783780217170715, + "learning_rate": 3.375882645400125e-05, + "loss": 0.11132049560546875, + "step": 2236 + }, + { + "epoch": 0.31171183724656865, + "grad_norm": 0.4736022353172302, + "learning_rate": 3.375198302901004e-05, + "loss": 0.10608386993408203, + "step": 2237 + }, + { + "epoch": 0.311851180937783, + "grad_norm": 0.7147184014320374, + "learning_rate": 3.3745136548642175e-05, + "loss": 0.10601615905761719, + "step": 2238 + }, + { + "epoch": 0.3119905246289974, + "grad_norm": 0.8241739273071289, + "learning_rate": 3.373828701441877e-05, + "loss": 0.10969924926757812, + "step": 2239 + }, + { + "epoch": 0.3121298683202118, + "grad_norm": 0.8350490927696228, + "learning_rate": 3.3731434427861644e-05, + "loss": 0.11039924621582031, + "step": 2240 + }, + { + "epoch": 0.31226921201142616, + "grad_norm": 0.5176172852516174, + "learning_rate": 3.372457879049328e-05, + "loss": 0.083831787109375, + "step": 2241 + }, + { + "epoch": 0.3124085557026406, + "grad_norm": 0.35571184754371643, + "learning_rate": 3.3717720103836846e-05, + "loss": 0.09469223022460938, + "step": 2242 + }, + { + "epoch": 0.31254789939385497, + "grad_norm": 0.8767082095146179, + "learning_rate": 3.371085836941618e-05, + "loss": 0.1347064971923828, + "step": 2243 + }, + { + "epoch": 0.31268724308506934, + "grad_norm": 0.5708740949630737, + "learning_rate": 3.370399358875582e-05, + "loss": 0.08681678771972656, + "step": 2244 + }, + { + "epoch": 0.3128265867762837, + "grad_norm": 1.5869791507720947, + "learning_rate": 3.3697125763380944e-05, + "loss": 0.11468315124511719, + "step": 2245 + }, + { + "epoch": 0.3129659304674981, + "grad_norm": 1.1759752035140991, + "learning_rate": 3.369025489481744e-05, + "loss": 0.12551498413085938, + "step": 2246 + }, + { + "epoch": 0.3131052741587125, + "grad_norm": 0.8986391425132751, + "learning_rate": 3.3683380984591845e-05, + "loss": 0.08970451354980469, + "step": 2247 + }, + { + "epoch": 0.31324461784992685, + "grad_norm": 0.7021854519844055, + "learning_rate": 3.36765040342314e-05, + "loss": 0.13306045532226562, + "step": 2248 + }, + { + "epoch": 0.31338396154114123, + "grad_norm": 0.7171725630760193, + "learning_rate": 3.3669624045264e-05, + "loss": 0.08751678466796875, + "step": 2249 + }, + { + "epoch": 0.3135233052323556, + "grad_norm": 0.5362443327903748, + "learning_rate": 3.3662741019218206e-05, + "loss": 0.09351539611816406, + "step": 2250 + }, + { + "epoch": 0.31366264892357, + "grad_norm": 0.4910133183002472, + "learning_rate": 3.3655854957623295e-05, + "loss": 0.07937240600585938, + "step": 2251 + }, + { + "epoch": 0.31380199261478436, + "grad_norm": 0.7357692718505859, + "learning_rate": 3.3648965862009174e-05, + "loss": 0.10335826873779297, + "step": 2252 + }, + { + "epoch": 0.31394133630599874, + "grad_norm": 1.3043243885040283, + "learning_rate": 3.364207373390645e-05, + "loss": 0.1270580291748047, + "step": 2253 + }, + { + "epoch": 0.3140806799972131, + "grad_norm": 0.4393627345561981, + "learning_rate": 3.3635178574846403e-05, + "loss": 0.0951080322265625, + "step": 2254 + }, + { + "epoch": 0.3142200236884275, + "grad_norm": 0.6365562081336975, + "learning_rate": 3.362828038636097e-05, + "loss": 0.08769416809082031, + "step": 2255 + }, + { + "epoch": 0.3143593673796419, + "grad_norm": 0.6258069276809692, + "learning_rate": 3.3621379169982774e-05, + "loss": 0.08183574676513672, + "step": 2256 + }, + { + "epoch": 0.31449871107085625, + "grad_norm": 0.3630194664001465, + "learning_rate": 3.361447492724511e-05, + "loss": 0.082366943359375, + "step": 2257 + }, + { + "epoch": 0.31463805476207063, + "grad_norm": 0.9087234735488892, + "learning_rate": 3.3607567659681934e-05, + "loss": 0.09379005432128906, + "step": 2258 + }, + { + "epoch": 0.314777398453285, + "grad_norm": 0.815839409828186, + "learning_rate": 3.3600657368827894e-05, + "loss": 0.1160736083984375, + "step": 2259 + }, + { + "epoch": 0.3149167421444994, + "grad_norm": 0.39689868688583374, + "learning_rate": 3.35937440562183e-05, + "loss": 0.08857250213623047, + "step": 2260 + }, + { + "epoch": 0.31505608583571376, + "grad_norm": 0.7062476873397827, + "learning_rate": 3.358682772338912e-05, + "loss": 0.12250137329101562, + "step": 2261 + }, + { + "epoch": 0.3151954295269282, + "grad_norm": 0.6238400936126709, + "learning_rate": 3.357990837187701e-05, + "loss": 0.08953666687011719, + "step": 2262 + }, + { + "epoch": 0.31533477321814257, + "grad_norm": 0.75706547498703, + "learning_rate": 3.35729860032193e-05, + "loss": 0.10445976257324219, + "step": 2263 + }, + { + "epoch": 0.31547411690935695, + "grad_norm": 0.6777775287628174, + "learning_rate": 3.356606061895398e-05, + "loss": 0.11177635192871094, + "step": 2264 + }, + { + "epoch": 0.3156134606005713, + "grad_norm": 0.7969926595687866, + "learning_rate": 3.35591322206197e-05, + "loss": 0.10001277923583984, + "step": 2265 + }, + { + "epoch": 0.3157528042917857, + "grad_norm": 0.7321264743804932, + "learning_rate": 3.355220080975581e-05, + "loss": 0.10354423522949219, + "step": 2266 + }, + { + "epoch": 0.3158921479830001, + "grad_norm": 0.40182194113731384, + "learning_rate": 3.3545266387902295e-05, + "loss": 0.08821487426757812, + "step": 2267 + }, + { + "epoch": 0.31603149167421446, + "grad_norm": 0.5508785247802734, + "learning_rate": 3.353832895659984e-05, + "loss": 0.09101486206054688, + "step": 2268 + }, + { + "epoch": 0.31617083536542884, + "grad_norm": 0.436471551656723, + "learning_rate": 3.353138851738976e-05, + "loss": 0.11338424682617188, + "step": 2269 + }, + { + "epoch": 0.3163101790566432, + "grad_norm": 0.4916110336780548, + "learning_rate": 3.352444507181409e-05, + "loss": 0.09277153015136719, + "step": 2270 + }, + { + "epoch": 0.3164495227478576, + "grad_norm": 0.5680664777755737, + "learning_rate": 3.3517498621415496e-05, + "loss": 0.10187339782714844, + "step": 2271 + }, + { + "epoch": 0.31658886643907197, + "grad_norm": 0.5609452724456787, + "learning_rate": 3.3510549167737316e-05, + "loss": 0.10607147216796875, + "step": 2272 + }, + { + "epoch": 0.31672821013028635, + "grad_norm": 0.37439391016960144, + "learning_rate": 3.350359671232356e-05, + "loss": 0.08568763732910156, + "step": 2273 + }, + { + "epoch": 0.3168675538215007, + "grad_norm": 0.4095214009284973, + "learning_rate": 3.349664125671891e-05, + "loss": 0.1047515869140625, + "step": 2274 + }, + { + "epoch": 0.3170068975127151, + "grad_norm": 0.41139715909957886, + "learning_rate": 3.3489682802468704e-05, + "loss": 0.09209442138671875, + "step": 2275 + }, + { + "epoch": 0.3171462412039295, + "grad_norm": 0.41550368070602417, + "learning_rate": 3.348272135111895e-05, + "loss": 0.09502029418945312, + "step": 2276 + }, + { + "epoch": 0.31728558489514386, + "grad_norm": 0.5404378771781921, + "learning_rate": 3.347575690421633e-05, + "loss": 0.07940387725830078, + "step": 2277 + }, + { + "epoch": 0.31742492858635823, + "grad_norm": 0.5004920959472656, + "learning_rate": 3.346878946330819e-05, + "loss": 0.1176910400390625, + "step": 2278 + }, + { + "epoch": 0.3175642722775726, + "grad_norm": 1.1535298824310303, + "learning_rate": 3.346181902994252e-05, + "loss": 0.1441478729248047, + "step": 2279 + }, + { + "epoch": 0.317703615968787, + "grad_norm": 0.745276689529419, + "learning_rate": 3.3454845605668e-05, + "loss": 0.12178802490234375, + "step": 2280 + }, + { + "epoch": 0.31784295966000137, + "grad_norm": 0.6689409017562866, + "learning_rate": 3.3447869192033974e-05, + "loss": 0.0941314697265625, + "step": 2281 + }, + { + "epoch": 0.3179823033512158, + "grad_norm": 0.6609335541725159, + "learning_rate": 3.344088979059042e-05, + "loss": 0.09619331359863281, + "step": 2282 + }, + { + "epoch": 0.3181216470424302, + "grad_norm": 0.8044742345809937, + "learning_rate": 3.343390740288803e-05, + "loss": 0.08963775634765625, + "step": 2283 + }, + { + "epoch": 0.31826099073364456, + "grad_norm": 0.5809816122055054, + "learning_rate": 3.3426922030478106e-05, + "loss": 0.09812259674072266, + "step": 2284 + }, + { + "epoch": 0.31840033442485893, + "grad_norm": 0.3415715992450714, + "learning_rate": 3.341993367491266e-05, + "loss": 0.0751953125, + "step": 2285 + }, + { + "epoch": 0.3185396781160733, + "grad_norm": 0.4177146255970001, + "learning_rate": 3.3412942337744326e-05, + "loss": 0.10131263732910156, + "step": 2286 + }, + { + "epoch": 0.3186790218072877, + "grad_norm": 0.6629387736320496, + "learning_rate": 3.340594802052642e-05, + "loss": 0.08909225463867188, + "step": 2287 + }, + { + "epoch": 0.31881836549850207, + "grad_norm": 0.41119757294654846, + "learning_rate": 3.339895072481294e-05, + "loss": 0.08612823486328125, + "step": 2288 + }, + { + "epoch": 0.31895770918971644, + "grad_norm": 1.2154244184494019, + "learning_rate": 3.3391950452158504e-05, + "loss": 0.11425399780273438, + "step": 2289 + }, + { + "epoch": 0.3190970528809308, + "grad_norm": 0.4612462520599365, + "learning_rate": 3.338494720411842e-05, + "loss": 0.09807968139648438, + "step": 2290 + }, + { + "epoch": 0.3192363965721452, + "grad_norm": 0.809097409248352, + "learning_rate": 3.337794098224866e-05, + "loss": 0.11237335205078125, + "step": 2291 + }, + { + "epoch": 0.3193757402633596, + "grad_norm": 0.4072269797325134, + "learning_rate": 3.337093178810583e-05, + "loss": 0.1015634536743164, + "step": 2292 + }, + { + "epoch": 0.31951508395457395, + "grad_norm": 0.3978210687637329, + "learning_rate": 3.336391962324722e-05, + "loss": 0.09713935852050781, + "step": 2293 + }, + { + "epoch": 0.31965442764578833, + "grad_norm": 0.6284505128860474, + "learning_rate": 3.3356904489230784e-05, + "loss": 0.09910392761230469, + "step": 2294 + }, + { + "epoch": 0.3197937713370027, + "grad_norm": 0.78513103723526, + "learning_rate": 3.3349886387615096e-05, + "loss": 0.09649085998535156, + "step": 2295 + }, + { + "epoch": 0.3199331150282171, + "grad_norm": 0.520699143409729, + "learning_rate": 3.334286531995945e-05, + "loss": 0.07777023315429688, + "step": 2296 + }, + { + "epoch": 0.32007245871943146, + "grad_norm": 0.4867660105228424, + "learning_rate": 3.3335841287823746e-05, + "loss": 0.10250663757324219, + "step": 2297 + }, + { + "epoch": 0.32021180241064584, + "grad_norm": 0.4976649582386017, + "learning_rate": 3.332881429276857e-05, + "loss": 0.08569526672363281, + "step": 2298 + }, + { + "epoch": 0.3203511461018602, + "grad_norm": 0.46216070652008057, + "learning_rate": 3.3321784336355163e-05, + "loss": 0.08386039733886719, + "step": 2299 + }, + { + "epoch": 0.3204904897930746, + "grad_norm": 0.5650388598442078, + "learning_rate": 3.331475142014542e-05, + "loss": 0.10301399230957031, + "step": 2300 + }, + { + "epoch": 0.320629833484289, + "grad_norm": 0.5254377722740173, + "learning_rate": 3.3307715545701885e-05, + "loss": 0.09463691711425781, + "step": 2301 + }, + { + "epoch": 0.3207691771755034, + "grad_norm": 0.5444901585578918, + "learning_rate": 3.3300676714587784e-05, + "loss": 0.10064888000488281, + "step": 2302 + }, + { + "epoch": 0.3209085208667178, + "grad_norm": 0.8121922016143799, + "learning_rate": 3.329363492836697e-05, + "loss": 0.10265159606933594, + "step": 2303 + }, + { + "epoch": 0.32104786455793216, + "grad_norm": 0.41406163573265076, + "learning_rate": 3.328659018860398e-05, + "loss": 0.09577560424804688, + "step": 2304 + }, + { + "epoch": 0.32118720824914654, + "grad_norm": 0.789827287197113, + "learning_rate": 3.3279542496863984e-05, + "loss": 0.11804962158203125, + "step": 2305 + }, + { + "epoch": 0.3213265519403609, + "grad_norm": 0.37246832251548767, + "learning_rate": 3.3272491854712825e-05, + "loss": 0.08568954467773438, + "step": 2306 + }, + { + "epoch": 0.3214658956315753, + "grad_norm": 0.5241382122039795, + "learning_rate": 3.326543826371699e-05, + "loss": 0.07253837585449219, + "step": 2307 + }, + { + "epoch": 0.32160523932278967, + "grad_norm": 0.5057677030563354, + "learning_rate": 3.3258381725443625e-05, + "loss": 0.0917501449584961, + "step": 2308 + }, + { + "epoch": 0.32174458301400405, + "grad_norm": 0.2883089780807495, + "learning_rate": 3.325132224146054e-05, + "loss": 0.07054519653320312, + "step": 2309 + }, + { + "epoch": 0.3218839267052184, + "grad_norm": 0.5538381338119507, + "learning_rate": 3.3244259813336185e-05, + "loss": 0.10818862915039062, + "step": 2310 + }, + { + "epoch": 0.3220232703964328, + "grad_norm": 0.639905571937561, + "learning_rate": 3.323719444263967e-05, + "loss": 0.09549331665039062, + "step": 2311 + }, + { + "epoch": 0.3221626140876472, + "grad_norm": 0.6212720274925232, + "learning_rate": 3.323012613094075e-05, + "loss": 0.09559965133666992, + "step": 2312 + }, + { + "epoch": 0.32230195777886156, + "grad_norm": 0.5240026712417603, + "learning_rate": 3.322305487980987e-05, + "loss": 0.093658447265625, + "step": 2313 + }, + { + "epoch": 0.32244130147007594, + "grad_norm": 0.4675423204898834, + "learning_rate": 3.3215980690818076e-05, + "loss": 0.09000778198242188, + "step": 2314 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.7503044605255127, + "learning_rate": 3.32089035655371e-05, + "loss": 0.11036324501037598, + "step": 2315 + }, + { + "epoch": 0.3227199888525047, + "grad_norm": 0.40796035528182983, + "learning_rate": 3.320182350553931e-05, + "loss": 0.09159469604492188, + "step": 2316 + }, + { + "epoch": 0.32285933254371907, + "grad_norm": 0.5123752951622009, + "learning_rate": 3.319474051239775e-05, + "loss": 0.10390472412109375, + "step": 2317 + }, + { + "epoch": 0.32299867623493345, + "grad_norm": 0.48697224259376526, + "learning_rate": 3.318765458768608e-05, + "loss": 0.09609603881835938, + "step": 2318 + }, + { + "epoch": 0.3231380199261478, + "grad_norm": 0.8237524628639221, + "learning_rate": 3.318056573297864e-05, + "loss": 0.1055755615234375, + "step": 2319 + }, + { + "epoch": 0.3232773636173622, + "grad_norm": 0.6503774523735046, + "learning_rate": 3.317347394985042e-05, + "loss": 0.11531257629394531, + "step": 2320 + }, + { + "epoch": 0.3234167073085766, + "grad_norm": 0.43967896699905396, + "learning_rate": 3.316637923987704e-05, + "loss": 0.09140586853027344, + "step": 2321 + }, + { + "epoch": 0.323556050999791, + "grad_norm": 0.38754788041114807, + "learning_rate": 3.315928160463478e-05, + "loss": 0.08174324035644531, + "step": 2322 + }, + { + "epoch": 0.3236953946910054, + "grad_norm": 0.3856387436389923, + "learning_rate": 3.3152181045700584e-05, + "loss": 0.09763145446777344, + "step": 2323 + }, + { + "epoch": 0.32383473838221977, + "grad_norm": 0.39927437901496887, + "learning_rate": 3.314507756465202e-05, + "loss": 0.0931854248046875, + "step": 2324 + }, + { + "epoch": 0.32397408207343414, + "grad_norm": 0.6152353882789612, + "learning_rate": 3.313797116306734e-05, + "loss": 0.11178016662597656, + "step": 2325 + }, + { + "epoch": 0.3241134257646485, + "grad_norm": 0.547783613204956, + "learning_rate": 3.3130861842525416e-05, + "loss": 0.09831619262695312, + "step": 2326 + }, + { + "epoch": 0.3242527694558629, + "grad_norm": 1.098124623298645, + "learning_rate": 3.3123749604605765e-05, + "loss": 0.12307357788085938, + "step": 2327 + }, + { + "epoch": 0.3243921131470773, + "grad_norm": 1.0094901323318481, + "learning_rate": 3.311663445088858e-05, + "loss": 0.122314453125, + "step": 2328 + }, + { + "epoch": 0.32453145683829165, + "grad_norm": 0.8977756500244141, + "learning_rate": 3.310951638295467e-05, + "loss": 0.1118621826171875, + "step": 2329 + }, + { + "epoch": 0.32467080052950603, + "grad_norm": 0.6694431304931641, + "learning_rate": 3.310239540238552e-05, + "loss": 0.08796310424804688, + "step": 2330 + }, + { + "epoch": 0.3248101442207204, + "grad_norm": 0.6276755332946777, + "learning_rate": 3.3095271510763234e-05, + "loss": 0.11400985717773438, + "step": 2331 + }, + { + "epoch": 0.3249494879119348, + "grad_norm": 0.5277177095413208, + "learning_rate": 3.3088144709670596e-05, + "loss": 0.09256172180175781, + "step": 2332 + }, + { + "epoch": 0.32508883160314916, + "grad_norm": 0.9122876524925232, + "learning_rate": 3.3081015000691014e-05, + "loss": 0.10659217834472656, + "step": 2333 + }, + { + "epoch": 0.32522817529436354, + "grad_norm": 0.32743483781814575, + "learning_rate": 3.3073882385408535e-05, + "loss": 0.08542633056640625, + "step": 2334 + }, + { + "epoch": 0.3253675189855779, + "grad_norm": 0.4393877685070038, + "learning_rate": 3.306674686540788e-05, + "loss": 0.09752273559570312, + "step": 2335 + }, + { + "epoch": 0.3255068626767923, + "grad_norm": 0.7819922566413879, + "learning_rate": 3.305960844227439e-05, + "loss": 0.1282062530517578, + "step": 2336 + }, + { + "epoch": 0.3256462063680067, + "grad_norm": 0.39942800998687744, + "learning_rate": 3.305246711759406e-05, + "loss": 0.09243965148925781, + "step": 2337 + }, + { + "epoch": 0.32578555005922105, + "grad_norm": 0.6639470458030701, + "learning_rate": 3.3045322892953524e-05, + "loss": 0.09267234802246094, + "step": 2338 + }, + { + "epoch": 0.32592489375043543, + "grad_norm": 0.36642181873321533, + "learning_rate": 3.303817576994008e-05, + "loss": 0.0873270034790039, + "step": 2339 + }, + { + "epoch": 0.3260642374416498, + "grad_norm": 0.4657596945762634, + "learning_rate": 3.303102575014164e-05, + "loss": 0.09372115135192871, + "step": 2340 + }, + { + "epoch": 0.3262035811328642, + "grad_norm": 0.45223748683929443, + "learning_rate": 3.3023872835146775e-05, + "loss": 0.09863471984863281, + "step": 2341 + }, + { + "epoch": 0.3263429248240786, + "grad_norm": 0.4033866226673126, + "learning_rate": 3.301671702654472e-05, + "loss": 0.09305953979492188, + "step": 2342 + }, + { + "epoch": 0.326482268515293, + "grad_norm": 0.4252415597438812, + "learning_rate": 3.300955832592531e-05, + "loss": 0.08015918731689453, + "step": 2343 + }, + { + "epoch": 0.3266216122065074, + "grad_norm": 0.45270392298698425, + "learning_rate": 3.300239673487905e-05, + "loss": 0.1024169921875, + "step": 2344 + }, + { + "epoch": 0.32676095589772175, + "grad_norm": 0.44730111956596375, + "learning_rate": 3.299523225499709e-05, + "loss": 0.094818115234375, + "step": 2345 + }, + { + "epoch": 0.3269002995889361, + "grad_norm": 0.4448380470275879, + "learning_rate": 3.298806488787121e-05, + "loss": 0.08975601196289062, + "step": 2346 + }, + { + "epoch": 0.3270396432801505, + "grad_norm": 0.5814468860626221, + "learning_rate": 3.2980894635093837e-05, + "loss": 0.10102653503417969, + "step": 2347 + }, + { + "epoch": 0.3271789869713649, + "grad_norm": 0.7441441416740417, + "learning_rate": 3.297372149825803e-05, + "loss": 0.10105705261230469, + "step": 2348 + }, + { + "epoch": 0.32731833066257926, + "grad_norm": 0.4186321198940277, + "learning_rate": 3.2966545478957504e-05, + "loss": 0.09266090393066406, + "step": 2349 + }, + { + "epoch": 0.32745767435379364, + "grad_norm": 0.7028970718383789, + "learning_rate": 3.29593665787866e-05, + "loss": 0.11047172546386719, + "step": 2350 + }, + { + "epoch": 0.327597018045008, + "grad_norm": 0.5042015910148621, + "learning_rate": 3.295218479934032e-05, + "loss": 0.110107421875, + "step": 2351 + }, + { + "epoch": 0.3277363617362224, + "grad_norm": 0.7965956330299377, + "learning_rate": 3.2945000142214274e-05, + "loss": 0.10779380798339844, + "step": 2352 + }, + { + "epoch": 0.32787570542743677, + "grad_norm": 0.42018258571624756, + "learning_rate": 3.293781260900473e-05, + "loss": 0.08737754821777344, + "step": 2353 + }, + { + "epoch": 0.32801504911865115, + "grad_norm": 0.3021685779094696, + "learning_rate": 3.29306222013086e-05, + "loss": 0.08080101013183594, + "step": 2354 + }, + { + "epoch": 0.3281543928098655, + "grad_norm": 0.4814617335796356, + "learning_rate": 3.292342892072344e-05, + "loss": 0.11190223693847656, + "step": 2355 + }, + { + "epoch": 0.3282937365010799, + "grad_norm": 0.3892059922218323, + "learning_rate": 3.2916232768847404e-05, + "loss": 0.09463882446289062, + "step": 2356 + }, + { + "epoch": 0.3284330801922943, + "grad_norm": 0.45521971583366394, + "learning_rate": 3.2909033747279344e-05, + "loss": 0.11078643798828125, + "step": 2357 + }, + { + "epoch": 0.32857242388350866, + "grad_norm": 0.6955869793891907, + "learning_rate": 3.29018318576187e-05, + "loss": 0.09880590438842773, + "step": 2358 + }, + { + "epoch": 0.32871176757472303, + "grad_norm": 0.2992124855518341, + "learning_rate": 3.289462710146557e-05, + "loss": 0.07469558715820312, + "step": 2359 + }, + { + "epoch": 0.3288511112659374, + "grad_norm": 0.4626840651035309, + "learning_rate": 3.288741948042069e-05, + "loss": 0.09128189086914062, + "step": 2360 + }, + { + "epoch": 0.3289904549571518, + "grad_norm": 0.5864615440368652, + "learning_rate": 3.288020899608542e-05, + "loss": 0.10052490234375, + "step": 2361 + }, + { + "epoch": 0.32912979864836617, + "grad_norm": 0.5681775808334351, + "learning_rate": 3.287299565006177e-05, + "loss": 0.08657646179199219, + "step": 2362 + }, + { + "epoch": 0.3292691423395806, + "grad_norm": 0.4877817630767822, + "learning_rate": 3.286577944395239e-05, + "loss": 0.11231040954589844, + "step": 2363 + }, + { + "epoch": 0.329408486030795, + "grad_norm": 0.6465470194816589, + "learning_rate": 3.2858560379360546e-05, + "loss": 0.0892634391784668, + "step": 2364 + }, + { + "epoch": 0.32954782972200936, + "grad_norm": 0.7092204689979553, + "learning_rate": 3.2851338457890154e-05, + "loss": 0.10835456848144531, + "step": 2365 + }, + { + "epoch": 0.32968717341322373, + "grad_norm": 0.44038110971450806, + "learning_rate": 3.284411368114575e-05, + "loss": 0.09505462646484375, + "step": 2366 + }, + { + "epoch": 0.3298265171044381, + "grad_norm": 0.4606986939907074, + "learning_rate": 3.283688605073253e-05, + "loss": 0.07994270324707031, + "step": 2367 + }, + { + "epoch": 0.3299658607956525, + "grad_norm": 0.4505268633365631, + "learning_rate": 3.282965556825629e-05, + "loss": 0.10715293884277344, + "step": 2368 + }, + { + "epoch": 0.33010520448686687, + "grad_norm": 1.086889624595642, + "learning_rate": 3.282242223532349e-05, + "loss": 0.1193695068359375, + "step": 2369 + }, + { + "epoch": 0.33024454817808124, + "grad_norm": 1.22451913356781, + "learning_rate": 3.281518605354123e-05, + "loss": 0.11787223815917969, + "step": 2370 + }, + { + "epoch": 0.3303838918692956, + "grad_norm": 0.7493208646774292, + "learning_rate": 3.280794702451719e-05, + "loss": 0.09403228759765625, + "step": 2371 + }, + { + "epoch": 0.33052323556051, + "grad_norm": 0.45919549465179443, + "learning_rate": 3.2800705149859725e-05, + "loss": 0.09326934814453125, + "step": 2372 + }, + { + "epoch": 0.3306625792517244, + "grad_norm": 0.5419887900352478, + "learning_rate": 3.2793460431177827e-05, + "loss": 0.09505844116210938, + "step": 2373 + }, + { + "epoch": 0.33080192294293875, + "grad_norm": 0.9653024673461914, + "learning_rate": 3.27862128700811e-05, + "loss": 0.09304046630859375, + "step": 2374 + }, + { + "epoch": 0.33094126663415313, + "grad_norm": 0.704807698726654, + "learning_rate": 3.277896246817979e-05, + "loss": 0.09473228454589844, + "step": 2375 + }, + { + "epoch": 0.3310806103253675, + "grad_norm": 0.46476882696151733, + "learning_rate": 3.277170922708477e-05, + "loss": 0.09077072143554688, + "step": 2376 + }, + { + "epoch": 0.3312199540165819, + "grad_norm": 0.5176045298576355, + "learning_rate": 3.276445314840754e-05, + "loss": 0.0906524658203125, + "step": 2377 + }, + { + "epoch": 0.33135929770779626, + "grad_norm": 0.7540048360824585, + "learning_rate": 3.275719423376024e-05, + "loss": 0.10770988464355469, + "step": 2378 + }, + { + "epoch": 0.33149864139901064, + "grad_norm": 0.434579998254776, + "learning_rate": 3.274993248475563e-05, + "loss": 0.10025787353515625, + "step": 2379 + }, + { + "epoch": 0.331637985090225, + "grad_norm": 0.295743465423584, + "learning_rate": 3.274266790300711e-05, + "loss": 0.08880615234375, + "step": 2380 + }, + { + "epoch": 0.3317773287814394, + "grad_norm": 0.4134382903575897, + "learning_rate": 3.2735400490128695e-05, + "loss": 0.08333778381347656, + "step": 2381 + }, + { + "epoch": 0.3319166724726538, + "grad_norm": 0.7237659692764282, + "learning_rate": 3.272813024773506e-05, + "loss": 0.11153030395507812, + "step": 2382 + }, + { + "epoch": 0.3320560161638682, + "grad_norm": 0.43105629086494446, + "learning_rate": 3.272085717744146e-05, + "loss": 0.09664535522460938, + "step": 2383 + }, + { + "epoch": 0.3321953598550826, + "grad_norm": 0.5362773537635803, + "learning_rate": 3.271358128086381e-05, + "loss": 0.08672142028808594, + "step": 2384 + }, + { + "epoch": 0.33233470354629696, + "grad_norm": 0.6899456977844238, + "learning_rate": 3.270630255961867e-05, + "loss": 0.12129592895507812, + "step": 2385 + }, + { + "epoch": 0.33247404723751134, + "grad_norm": 0.4811581075191498, + "learning_rate": 3.269902101532319e-05, + "loss": 0.11043167114257812, + "step": 2386 + }, + { + "epoch": 0.3326133909287257, + "grad_norm": 0.4920792281627655, + "learning_rate": 3.269173664959516e-05, + "loss": 0.09455108642578125, + "step": 2387 + }, + { + "epoch": 0.3327527346199401, + "grad_norm": 0.4840618371963501, + "learning_rate": 3.2684449464053006e-05, + "loss": 0.110809326171875, + "step": 2388 + }, + { + "epoch": 0.33289207831115447, + "grad_norm": 0.574271559715271, + "learning_rate": 3.2677159460315766e-05, + "loss": 0.10599136352539062, + "step": 2389 + }, + { + "epoch": 0.33303142200236885, + "grad_norm": 0.7400010824203491, + "learning_rate": 3.2669866640003124e-05, + "loss": 0.12714767456054688, + "step": 2390 + }, + { + "epoch": 0.3331707656935832, + "grad_norm": 0.42334288358688354, + "learning_rate": 3.266257100473538e-05, + "loss": 0.09051513671875, + "step": 2391 + }, + { + "epoch": 0.3333101093847976, + "grad_norm": 0.5489424467086792, + "learning_rate": 3.2655272556133436e-05, + "loss": 0.10000228881835938, + "step": 2392 + }, + { + "epoch": 0.333449453076012, + "grad_norm": 0.39669400453567505, + "learning_rate": 3.264797129581886e-05, + "loss": 0.08823394775390625, + "step": 2393 + }, + { + "epoch": 0.33358879676722636, + "grad_norm": 0.40014517307281494, + "learning_rate": 3.264066722541382e-05, + "loss": 0.10107040405273438, + "step": 2394 + }, + { + "epoch": 0.33372814045844074, + "grad_norm": 0.3657737374305725, + "learning_rate": 3.263336034654112e-05, + "loss": 0.09506797790527344, + "step": 2395 + }, + { + "epoch": 0.3338674841496551, + "grad_norm": 0.662957489490509, + "learning_rate": 3.262605066082417e-05, + "loss": 0.09232902526855469, + "step": 2396 + }, + { + "epoch": 0.3340068278408695, + "grad_norm": 0.33329206705093384, + "learning_rate": 3.261873816988702e-05, + "loss": 0.08463859558105469, + "step": 2397 + }, + { + "epoch": 0.33414617153208387, + "grad_norm": 0.42518696188926697, + "learning_rate": 3.261142287535433e-05, + "loss": 0.08338356018066406, + "step": 2398 + }, + { + "epoch": 0.33428551522329825, + "grad_norm": 0.46677079796791077, + "learning_rate": 3.2604104778851416e-05, + "loss": 0.0917978286743164, + "step": 2399 + }, + { + "epoch": 0.3344248589145126, + "grad_norm": 0.41970348358154297, + "learning_rate": 3.259678388200417e-05, + "loss": 0.08078193664550781, + "step": 2400 + }, + { + "epoch": 0.334564202605727, + "grad_norm": 0.4730377495288849, + "learning_rate": 3.258946018643914e-05, + "loss": 0.08669185638427734, + "step": 2401 + }, + { + "epoch": 0.3347035462969414, + "grad_norm": 0.4105498194694519, + "learning_rate": 3.2582133693783475e-05, + "loss": 0.09211349487304688, + "step": 2402 + }, + { + "epoch": 0.3348428899881558, + "grad_norm": 0.729034960269928, + "learning_rate": 3.257480440566496e-05, + "loss": 0.08756446838378906, + "step": 2403 + }, + { + "epoch": 0.3349822336793702, + "grad_norm": 0.4178294837474823, + "learning_rate": 3.256747232371199e-05, + "loss": 0.08561515808105469, + "step": 2404 + }, + { + "epoch": 0.33512157737058457, + "grad_norm": 0.5534301400184631, + "learning_rate": 3.256013744955359e-05, + "loss": 0.11523246765136719, + "step": 2405 + }, + { + "epoch": 0.33526092106179894, + "grad_norm": 0.5412822961807251, + "learning_rate": 3.25527997848194e-05, + "loss": 0.10440826416015625, + "step": 2406 + }, + { + "epoch": 0.3354002647530133, + "grad_norm": 0.3582060635089874, + "learning_rate": 3.2545459331139694e-05, + "loss": 0.08919525146484375, + "step": 2407 + }, + { + "epoch": 0.3355396084442277, + "grad_norm": 0.5264697670936584, + "learning_rate": 3.253811609014533e-05, + "loss": 0.10633659362792969, + "step": 2408 + }, + { + "epoch": 0.3356789521354421, + "grad_norm": 0.3479005992412567, + "learning_rate": 3.2530770063467835e-05, + "loss": 0.08569717407226562, + "step": 2409 + }, + { + "epoch": 0.33581829582665645, + "grad_norm": 0.4731580913066864, + "learning_rate": 3.2523421252739295e-05, + "loss": 0.11518669128417969, + "step": 2410 + }, + { + "epoch": 0.33595763951787083, + "grad_norm": 0.3838135898113251, + "learning_rate": 3.2516069659592485e-05, + "loss": 0.08365535736083984, + "step": 2411 + }, + { + "epoch": 0.3360969832090852, + "grad_norm": 0.5528795719146729, + "learning_rate": 3.2508715285660734e-05, + "loss": 0.11069679260253906, + "step": 2412 + }, + { + "epoch": 0.3362363269002996, + "grad_norm": 0.6881380081176758, + "learning_rate": 3.250135813257803e-05, + "loss": 0.10944175720214844, + "step": 2413 + }, + { + "epoch": 0.33637567059151396, + "grad_norm": 0.30752477049827576, + "learning_rate": 3.249399820197895e-05, + "loss": 0.07897567749023438, + "step": 2414 + }, + { + "epoch": 0.33651501428272834, + "grad_norm": 0.5265889763832092, + "learning_rate": 3.248663549549872e-05, + "loss": 0.09421157836914062, + "step": 2415 + }, + { + "epoch": 0.3366543579739427, + "grad_norm": 0.4007341265678406, + "learning_rate": 3.247927001477316e-05, + "loss": 0.08371543884277344, + "step": 2416 + }, + { + "epoch": 0.3367937016651571, + "grad_norm": 0.49600914120674133, + "learning_rate": 3.247190176143871e-05, + "loss": 0.11322975158691406, + "step": 2417 + }, + { + "epoch": 0.3369330453563715, + "grad_norm": 0.37522292137145996, + "learning_rate": 3.246453073713242e-05, + "loss": 0.09423065185546875, + "step": 2418 + }, + { + "epoch": 0.33707238904758585, + "grad_norm": 0.39163756370544434, + "learning_rate": 3.245715694349197e-05, + "loss": 0.08798408508300781, + "step": 2419 + }, + { + "epoch": 0.33721173273880023, + "grad_norm": 0.4539620280265808, + "learning_rate": 3.244978038215566e-05, + "loss": 0.10312652587890625, + "step": 2420 + }, + { + "epoch": 0.3373510764300146, + "grad_norm": 0.368399977684021, + "learning_rate": 3.244240105476237e-05, + "loss": 0.09232330322265625, + "step": 2421 + }, + { + "epoch": 0.337490420121229, + "grad_norm": 0.5461012125015259, + "learning_rate": 3.243501896295164e-05, + "loss": 0.08799171447753906, + "step": 2422 + }, + { + "epoch": 0.3376297638124434, + "grad_norm": 0.4502730965614319, + "learning_rate": 3.242763410836358e-05, + "loss": 0.12649822235107422, + "step": 2423 + }, + { + "epoch": 0.3377691075036578, + "grad_norm": 0.4995284676551819, + "learning_rate": 3.242024649263896e-05, + "loss": 0.09730339050292969, + "step": 2424 + }, + { + "epoch": 0.3379084511948722, + "grad_norm": 0.6383172869682312, + "learning_rate": 3.241285611741913e-05, + "loss": 0.11251544952392578, + "step": 2425 + }, + { + "epoch": 0.33804779488608655, + "grad_norm": 0.36442863941192627, + "learning_rate": 3.240546298434606e-05, + "loss": 0.08147430419921875, + "step": 2426 + }, + { + "epoch": 0.3381871385773009, + "grad_norm": 0.5115563869476318, + "learning_rate": 3.2398067095062325e-05, + "loss": 0.11065101623535156, + "step": 2427 + }, + { + "epoch": 0.3383264822685153, + "grad_norm": 0.41427096724510193, + "learning_rate": 3.239066845121114e-05, + "loss": 0.088836669921875, + "step": 2428 + }, + { + "epoch": 0.3384658259597297, + "grad_norm": 0.37318360805511475, + "learning_rate": 3.238326705443631e-05, + "loss": 0.09078407287597656, + "step": 2429 + }, + { + "epoch": 0.33860516965094406, + "grad_norm": 0.6644421815872192, + "learning_rate": 3.237586290638226e-05, + "loss": 0.08630752563476562, + "step": 2430 + }, + { + "epoch": 0.33874451334215844, + "grad_norm": 0.4491874873638153, + "learning_rate": 3.2368456008694014e-05, + "loss": 0.1075592041015625, + "step": 2431 + }, + { + "epoch": 0.3388838570333728, + "grad_norm": 0.7644556164741516, + "learning_rate": 3.2361046363017216e-05, + "loss": 0.12103080749511719, + "step": 2432 + }, + { + "epoch": 0.3390232007245872, + "grad_norm": 0.3858785629272461, + "learning_rate": 3.2353633970998135e-05, + "loss": 0.08921241760253906, + "step": 2433 + }, + { + "epoch": 0.33916254441580157, + "grad_norm": 0.885486900806427, + "learning_rate": 3.2346218834283605e-05, + "loss": 0.09995651245117188, + "step": 2434 + }, + { + "epoch": 0.33930188810701595, + "grad_norm": 0.5907622575759888, + "learning_rate": 3.233880095452113e-05, + "loss": 0.11020851135253906, + "step": 2435 + }, + { + "epoch": 0.3394412317982303, + "grad_norm": 0.5143834352493286, + "learning_rate": 3.2331380333358794e-05, + "loss": 0.11468029022216797, + "step": 2436 + }, + { + "epoch": 0.3395805754894447, + "grad_norm": 0.6746461987495422, + "learning_rate": 3.232395697244526e-05, + "loss": 0.10164642333984375, + "step": 2437 + }, + { + "epoch": 0.3397199191806591, + "grad_norm": 0.65516197681427, + "learning_rate": 3.231653087342986e-05, + "loss": 0.1036224365234375, + "step": 2438 + }, + { + "epoch": 0.33985926287187346, + "grad_norm": 0.4735730290412903, + "learning_rate": 3.230910203796248e-05, + "loss": 0.08969497680664062, + "step": 2439 + }, + { + "epoch": 0.33999860656308784, + "grad_norm": 0.29584819078445435, + "learning_rate": 3.2301670467693654e-05, + "loss": 0.08215713500976562, + "step": 2440 + }, + { + "epoch": 0.3401379502543022, + "grad_norm": 0.3599938452243805, + "learning_rate": 3.22942361642745e-05, + "loss": 0.09741783142089844, + "step": 2441 + }, + { + "epoch": 0.3402772939455166, + "grad_norm": 0.3938923180103302, + "learning_rate": 3.228679912935675e-05, + "loss": 0.0860595703125, + "step": 2442 + }, + { + "epoch": 0.340416637636731, + "grad_norm": 0.5885593295097351, + "learning_rate": 3.227935936459276e-05, + "loss": 0.08869171142578125, + "step": 2443 + }, + { + "epoch": 0.3405559813279454, + "grad_norm": 1.0802149772644043, + "learning_rate": 3.2271916871635455e-05, + "loss": 0.12016868591308594, + "step": 2444 + }, + { + "epoch": 0.3406953250191598, + "grad_norm": 0.7896969318389893, + "learning_rate": 3.226447165213839e-05, + "loss": 0.10231208801269531, + "step": 2445 + }, + { + "epoch": 0.34083466871037416, + "grad_norm": 0.4120590090751648, + "learning_rate": 3.225702370775572e-05, + "loss": 0.10833740234375, + "step": 2446 + }, + { + "epoch": 0.34097401240158853, + "grad_norm": 0.5636406540870667, + "learning_rate": 3.224957304014223e-05, + "loss": 0.1046905517578125, + "step": 2447 + }, + { + "epoch": 0.3411133560928029, + "grad_norm": 0.836674153804779, + "learning_rate": 3.224211965095326e-05, + "loss": 0.10136222839355469, + "step": 2448 + }, + { + "epoch": 0.3412526997840173, + "grad_norm": 1.1104596853256226, + "learning_rate": 3.2234663541844805e-05, + "loss": 0.11059188842773438, + "step": 2449 + }, + { + "epoch": 0.34139204347523167, + "grad_norm": 0.4171888828277588, + "learning_rate": 3.222720471447343e-05, + "loss": 0.09206771850585938, + "step": 2450 + }, + { + "epoch": 0.34153138716644604, + "grad_norm": 0.42294660210609436, + "learning_rate": 3.221974317049632e-05, + "loss": 0.09095001220703125, + "step": 2451 + }, + { + "epoch": 0.3416707308576604, + "grad_norm": 0.6998191475868225, + "learning_rate": 3.221227891157125e-05, + "loss": 0.10368537902832031, + "step": 2452 + }, + { + "epoch": 0.3418100745488748, + "grad_norm": 0.6839171051979065, + "learning_rate": 3.220481193935663e-05, + "loss": 0.087921142578125, + "step": 2453 + }, + { + "epoch": 0.3419494182400892, + "grad_norm": 0.6091374158859253, + "learning_rate": 3.219734225551143e-05, + "loss": 0.09507942199707031, + "step": 2454 + }, + { + "epoch": 0.34208876193130355, + "grad_norm": 0.34874123334884644, + "learning_rate": 3.2189869861695254e-05, + "loss": 0.08126068115234375, + "step": 2455 + }, + { + "epoch": 0.34222810562251793, + "grad_norm": 1.0301343202590942, + "learning_rate": 3.218239475956829e-05, + "loss": 0.09844589233398438, + "step": 2456 + }, + { + "epoch": 0.3423674493137323, + "grad_norm": 0.41472622752189636, + "learning_rate": 3.217491695079134e-05, + "loss": 0.0982666015625, + "step": 2457 + }, + { + "epoch": 0.3425067930049467, + "grad_norm": 0.23753787577152252, + "learning_rate": 3.216743643702581e-05, + "loss": 0.07179069519042969, + "step": 2458 + }, + { + "epoch": 0.34264613669616106, + "grad_norm": 0.5233490467071533, + "learning_rate": 3.215995321993368e-05, + "loss": 0.09961318969726562, + "step": 2459 + }, + { + "epoch": 0.34278548038737544, + "grad_norm": 0.836757242679596, + "learning_rate": 3.215246730117757e-05, + "loss": 0.10308837890625, + "step": 2460 + }, + { + "epoch": 0.3429248240785898, + "grad_norm": 0.4470323920249939, + "learning_rate": 3.2144978682420664e-05, + "loss": 0.09181594848632812, + "step": 2461 + }, + { + "epoch": 0.3430641677698042, + "grad_norm": 0.6449234485626221, + "learning_rate": 3.2137487365326773e-05, + "loss": 0.11301040649414062, + "step": 2462 + }, + { + "epoch": 0.34320351146101863, + "grad_norm": 0.5621340274810791, + "learning_rate": 3.212999335156029e-05, + "loss": 0.10026979446411133, + "step": 2463 + }, + { + "epoch": 0.343342855152233, + "grad_norm": 0.5645102858543396, + "learning_rate": 3.212249664278622e-05, + "loss": 0.09337997436523438, + "step": 2464 + }, + { + "epoch": 0.3434821988434474, + "grad_norm": 0.5579472780227661, + "learning_rate": 3.211499724067016e-05, + "loss": 0.09551048278808594, + "step": 2465 + }, + { + "epoch": 0.34362154253466176, + "grad_norm": 0.6934046149253845, + "learning_rate": 3.2107495146878295e-05, + "loss": 0.08920955657958984, + "step": 2466 + }, + { + "epoch": 0.34376088622587614, + "grad_norm": 0.6692370772361755, + "learning_rate": 3.2099990363077434e-05, + "loss": 0.09684371948242188, + "step": 2467 + }, + { + "epoch": 0.3439002299170905, + "grad_norm": 0.570891261100769, + "learning_rate": 3.209248289093496e-05, + "loss": 0.11626338958740234, + "step": 2468 + }, + { + "epoch": 0.3440395736083049, + "grad_norm": 0.8736332058906555, + "learning_rate": 3.208497273211886e-05, + "loss": 0.09200859069824219, + "step": 2469 + }, + { + "epoch": 0.34417891729951927, + "grad_norm": 0.40048038959503174, + "learning_rate": 3.207745988829773e-05, + "loss": 0.08631324768066406, + "step": 2470 + }, + { + "epoch": 0.34431826099073365, + "grad_norm": 0.5190144181251526, + "learning_rate": 3.206994436114074e-05, + "loss": 0.0774850845336914, + "step": 2471 + }, + { + "epoch": 0.344457604681948, + "grad_norm": 0.42077571153640747, + "learning_rate": 3.206242615231768e-05, + "loss": 0.07711982727050781, + "step": 2472 + }, + { + "epoch": 0.3445969483731624, + "grad_norm": 0.8102888464927673, + "learning_rate": 3.2054905263498916e-05, + "loss": 0.09466934204101562, + "step": 2473 + }, + { + "epoch": 0.3447362920643768, + "grad_norm": 0.7880376577377319, + "learning_rate": 3.2047381696355424e-05, + "loss": 0.089996337890625, + "step": 2474 + }, + { + "epoch": 0.34487563575559116, + "grad_norm": 0.3588189482688904, + "learning_rate": 3.2039855452558755e-05, + "loss": 0.07999610900878906, + "step": 2475 + }, + { + "epoch": 0.34501497944680554, + "grad_norm": 0.622969925403595, + "learning_rate": 3.203232653378109e-05, + "loss": 0.10825347900390625, + "step": 2476 + }, + { + "epoch": 0.3451543231380199, + "grad_norm": 0.327957421541214, + "learning_rate": 3.202479494169516e-05, + "loss": 0.08006858825683594, + "step": 2477 + }, + { + "epoch": 0.3452936668292343, + "grad_norm": 0.4184449017047882, + "learning_rate": 3.2017260677974346e-05, + "loss": 0.08614349365234375, + "step": 2478 + }, + { + "epoch": 0.34543301052044867, + "grad_norm": 0.49727723002433777, + "learning_rate": 3.200972374429255e-05, + "loss": 0.0896148681640625, + "step": 2479 + }, + { + "epoch": 0.34557235421166305, + "grad_norm": 0.729845404624939, + "learning_rate": 3.200218414232433e-05, + "loss": 0.09932804107666016, + "step": 2480 + }, + { + "epoch": 0.3457116979028774, + "grad_norm": 0.3942984342575073, + "learning_rate": 3.199464187374481e-05, + "loss": 0.08500099182128906, + "step": 2481 + }, + { + "epoch": 0.3458510415940918, + "grad_norm": 0.6143445372581482, + "learning_rate": 3.19870969402297e-05, + "loss": 0.11649322509765625, + "step": 2482 + }, + { + "epoch": 0.34599038528530623, + "grad_norm": 0.6522253751754761, + "learning_rate": 3.197954934345533e-05, + "loss": 0.09255790710449219, + "step": 2483 + }, + { + "epoch": 0.3461297289765206, + "grad_norm": 0.42800357937812805, + "learning_rate": 3.1971999085098583e-05, + "loss": 0.08526229858398438, + "step": 2484 + }, + { + "epoch": 0.346269072667735, + "grad_norm": 0.30015677213668823, + "learning_rate": 3.196444616683698e-05, + "loss": 0.08616065979003906, + "step": 2485 + }, + { + "epoch": 0.34640841635894937, + "grad_norm": 0.658868670463562, + "learning_rate": 3.195689059034858e-05, + "loss": 0.10631752014160156, + "step": 2486 + }, + { + "epoch": 0.34654776005016374, + "grad_norm": 0.8827950358390808, + "learning_rate": 3.194933235731207e-05, + "loss": 0.11169242858886719, + "step": 2487 + }, + { + "epoch": 0.3466871037413781, + "grad_norm": 0.48278310894966125, + "learning_rate": 3.194177146940673e-05, + "loss": 0.08446788787841797, + "step": 2488 + }, + { + "epoch": 0.3468264474325925, + "grad_norm": 0.7518747448921204, + "learning_rate": 3.193420792831239e-05, + "loss": 0.08782768249511719, + "step": 2489 + }, + { + "epoch": 0.3469657911238069, + "grad_norm": 0.6589075922966003, + "learning_rate": 3.192664173570952e-05, + "loss": 0.09102821350097656, + "step": 2490 + }, + { + "epoch": 0.34710513481502125, + "grad_norm": 0.4626648426055908, + "learning_rate": 3.1919072893279144e-05, + "loss": 0.09703636169433594, + "step": 2491 + }, + { + "epoch": 0.34724447850623563, + "grad_norm": 0.390327125787735, + "learning_rate": 3.1911501402702886e-05, + "loss": 0.08299064636230469, + "step": 2492 + }, + { + "epoch": 0.34738382219745, + "grad_norm": 0.5017620325088501, + "learning_rate": 3.1903927265662965e-05, + "loss": 0.09414386749267578, + "step": 2493 + }, + { + "epoch": 0.3475231658886644, + "grad_norm": 0.6850275993347168, + "learning_rate": 3.189635048384217e-05, + "loss": 0.10660743713378906, + "step": 2494 + }, + { + "epoch": 0.34766250957987876, + "grad_norm": 0.7754870057106018, + "learning_rate": 3.18887710589239e-05, + "loss": 0.12614822387695312, + "step": 2495 + }, + { + "epoch": 0.34780185327109314, + "grad_norm": 0.5521329641342163, + "learning_rate": 3.188118899259213e-05, + "loss": 0.10074329376220703, + "step": 2496 + }, + { + "epoch": 0.3479411969623075, + "grad_norm": 0.32712846994400024, + "learning_rate": 3.1873604286531415e-05, + "loss": 0.07354736328125, + "step": 2497 + }, + { + "epoch": 0.3480805406535219, + "grad_norm": 0.7212967276573181, + "learning_rate": 3.18660169424269e-05, + "loss": 0.09866046905517578, + "step": 2498 + }, + { + "epoch": 0.3482198843447363, + "grad_norm": 0.4247094392776489, + "learning_rate": 3.185842696196434e-05, + "loss": 0.09700393676757812, + "step": 2499 + }, + { + "epoch": 0.34835922803595065, + "grad_norm": 0.44107261300086975, + "learning_rate": 3.185083434683003e-05, + "loss": 0.08858394622802734, + "step": 2500 + }, + { + "epoch": 0.34849857172716503, + "grad_norm": 0.366940438747406, + "learning_rate": 3.184323909871089e-05, + "loss": 0.09437179565429688, + "step": 2501 + }, + { + "epoch": 0.3486379154183794, + "grad_norm": 0.49815887212753296, + "learning_rate": 3.1835641219294414e-05, + "loss": 0.10512733459472656, + "step": 2502 + }, + { + "epoch": 0.34877725910959384, + "grad_norm": 0.6216877698898315, + "learning_rate": 3.182804071026867e-05, + "loss": 0.09451484680175781, + "step": 2503 + }, + { + "epoch": 0.3489166028008082, + "grad_norm": 0.5617043972015381, + "learning_rate": 3.1820437573322306e-05, + "loss": 0.09344482421875, + "step": 2504 + }, + { + "epoch": 0.3490559464920226, + "grad_norm": 0.349010705947876, + "learning_rate": 3.181283181014459e-05, + "loss": 0.08999824523925781, + "step": 2505 + }, + { + "epoch": 0.349195290183237, + "grad_norm": 0.26771077513694763, + "learning_rate": 3.1805223422425334e-05, + "loss": 0.079345703125, + "step": 2506 + }, + { + "epoch": 0.34933463387445135, + "grad_norm": 0.5876293182373047, + "learning_rate": 3.179761241185495e-05, + "loss": 0.103363037109375, + "step": 2507 + }, + { + "epoch": 0.3494739775656657, + "grad_norm": 0.48751550912857056, + "learning_rate": 3.178999878012443e-05, + "loss": 0.09234428405761719, + "step": 2508 + }, + { + "epoch": 0.3496133212568801, + "grad_norm": 0.5078559517860413, + "learning_rate": 3.178238252892536e-05, + "loss": 0.08591270446777344, + "step": 2509 + }, + { + "epoch": 0.3497526649480945, + "grad_norm": 0.4036638140678406, + "learning_rate": 3.177476365994989e-05, + "loss": 0.08040237426757812, + "step": 2510 + }, + { + "epoch": 0.34989200863930886, + "grad_norm": 0.41222885251045227, + "learning_rate": 3.1767142174890746e-05, + "loss": 0.0978231430053711, + "step": 2511 + }, + { + "epoch": 0.35003135233052324, + "grad_norm": 0.4188211262226105, + "learning_rate": 3.175951807544126e-05, + "loss": 0.09803009033203125, + "step": 2512 + }, + { + "epoch": 0.3501706960217376, + "grad_norm": 0.5038278698921204, + "learning_rate": 3.1751891363295344e-05, + "loss": 0.09929847717285156, + "step": 2513 + }, + { + "epoch": 0.350310039712952, + "grad_norm": 0.551529586315155, + "learning_rate": 3.1744262040147454e-05, + "loss": 0.13569259643554688, + "step": 2514 + }, + { + "epoch": 0.35044938340416637, + "grad_norm": 0.40178897976875305, + "learning_rate": 3.173663010769267e-05, + "loss": 0.09111785888671875, + "step": 2515 + }, + { + "epoch": 0.35058872709538075, + "grad_norm": 0.3263823091983795, + "learning_rate": 3.172899556762663e-05, + "loss": 0.07968330383300781, + "step": 2516 + }, + { + "epoch": 0.3507280707865951, + "grad_norm": 0.6626636981964111, + "learning_rate": 3.172135842164555e-05, + "loss": 0.09186744689941406, + "step": 2517 + }, + { + "epoch": 0.3508674144778095, + "grad_norm": 1.4507453441619873, + "learning_rate": 3.171371867144624e-05, + "loss": 0.12017440795898438, + "step": 2518 + }, + { + "epoch": 0.3510067581690239, + "grad_norm": 0.600793182849884, + "learning_rate": 3.1706076318726056e-05, + "loss": 0.10542678833007812, + "step": 2519 + }, + { + "epoch": 0.35114610186023826, + "grad_norm": 0.6299943923950195, + "learning_rate": 3.1698431365182974e-05, + "loss": 0.12064361572265625, + "step": 2520 + }, + { + "epoch": 0.35128544555145264, + "grad_norm": 0.5295590162277222, + "learning_rate": 3.169078381251552e-05, + "loss": 0.07940292358398438, + "step": 2521 + }, + { + "epoch": 0.351424789242667, + "grad_norm": 0.785427987575531, + "learning_rate": 3.168313366242281e-05, + "loss": 0.07431316375732422, + "step": 2522 + }, + { + "epoch": 0.35156413293388145, + "grad_norm": 0.39260411262512207, + "learning_rate": 3.167548091660454e-05, + "loss": 0.10193061828613281, + "step": 2523 + }, + { + "epoch": 0.3517034766250958, + "grad_norm": 0.41282394528388977, + "learning_rate": 3.166782557676095e-05, + "loss": 0.09600067138671875, + "step": 2524 + }, + { + "epoch": 0.3518428203163102, + "grad_norm": 0.3696941137313843, + "learning_rate": 3.1660167644592915e-05, + "loss": 0.08517074584960938, + "step": 2525 + }, + { + "epoch": 0.3519821640075246, + "grad_norm": 0.6783550381660461, + "learning_rate": 3.165250712180182e-05, + "loss": 0.10532760620117188, + "step": 2526 + }, + { + "epoch": 0.35212150769873896, + "grad_norm": 0.46508872509002686, + "learning_rate": 3.1644844010089686e-05, + "loss": 0.08648490905761719, + "step": 2527 + }, + { + "epoch": 0.35226085138995333, + "grad_norm": 0.3848843574523926, + "learning_rate": 3.163717831115906e-05, + "loss": 0.08680152893066406, + "step": 2528 + }, + { + "epoch": 0.3524001950811677, + "grad_norm": 0.3898068964481354, + "learning_rate": 3.1629510026713095e-05, + "loss": 0.06781291961669922, + "step": 2529 + }, + { + "epoch": 0.3525395387723821, + "grad_norm": 0.42753472924232483, + "learning_rate": 3.162183915845551e-05, + "loss": 0.10159111022949219, + "step": 2530 + }, + { + "epoch": 0.35267888246359647, + "grad_norm": 0.5769897103309631, + "learning_rate": 3.16141657080906e-05, + "loss": 0.0869140625, + "step": 2531 + }, + { + "epoch": 0.35281822615481084, + "grad_norm": 0.5728771090507507, + "learning_rate": 3.160648967732322e-05, + "loss": 0.10646438598632812, + "step": 2532 + }, + { + "epoch": 0.3529575698460252, + "grad_norm": 0.3244978189468384, + "learning_rate": 3.159881106785882e-05, + "loss": 0.08256721496582031, + "step": 2533 + }, + { + "epoch": 0.3530969135372396, + "grad_norm": 0.3691733479499817, + "learning_rate": 3.15911298814034e-05, + "loss": 0.09164047241210938, + "step": 2534 + }, + { + "epoch": 0.353236257228454, + "grad_norm": 0.44982120394706726, + "learning_rate": 3.1583446119663555e-05, + "loss": 0.10359764099121094, + "step": 2535 + }, + { + "epoch": 0.35337560091966835, + "grad_norm": 0.4273984730243683, + "learning_rate": 3.1575759784346436e-05, + "loss": 0.08274221420288086, + "step": 2536 + }, + { + "epoch": 0.35351494461088273, + "grad_norm": 0.4406343400478363, + "learning_rate": 3.1568070877159766e-05, + "loss": 0.08126068115234375, + "step": 2537 + }, + { + "epoch": 0.3536542883020971, + "grad_norm": 0.47040992975234985, + "learning_rate": 3.1560379399811856e-05, + "loss": 0.08247566223144531, + "step": 2538 + }, + { + "epoch": 0.3537936319933115, + "grad_norm": 0.406099408864975, + "learning_rate": 3.155268535401157e-05, + "loss": 0.09544944763183594, + "step": 2539 + }, + { + "epoch": 0.35393297568452586, + "grad_norm": 0.3819940984249115, + "learning_rate": 3.1544988741468353e-05, + "loss": 0.10126495361328125, + "step": 2540 + }, + { + "epoch": 0.35407231937574024, + "grad_norm": 0.43441683053970337, + "learning_rate": 3.153728956389221e-05, + "loss": 0.0829010009765625, + "step": 2541 + }, + { + "epoch": 0.3542116630669546, + "grad_norm": 0.39023691415786743, + "learning_rate": 3.152958782299373e-05, + "loss": 0.089752197265625, + "step": 2542 + }, + { + "epoch": 0.35435100675816905, + "grad_norm": 0.3187289535999298, + "learning_rate": 3.152188352048406e-05, + "loss": 0.08340263366699219, + "step": 2543 + }, + { + "epoch": 0.35449035044938343, + "grad_norm": 0.4141504764556885, + "learning_rate": 3.1514176658074925e-05, + "loss": 0.10068798065185547, + "step": 2544 + }, + { + "epoch": 0.3546296941405978, + "grad_norm": 0.35704895853996277, + "learning_rate": 3.15064672374786e-05, + "loss": 0.07587051391601562, + "step": 2545 + }, + { + "epoch": 0.3547690378318122, + "grad_norm": 0.5955180525779724, + "learning_rate": 3.149875526040796e-05, + "loss": 0.09829330444335938, + "step": 2546 + }, + { + "epoch": 0.35490838152302656, + "grad_norm": 0.47540855407714844, + "learning_rate": 3.1491040728576416e-05, + "loss": 0.10313129425048828, + "step": 2547 + }, + { + "epoch": 0.35504772521424094, + "grad_norm": 0.43991246819496155, + "learning_rate": 3.1483323643697965e-05, + "loss": 0.08271217346191406, + "step": 2548 + }, + { + "epoch": 0.3551870689054553, + "grad_norm": 0.43044447898864746, + "learning_rate": 3.147560400748719e-05, + "loss": 0.08501052856445312, + "step": 2549 + }, + { + "epoch": 0.3553264125966697, + "grad_norm": 0.5553862452507019, + "learning_rate": 3.146788182165917e-05, + "loss": 0.10389518737792969, + "step": 2550 + }, + { + "epoch": 0.35546575628788407, + "grad_norm": 0.35470104217529297, + "learning_rate": 3.146015708792964e-05, + "loss": 0.07456588745117188, + "step": 2551 + }, + { + "epoch": 0.35560509997909845, + "grad_norm": 0.7183926701545715, + "learning_rate": 3.1452429808014845e-05, + "loss": 0.09378814697265625, + "step": 2552 + }, + { + "epoch": 0.3557444436703128, + "grad_norm": 0.5061410665512085, + "learning_rate": 3.1444699983631604e-05, + "loss": 0.11388206481933594, + "step": 2553 + }, + { + "epoch": 0.3558837873615272, + "grad_norm": 0.6117453575134277, + "learning_rate": 3.143696761649732e-05, + "loss": 0.10689449310302734, + "step": 2554 + }, + { + "epoch": 0.3560231310527416, + "grad_norm": 0.6196308135986328, + "learning_rate": 3.1429232708329935e-05, + "loss": 0.12854766845703125, + "step": 2555 + }, + { + "epoch": 0.35616247474395596, + "grad_norm": 0.6860495805740356, + "learning_rate": 3.142149526084798e-05, + "loss": 0.08374786376953125, + "step": 2556 + }, + { + "epoch": 0.35630181843517034, + "grad_norm": 0.40438172221183777, + "learning_rate": 3.1413755275770533e-05, + "loss": 0.0809783935546875, + "step": 2557 + }, + { + "epoch": 0.3564411621263847, + "grad_norm": 0.5380021929740906, + "learning_rate": 3.1406012754817246e-05, + "loss": 0.11009407043457031, + "step": 2558 + }, + { + "epoch": 0.3565805058175991, + "grad_norm": 0.5174099802970886, + "learning_rate": 3.139826769970833e-05, + "loss": 0.11548805236816406, + "step": 2559 + }, + { + "epoch": 0.35671984950881347, + "grad_norm": 0.7302741408348083, + "learning_rate": 3.139052011216456e-05, + "loss": 0.10605049133300781, + "step": 2560 + }, + { + "epoch": 0.35685919320002785, + "grad_norm": 0.5335695147514343, + "learning_rate": 3.138276999390726e-05, + "loss": 0.09950447082519531, + "step": 2561 + }, + { + "epoch": 0.3569985368912422, + "grad_norm": 0.6066809892654419, + "learning_rate": 3.1375017346658354e-05, + "loss": 0.1013040542602539, + "step": 2562 + }, + { + "epoch": 0.3571378805824566, + "grad_norm": 0.5961622595787048, + "learning_rate": 3.136726217214028e-05, + "loss": 0.10289192199707031, + "step": 2563 + }, + { + "epoch": 0.35727722427367103, + "grad_norm": 0.5314041376113892, + "learning_rate": 3.1359504472076074e-05, + "loss": 0.1021270751953125, + "step": 2564 + }, + { + "epoch": 0.3574165679648854, + "grad_norm": 0.4799005091190338, + "learning_rate": 3.1351744248189314e-05, + "loss": 0.0928955078125, + "step": 2565 + }, + { + "epoch": 0.3575559116560998, + "grad_norm": 0.6831679344177246, + "learning_rate": 3.134398150220415e-05, + "loss": 0.09646415710449219, + "step": 2566 + }, + { + "epoch": 0.35769525534731417, + "grad_norm": 0.6177690029144287, + "learning_rate": 3.133621623584528e-05, + "loss": 0.10936355590820312, + "step": 2567 + }, + { + "epoch": 0.35783459903852854, + "grad_norm": 0.3529089093208313, + "learning_rate": 3.132844845083798e-05, + "loss": 0.09012222290039062, + "step": 2568 + }, + { + "epoch": 0.3579739427297429, + "grad_norm": 0.531903862953186, + "learning_rate": 3.132067814890806e-05, + "loss": 0.0753936767578125, + "step": 2569 + }, + { + "epoch": 0.3581132864209573, + "grad_norm": 0.634239137172699, + "learning_rate": 3.1312905331781914e-05, + "loss": 0.10400581359863281, + "step": 2570 + }, + { + "epoch": 0.3582526301121717, + "grad_norm": 0.4712630808353424, + "learning_rate": 3.130513000118648e-05, + "loss": 0.10760116577148438, + "step": 2571 + }, + { + "epoch": 0.35839197380338605, + "grad_norm": 0.6282480955123901, + "learning_rate": 3.1297352158849264e-05, + "loss": 0.125091552734375, + "step": 2572 + }, + { + "epoch": 0.35853131749460043, + "grad_norm": 0.4568396508693695, + "learning_rate": 3.128957180649832e-05, + "loss": 0.08289718627929688, + "step": 2573 + }, + { + "epoch": 0.3586706611858148, + "grad_norm": 1.524487853050232, + "learning_rate": 3.128178894586226e-05, + "loss": 0.13258743286132812, + "step": 2574 + }, + { + "epoch": 0.3588100048770292, + "grad_norm": 0.5153091549873352, + "learning_rate": 3.1274003578670264e-05, + "loss": 0.10664176940917969, + "step": 2575 + }, + { + "epoch": 0.35894934856824356, + "grad_norm": 1.0039058923721313, + "learning_rate": 3.126621570665207e-05, + "loss": 0.10049247741699219, + "step": 2576 + }, + { + "epoch": 0.35908869225945794, + "grad_norm": 1.613863229751587, + "learning_rate": 3.125842533153796e-05, + "loss": 0.11232948303222656, + "step": 2577 + }, + { + "epoch": 0.3592280359506723, + "grad_norm": 0.5973495841026306, + "learning_rate": 3.1250632455058764e-05, + "loss": 0.10900688171386719, + "step": 2578 + }, + { + "epoch": 0.3593673796418867, + "grad_norm": 0.6121526956558228, + "learning_rate": 3.12428370789459e-05, + "loss": 0.10919570922851562, + "step": 2579 + }, + { + "epoch": 0.3595067233331011, + "grad_norm": 1.107312560081482, + "learning_rate": 3.1235039204931316e-05, + "loss": 0.10827827453613281, + "step": 2580 + }, + { + "epoch": 0.35964606702431545, + "grad_norm": 0.8499792814254761, + "learning_rate": 3.122723883474752e-05, + "loss": 0.09174346923828125, + "step": 2581 + }, + { + "epoch": 0.35978541071552983, + "grad_norm": 0.46599119901657104, + "learning_rate": 3.1219435970127574e-05, + "loss": 0.10707855224609375, + "step": 2582 + }, + { + "epoch": 0.3599247544067442, + "grad_norm": 0.4361051619052887, + "learning_rate": 3.12116306128051e-05, + "loss": 0.087554931640625, + "step": 2583 + }, + { + "epoch": 0.36006409809795864, + "grad_norm": 0.35114380717277527, + "learning_rate": 3.1203822764514274e-05, + "loss": 0.09110069274902344, + "step": 2584 + }, + { + "epoch": 0.360203441789173, + "grad_norm": 0.5652362108230591, + "learning_rate": 3.1196012426989814e-05, + "loss": 0.13123703002929688, + "step": 2585 + }, + { + "epoch": 0.3603427854803874, + "grad_norm": 0.488444983959198, + "learning_rate": 3.1188199601967e-05, + "loss": 0.10687637329101562, + "step": 2586 + }, + { + "epoch": 0.3604821291716018, + "grad_norm": 0.5584848523139954, + "learning_rate": 3.118038429118167e-05, + "loss": 0.07702445983886719, + "step": 2587 + }, + { + "epoch": 0.36062147286281615, + "grad_norm": 0.9693449139595032, + "learning_rate": 3.1172566496370205e-05, + "loss": 0.09733772277832031, + "step": 2588 + }, + { + "epoch": 0.36076081655403053, + "grad_norm": 0.8434732556343079, + "learning_rate": 3.116474621926953e-05, + "loss": 0.09353828430175781, + "step": 2589 + }, + { + "epoch": 0.3609001602452449, + "grad_norm": 0.34158191084861755, + "learning_rate": 3.115692346161715e-05, + "loss": 0.07790756225585938, + "step": 2590 + }, + { + "epoch": 0.3610395039364593, + "grad_norm": 0.6178411841392517, + "learning_rate": 3.1149098225151086e-05, + "loss": 0.09272575378417969, + "step": 2591 + }, + { + "epoch": 0.36117884762767366, + "grad_norm": 0.4924629032611847, + "learning_rate": 3.114127051160994e-05, + "loss": 0.10402679443359375, + "step": 2592 + }, + { + "epoch": 0.36131819131888804, + "grad_norm": 0.4220390319824219, + "learning_rate": 3.1133440322732846e-05, + "loss": 0.08599472045898438, + "step": 2593 + }, + { + "epoch": 0.3614575350101024, + "grad_norm": 0.5129683017730713, + "learning_rate": 3.112560766025949e-05, + "loss": 0.09280967712402344, + "step": 2594 + }, + { + "epoch": 0.3615968787013168, + "grad_norm": 0.7852360606193542, + "learning_rate": 3.1117772525930115e-05, + "loss": 0.10050582885742188, + "step": 2595 + }, + { + "epoch": 0.36173622239253117, + "grad_norm": 0.5825809240341187, + "learning_rate": 3.11099349214855e-05, + "loss": 0.09610652923583984, + "step": 2596 + }, + { + "epoch": 0.36187556608374555, + "grad_norm": 0.4947592616081238, + "learning_rate": 3.1102094848667e-05, + "loss": 0.08903121948242188, + "step": 2597 + }, + { + "epoch": 0.3620149097749599, + "grad_norm": 0.41364240646362305, + "learning_rate": 3.109425230921649e-05, + "loss": 0.09671783447265625, + "step": 2598 + }, + { + "epoch": 0.3621542534661743, + "grad_norm": 0.38247090578079224, + "learning_rate": 3.10864073048764e-05, + "loss": 0.07243537902832031, + "step": 2599 + }, + { + "epoch": 0.3622935971573887, + "grad_norm": 0.5047247409820557, + "learning_rate": 3.107855983738971e-05, + "loss": 0.08980560302734375, + "step": 2600 + }, + { + "epoch": 0.36243294084860306, + "grad_norm": 0.62384033203125, + "learning_rate": 3.107070990849995e-05, + "loss": 0.08967781066894531, + "step": 2601 + }, + { + "epoch": 0.36257228453981744, + "grad_norm": 0.6264933943748474, + "learning_rate": 3.1062857519951215e-05, + "loss": 0.12082481384277344, + "step": 2602 + }, + { + "epoch": 0.3627116282310318, + "grad_norm": 0.5413516759872437, + "learning_rate": 3.105500267348809e-05, + "loss": 0.11745834350585938, + "step": 2603 + }, + { + "epoch": 0.36285097192224625, + "grad_norm": 0.33942386507987976, + "learning_rate": 3.1047145370855764e-05, + "loss": 0.09207916259765625, + "step": 2604 + }, + { + "epoch": 0.3629903156134606, + "grad_norm": 0.5689101815223694, + "learning_rate": 3.103928561379996e-05, + "loss": 0.10602378845214844, + "step": 2605 + }, + { + "epoch": 0.363129659304675, + "grad_norm": 0.28526315093040466, + "learning_rate": 3.103142340406691e-05, + "loss": 0.07642745971679688, + "step": 2606 + }, + { + "epoch": 0.3632690029958894, + "grad_norm": 0.7817153930664062, + "learning_rate": 3.102355874340343e-05, + "loss": 0.08271598815917969, + "step": 2607 + }, + { + "epoch": 0.36340834668710376, + "grad_norm": 1.0438140630722046, + "learning_rate": 3.101569163355688e-05, + "loss": 0.12830352783203125, + "step": 2608 + }, + { + "epoch": 0.36354769037831813, + "grad_norm": 0.8876070380210876, + "learning_rate": 3.100782207627513e-05, + "loss": 0.10644721984863281, + "step": 2609 + }, + { + "epoch": 0.3636870340695325, + "grad_norm": 0.38669657707214355, + "learning_rate": 3.099995007330664e-05, + "loss": 0.09250259399414062, + "step": 2610 + }, + { + "epoch": 0.3638263777607469, + "grad_norm": 0.5331718921661377, + "learning_rate": 3.099207562640037e-05, + "loss": 0.10978889465332031, + "step": 2611 + }, + { + "epoch": 0.36396572145196127, + "grad_norm": 0.6679280996322632, + "learning_rate": 3.098419873730585e-05, + "loss": 0.09931373596191406, + "step": 2612 + }, + { + "epoch": 0.36410506514317564, + "grad_norm": 0.5264922380447388, + "learning_rate": 3.097631940777314e-05, + "loss": 0.09230232238769531, + "step": 2613 + }, + { + "epoch": 0.36424440883439, + "grad_norm": 0.5238666534423828, + "learning_rate": 3.096843763955285e-05, + "loss": 0.0916748046875, + "step": 2614 + }, + { + "epoch": 0.3643837525256044, + "grad_norm": 0.5635100603103638, + "learning_rate": 3.096055343439614e-05, + "loss": 0.08341217041015625, + "step": 2615 + }, + { + "epoch": 0.3645230962168188, + "grad_norm": 0.5157157778739929, + "learning_rate": 3.095266679405468e-05, + "loss": 0.10056304931640625, + "step": 2616 + }, + { + "epoch": 0.36466243990803315, + "grad_norm": 0.3669660985469818, + "learning_rate": 3.094477772028072e-05, + "loss": 0.0934591293334961, + "step": 2617 + }, + { + "epoch": 0.36480178359924753, + "grad_norm": 0.5156422853469849, + "learning_rate": 3.0936886214827024e-05, + "loss": 0.09497642517089844, + "step": 2618 + }, + { + "epoch": 0.3649411272904619, + "grad_norm": 0.47423824667930603, + "learning_rate": 3.09289922794469e-05, + "loss": 0.08374786376953125, + "step": 2619 + }, + { + "epoch": 0.3650804709816763, + "grad_norm": 0.5236391425132751, + "learning_rate": 3.092109591589421e-05, + "loss": 0.10018539428710938, + "step": 2620 + }, + { + "epoch": 0.36521981467289066, + "grad_norm": 0.7541472315788269, + "learning_rate": 3.091319712592333e-05, + "loss": 0.09245109558105469, + "step": 2621 + }, + { + "epoch": 0.36535915836410504, + "grad_norm": 0.6133775115013123, + "learning_rate": 3.0905295911289216e-05, + "loss": 0.1040496826171875, + "step": 2622 + }, + { + "epoch": 0.3654985020553194, + "grad_norm": 0.2892720401287079, + "learning_rate": 3.089739227374732e-05, + "loss": 0.076416015625, + "step": 2623 + }, + { + "epoch": 0.36563784574653385, + "grad_norm": 0.4997168481349945, + "learning_rate": 3.088948621505364e-05, + "loss": 0.10030364990234375, + "step": 2624 + }, + { + "epoch": 0.36577718943774823, + "grad_norm": 0.41946476697921753, + "learning_rate": 3.088157773696474e-05, + "loss": 0.10021448135375977, + "step": 2625 + }, + { + "epoch": 0.3659165331289626, + "grad_norm": 0.9029008150100708, + "learning_rate": 3.08736668412377e-05, + "loss": 0.10507392883300781, + "step": 2626 + }, + { + "epoch": 0.366055876820177, + "grad_norm": 0.9772976040840149, + "learning_rate": 3.0865753529630135e-05, + "loss": 0.10790634155273438, + "step": 2627 + }, + { + "epoch": 0.36619522051139136, + "grad_norm": 0.4835531711578369, + "learning_rate": 3.085783780390021e-05, + "loss": 0.10380744934082031, + "step": 2628 + }, + { + "epoch": 0.36633456420260574, + "grad_norm": 0.5344784259796143, + "learning_rate": 3.0849919665806605e-05, + "loss": 0.10208702087402344, + "step": 2629 + }, + { + "epoch": 0.3664739078938201, + "grad_norm": 0.4517846405506134, + "learning_rate": 3.0841999117108564e-05, + "loss": 0.10371875762939453, + "step": 2630 + }, + { + "epoch": 0.3666132515850345, + "grad_norm": 0.4185256063938141, + "learning_rate": 3.0834076159565847e-05, + "loss": 0.09045600891113281, + "step": 2631 + }, + { + "epoch": 0.36675259527624887, + "grad_norm": 0.4983043670654297, + "learning_rate": 3.082615079493876e-05, + "loss": 0.0888824462890625, + "step": 2632 + }, + { + "epoch": 0.36689193896746325, + "grad_norm": 0.5422556400299072, + "learning_rate": 3.081822302498812e-05, + "loss": 0.10758018493652344, + "step": 2633 + }, + { + "epoch": 0.3670312826586776, + "grad_norm": 0.4882579743862152, + "learning_rate": 3.081029285147531e-05, + "loss": 0.10788917541503906, + "step": 2634 + }, + { + "epoch": 0.367170626349892, + "grad_norm": 0.3706008791923523, + "learning_rate": 3.080236027616224e-05, + "loss": 0.07702255249023438, + "step": 2635 + }, + { + "epoch": 0.3673099700411064, + "grad_norm": 0.5982608199119568, + "learning_rate": 3.079442530081133e-05, + "loss": 0.09633827209472656, + "step": 2636 + }, + { + "epoch": 0.36744931373232076, + "grad_norm": 0.5632056593894958, + "learning_rate": 3.078648792718556e-05, + "loss": 0.0933990478515625, + "step": 2637 + }, + { + "epoch": 0.36758865742353514, + "grad_norm": 0.4927740693092346, + "learning_rate": 3.0778548157048434e-05, + "loss": 0.10204696655273438, + "step": 2638 + }, + { + "epoch": 0.3677280011147495, + "grad_norm": 0.3899678885936737, + "learning_rate": 3.0770605992163986e-05, + "loss": 0.09213638305664062, + "step": 2639 + }, + { + "epoch": 0.3678673448059639, + "grad_norm": 0.42441123723983765, + "learning_rate": 3.076266143429679e-05, + "loss": 0.09523487091064453, + "step": 2640 + }, + { + "epoch": 0.36800668849717827, + "grad_norm": 0.4437398612499237, + "learning_rate": 3.0754714485211925e-05, + "loss": 0.083953857421875, + "step": 2641 + }, + { + "epoch": 0.36814603218839265, + "grad_norm": 0.3815813958644867, + "learning_rate": 3.0746765146675043e-05, + "loss": 0.08805179595947266, + "step": 2642 + }, + { + "epoch": 0.368285375879607, + "grad_norm": 0.609484851360321, + "learning_rate": 3.0738813420452295e-05, + "loss": 0.11574077606201172, + "step": 2643 + }, + { + "epoch": 0.36842471957082146, + "grad_norm": 0.31761297583580017, + "learning_rate": 3.073085930831038e-05, + "loss": 0.06921577453613281, + "step": 2644 + }, + { + "epoch": 0.36856406326203583, + "grad_norm": 0.5742952227592468, + "learning_rate": 3.072290281201652e-05, + "loss": 0.09092903137207031, + "step": 2645 + }, + { + "epoch": 0.3687034069532502, + "grad_norm": 0.49034351110458374, + "learning_rate": 3.071494393333846e-05, + "loss": 0.0991983413696289, + "step": 2646 + }, + { + "epoch": 0.3688427506444646, + "grad_norm": 0.6634458303451538, + "learning_rate": 3.0706982674044486e-05, + "loss": 0.09711456298828125, + "step": 2647 + }, + { + "epoch": 0.36898209433567897, + "grad_norm": 0.5322089195251465, + "learning_rate": 3.06990190359034e-05, + "loss": 0.08977508544921875, + "step": 2648 + }, + { + "epoch": 0.36912143802689334, + "grad_norm": 0.494231641292572, + "learning_rate": 3.069105302068455e-05, + "loss": 0.10292768478393555, + "step": 2649 + }, + { + "epoch": 0.3692607817181077, + "grad_norm": 0.4044037163257599, + "learning_rate": 3.06830846301578e-05, + "loss": 0.09699058532714844, + "step": 2650 + }, + { + "epoch": 0.3694001254093221, + "grad_norm": 0.3748975396156311, + "learning_rate": 3.067511386609354e-05, + "loss": 0.0838470458984375, + "step": 2651 + }, + { + "epoch": 0.3695394691005365, + "grad_norm": 0.34188467264175415, + "learning_rate": 3.0667140730262706e-05, + "loss": 0.09354972839355469, + "step": 2652 + }, + { + "epoch": 0.36967881279175085, + "grad_norm": 0.5857479572296143, + "learning_rate": 3.065916522443673e-05, + "loss": 0.106719970703125, + "step": 2653 + }, + { + "epoch": 0.36981815648296523, + "grad_norm": 0.4976840317249298, + "learning_rate": 3.06511873503876e-05, + "loss": 0.10580062866210938, + "step": 2654 + }, + { + "epoch": 0.3699575001741796, + "grad_norm": 0.7127572298049927, + "learning_rate": 3.0643207109887804e-05, + "loss": 0.13005638122558594, + "step": 2655 + }, + { + "epoch": 0.370096843865394, + "grad_norm": 0.6492970585823059, + "learning_rate": 3.063522450471038e-05, + "loss": 0.10020828247070312, + "step": 2656 + }, + { + "epoch": 0.37023618755660836, + "grad_norm": 0.7340102791786194, + "learning_rate": 3.062723953662888e-05, + "loss": 0.10669326782226562, + "step": 2657 + }, + { + "epoch": 0.37037553124782274, + "grad_norm": 0.5192208886146545, + "learning_rate": 3.061925220741738e-05, + "loss": 0.0759124755859375, + "step": 2658 + }, + { + "epoch": 0.3705148749390371, + "grad_norm": 0.8337069749832153, + "learning_rate": 3.0611262518850464e-05, + "loss": 0.10817146301269531, + "step": 2659 + }, + { + "epoch": 0.3706542186302515, + "grad_norm": 0.5799688100814819, + "learning_rate": 3.0603270472703294e-05, + "loss": 0.11566162109375, + "step": 2660 + }, + { + "epoch": 0.3707935623214659, + "grad_norm": 0.3261812627315521, + "learning_rate": 3.05952760707515e-05, + "loss": 0.07670211791992188, + "step": 2661 + }, + { + "epoch": 0.37093290601268025, + "grad_norm": 0.4491943120956421, + "learning_rate": 3.0587279314771253e-05, + "loss": 0.07176589965820312, + "step": 2662 + }, + { + "epoch": 0.37107224970389463, + "grad_norm": 0.5094791054725647, + "learning_rate": 3.057928020653925e-05, + "loss": 0.09195709228515625, + "step": 2663 + }, + { + "epoch": 0.37121159339510906, + "grad_norm": 0.6611687541007996, + "learning_rate": 3.057127874783272e-05, + "loss": 0.1024017333984375, + "step": 2664 + }, + { + "epoch": 0.37135093708632344, + "grad_norm": 0.5406239628791809, + "learning_rate": 3.0563274940429404e-05, + "loss": 0.07692146301269531, + "step": 2665 + }, + { + "epoch": 0.3714902807775378, + "grad_norm": 0.6640320420265198, + "learning_rate": 3.055526878610755e-05, + "loss": 0.11758804321289062, + "step": 2666 + }, + { + "epoch": 0.3716296244687522, + "grad_norm": 0.4772275388240814, + "learning_rate": 3.054726028664595e-05, + "loss": 0.09078788757324219, + "step": 2667 + }, + { + "epoch": 0.3717689681599666, + "grad_norm": 0.38769009709358215, + "learning_rate": 3.053924944382393e-05, + "loss": 0.08221244812011719, + "step": 2668 + }, + { + "epoch": 0.37190831185118095, + "grad_norm": 0.32055947184562683, + "learning_rate": 3.053123625942128e-05, + "loss": 0.09814834594726562, + "step": 2669 + }, + { + "epoch": 0.37204765554239533, + "grad_norm": 0.5195102691650391, + "learning_rate": 3.052322073521837e-05, + "loss": 0.08394336700439453, + "step": 2670 + }, + { + "epoch": 0.3721869992336097, + "grad_norm": 0.38161730766296387, + "learning_rate": 3.0515202872996067e-05, + "loss": 0.08862686157226562, + "step": 2671 + }, + { + "epoch": 0.3723263429248241, + "grad_norm": 0.6260156035423279, + "learning_rate": 3.050718267453575e-05, + "loss": 0.08218193054199219, + "step": 2672 + }, + { + "epoch": 0.37246568661603846, + "grad_norm": 1.1528509855270386, + "learning_rate": 3.0499160141619323e-05, + "loss": 0.10898303985595703, + "step": 2673 + }, + { + "epoch": 0.37260503030725284, + "grad_norm": 0.7019554972648621, + "learning_rate": 3.049113527602922e-05, + "loss": 0.091705322265625, + "step": 2674 + }, + { + "epoch": 0.3727443739984672, + "grad_norm": 0.6635509133338928, + "learning_rate": 3.0483108079548366e-05, + "loss": 0.09930038452148438, + "step": 2675 + }, + { + "epoch": 0.3728837176896816, + "grad_norm": 0.674980878829956, + "learning_rate": 3.0475078553960234e-05, + "loss": 0.09355640411376953, + "step": 2676 + }, + { + "epoch": 0.37302306138089597, + "grad_norm": 0.9195274114608765, + "learning_rate": 3.0467046701048795e-05, + "loss": 0.10723114013671875, + "step": 2677 + }, + { + "epoch": 0.37316240507211035, + "grad_norm": 0.5770914554595947, + "learning_rate": 3.0459012522598553e-05, + "loss": 0.10249519348144531, + "step": 2678 + }, + { + "epoch": 0.3733017487633247, + "grad_norm": 0.4010397791862488, + "learning_rate": 3.04509760203945e-05, + "loss": 0.08688163757324219, + "step": 2679 + }, + { + "epoch": 0.3734410924545391, + "grad_norm": 0.41461271047592163, + "learning_rate": 3.0442937196222186e-05, + "loss": 0.07854843139648438, + "step": 2680 + }, + { + "epoch": 0.3735804361457535, + "grad_norm": 0.6605979204177856, + "learning_rate": 3.043489605186764e-05, + "loss": 0.07791900634765625, + "step": 2681 + }, + { + "epoch": 0.37371977983696786, + "grad_norm": 0.7707806825637817, + "learning_rate": 3.0426852589117422e-05, + "loss": 0.10687255859375, + "step": 2682 + }, + { + "epoch": 0.37385912352818224, + "grad_norm": 0.7498394846916199, + "learning_rate": 3.041880680975861e-05, + "loss": 0.1289825439453125, + "step": 2683 + }, + { + "epoch": 0.37399846721939667, + "grad_norm": 0.3958666920661926, + "learning_rate": 3.041075871557879e-05, + "loss": 0.10088920593261719, + "step": 2684 + }, + { + "epoch": 0.37413781091061105, + "grad_norm": 0.3899536430835724, + "learning_rate": 3.0402708308366066e-05, + "loss": 0.07924270629882812, + "step": 2685 + }, + { + "epoch": 0.3742771546018254, + "grad_norm": 0.4115699529647827, + "learning_rate": 3.039465558990905e-05, + "loss": 0.09044361114501953, + "step": 2686 + }, + { + "epoch": 0.3744164982930398, + "grad_norm": 0.39313071966171265, + "learning_rate": 3.038660056199688e-05, + "loss": 0.08771324157714844, + "step": 2687 + }, + { + "epoch": 0.3745558419842542, + "grad_norm": 0.41234084963798523, + "learning_rate": 3.037854322641919e-05, + "loss": 0.08519744873046875, + "step": 2688 + }, + { + "epoch": 0.37469518567546856, + "grad_norm": 0.4430343806743622, + "learning_rate": 3.0370483584966144e-05, + "loss": 0.0808115005493164, + "step": 2689 + }, + { + "epoch": 0.37483452936668293, + "grad_norm": 0.6639008522033691, + "learning_rate": 3.036242163942841e-05, + "loss": 0.08904552459716797, + "step": 2690 + }, + { + "epoch": 0.3749738730578973, + "grad_norm": 0.7145813703536987, + "learning_rate": 3.035435739159716e-05, + "loss": 0.1018829345703125, + "step": 2691 + }, + { + "epoch": 0.3751132167491117, + "grad_norm": 0.7242342829704285, + "learning_rate": 3.03462908432641e-05, + "loss": 0.11047840118408203, + "step": 2692 + }, + { + "epoch": 0.37525256044032607, + "grad_norm": 0.3908018171787262, + "learning_rate": 3.033822199622142e-05, + "loss": 0.10414695739746094, + "step": 2693 + }, + { + "epoch": 0.37539190413154044, + "grad_norm": 0.3752339780330658, + "learning_rate": 3.033015085226184e-05, + "loss": 0.09634017944335938, + "step": 2694 + }, + { + "epoch": 0.3755312478227548, + "grad_norm": 0.37833017110824585, + "learning_rate": 3.0322077413178578e-05, + "loss": 0.08203125, + "step": 2695 + }, + { + "epoch": 0.3756705915139692, + "grad_norm": 0.7441471815109253, + "learning_rate": 3.0314001680765375e-05, + "loss": 0.10453605651855469, + "step": 2696 + }, + { + "epoch": 0.3758099352051836, + "grad_norm": 0.36959928274154663, + "learning_rate": 3.0305923656816473e-05, + "loss": 0.0829458236694336, + "step": 2697 + }, + { + "epoch": 0.37594927889639795, + "grad_norm": 0.9989264011383057, + "learning_rate": 3.0297843343126617e-05, + "loss": 0.10042285919189453, + "step": 2698 + }, + { + "epoch": 0.37608862258761233, + "grad_norm": 0.7625870704650879, + "learning_rate": 3.0289760741491077e-05, + "loss": 0.09199714660644531, + "step": 2699 + }, + { + "epoch": 0.3762279662788267, + "grad_norm": 0.48181337118148804, + "learning_rate": 3.028167585370562e-05, + "loss": 0.08163261413574219, + "step": 2700 + }, + { + "epoch": 0.3763673099700411, + "grad_norm": 0.4886201024055481, + "learning_rate": 3.0273588681566523e-05, + "loss": 0.12115478515625, + "step": 2701 + }, + { + "epoch": 0.37650665366125546, + "grad_norm": 0.5258240103721619, + "learning_rate": 3.026549922687057e-05, + "loss": 0.090240478515625, + "step": 2702 + }, + { + "epoch": 0.37664599735246984, + "grad_norm": 0.7181178331375122, + "learning_rate": 3.0257407491415053e-05, + "loss": 0.09222412109375, + "step": 2703 + }, + { + "epoch": 0.3767853410436843, + "grad_norm": 0.574062168598175, + "learning_rate": 3.0249313476997772e-05, + "loss": 0.08495521545410156, + "step": 2704 + }, + { + "epoch": 0.37692468473489865, + "grad_norm": 0.45386841893196106, + "learning_rate": 3.0241217185417034e-05, + "loss": 0.08888053894042969, + "step": 2705 + }, + { + "epoch": 0.37706402842611303, + "grad_norm": 0.43053752183914185, + "learning_rate": 3.023311861847165e-05, + "loss": 0.08097457885742188, + "step": 2706 + }, + { + "epoch": 0.3772033721173274, + "grad_norm": 0.27032771706581116, + "learning_rate": 3.0225017777960927e-05, + "loss": 0.08102035522460938, + "step": 2707 + }, + { + "epoch": 0.3773427158085418, + "grad_norm": 0.6524795293807983, + "learning_rate": 3.0216914665684705e-05, + "loss": 0.09961700439453125, + "step": 2708 + }, + { + "epoch": 0.37748205949975616, + "grad_norm": 0.3357102572917938, + "learning_rate": 3.020880928344329e-05, + "loss": 0.08326053619384766, + "step": 2709 + }, + { + "epoch": 0.37762140319097054, + "grad_norm": 0.8977958559989929, + "learning_rate": 3.0200701633037534e-05, + "loss": 0.11735153198242188, + "step": 2710 + }, + { + "epoch": 0.3777607468821849, + "grad_norm": 0.39443618059158325, + "learning_rate": 3.0192591716268755e-05, + "loss": 0.08238792419433594, + "step": 2711 + }, + { + "epoch": 0.3779000905733993, + "grad_norm": 0.4962442219257355, + "learning_rate": 3.0184479534938797e-05, + "loss": 0.11371994018554688, + "step": 2712 + }, + { + "epoch": 0.37803943426461367, + "grad_norm": 0.587270200252533, + "learning_rate": 3.0176365090850005e-05, + "loss": 0.1200714111328125, + "step": 2713 + }, + { + "epoch": 0.37817877795582805, + "grad_norm": 0.5821568369865417, + "learning_rate": 3.0168248385805223e-05, + "loss": 0.09647941589355469, + "step": 2714 + }, + { + "epoch": 0.3783181216470424, + "grad_norm": 0.577163815498352, + "learning_rate": 3.0160129421607792e-05, + "loss": 0.08432388305664062, + "step": 2715 + }, + { + "epoch": 0.3784574653382568, + "grad_norm": 0.5951326489448547, + "learning_rate": 3.015200820006156e-05, + "loss": 0.09123086929321289, + "step": 2716 + }, + { + "epoch": 0.3785968090294712, + "grad_norm": 0.6108888387680054, + "learning_rate": 3.014388472297088e-05, + "loss": 0.08601188659667969, + "step": 2717 + }, + { + "epoch": 0.37873615272068556, + "grad_norm": 0.3983878791332245, + "learning_rate": 3.013575899214061e-05, + "loss": 0.10161018371582031, + "step": 2718 + }, + { + "epoch": 0.37887549641189994, + "grad_norm": 0.7092560529708862, + "learning_rate": 3.0127631009376093e-05, + "loss": 0.11614544689655304, + "step": 2719 + }, + { + "epoch": 0.3790148401031143, + "grad_norm": 0.6303610801696777, + "learning_rate": 3.011950077648318e-05, + "loss": 0.11548423767089844, + "step": 2720 + }, + { + "epoch": 0.3791541837943287, + "grad_norm": 1.1409763097763062, + "learning_rate": 3.0111368295268225e-05, + "loss": 0.1091756820678711, + "step": 2721 + }, + { + "epoch": 0.37929352748554307, + "grad_norm": 0.5074110627174377, + "learning_rate": 3.0103233567538086e-05, + "loss": 0.07831192016601562, + "step": 2722 + }, + { + "epoch": 0.37943287117675745, + "grad_norm": 0.34338194131851196, + "learning_rate": 3.009509659510011e-05, + "loss": 0.0916910171508789, + "step": 2723 + }, + { + "epoch": 0.3795722148679719, + "grad_norm": 0.5136479735374451, + "learning_rate": 3.008695737976214e-05, + "loss": 0.08601760864257812, + "step": 2724 + }, + { + "epoch": 0.37971155855918626, + "grad_norm": 0.5456742644309998, + "learning_rate": 3.0078815923332532e-05, + "loss": 0.10174179077148438, + "step": 2725 + }, + { + "epoch": 0.37985090225040063, + "grad_norm": 0.8891974687576294, + "learning_rate": 3.007067222762013e-05, + "loss": 0.0932769775390625, + "step": 2726 + }, + { + "epoch": 0.379990245941615, + "grad_norm": 0.6588044166564941, + "learning_rate": 3.0062526294434273e-05, + "loss": 0.10466766357421875, + "step": 2727 + }, + { + "epoch": 0.3801295896328294, + "grad_norm": 0.8662517666816711, + "learning_rate": 3.005437812558481e-05, + "loss": 0.09021949768066406, + "step": 2728 + }, + { + "epoch": 0.38026893332404377, + "grad_norm": 0.6791878938674927, + "learning_rate": 3.004622772288207e-05, + "loss": 0.09589385986328125, + "step": 2729 + }, + { + "epoch": 0.38040827701525814, + "grad_norm": 0.5724003911018372, + "learning_rate": 3.003807508813689e-05, + "loss": 0.09053230285644531, + "step": 2730 + }, + { + "epoch": 0.3805476207064725, + "grad_norm": 0.5454049706459045, + "learning_rate": 3.00299202231606e-05, + "loss": 0.09765052795410156, + "step": 2731 + }, + { + "epoch": 0.3806869643976869, + "grad_norm": 0.5287002921104431, + "learning_rate": 3.0021763129765024e-05, + "loss": 0.09499740600585938, + "step": 2732 + }, + { + "epoch": 0.3808263080889013, + "grad_norm": 0.5488462448120117, + "learning_rate": 3.0013603809762473e-05, + "loss": 0.09705924987792969, + "step": 2733 + }, + { + "epoch": 0.38096565178011566, + "grad_norm": 0.7213091850280762, + "learning_rate": 3.0005442264965778e-05, + "loss": 0.10485649108886719, + "step": 2734 + }, + { + "epoch": 0.38110499547133003, + "grad_norm": 0.6268873810768127, + "learning_rate": 2.9997278497188236e-05, + "loss": 0.09526824951171875, + "step": 2735 + }, + { + "epoch": 0.3812443391625444, + "grad_norm": 0.4168356657028198, + "learning_rate": 2.9989112508243655e-05, + "loss": 0.0840911865234375, + "step": 2736 + }, + { + "epoch": 0.3813836828537588, + "grad_norm": 1.162355899810791, + "learning_rate": 2.998094429994633e-05, + "loss": 0.12319183349609375, + "step": 2737 + }, + { + "epoch": 0.38152302654497317, + "grad_norm": 1.0116894245147705, + "learning_rate": 2.9972773874111057e-05, + "loss": 0.11832809448242188, + "step": 2738 + }, + { + "epoch": 0.38166237023618754, + "grad_norm": 1.0368438959121704, + "learning_rate": 2.99646012325531e-05, + "loss": 0.11009979248046875, + "step": 2739 + }, + { + "epoch": 0.3818017139274019, + "grad_norm": 0.5901070833206177, + "learning_rate": 2.995642637708825e-05, + "loss": 0.10802841186523438, + "step": 2740 + }, + { + "epoch": 0.3819410576186163, + "grad_norm": 0.5195164084434509, + "learning_rate": 2.9948249309532768e-05, + "loss": 0.08477020263671875, + "step": 2741 + }, + { + "epoch": 0.3820804013098307, + "grad_norm": 0.488233357667923, + "learning_rate": 2.9940070031703413e-05, + "loss": 0.10563850402832031, + "step": 2742 + }, + { + "epoch": 0.38221974500104505, + "grad_norm": 0.6009315848350525, + "learning_rate": 2.9931888545417435e-05, + "loss": 0.09035682678222656, + "step": 2743 + }, + { + "epoch": 0.3823590886922595, + "grad_norm": 0.594501793384552, + "learning_rate": 2.9923704852492566e-05, + "loss": 0.1070404052734375, + "step": 2744 + }, + { + "epoch": 0.38249843238347386, + "grad_norm": 0.5943362712860107, + "learning_rate": 2.9915518954747038e-05, + "loss": 0.0744466781616211, + "step": 2745 + }, + { + "epoch": 0.38263777607468824, + "grad_norm": 0.5611352920532227, + "learning_rate": 2.9907330853999583e-05, + "loss": 0.09606742858886719, + "step": 2746 + }, + { + "epoch": 0.3827771197659026, + "grad_norm": 0.571186900138855, + "learning_rate": 2.9899140552069396e-05, + "loss": 0.09640693664550781, + "step": 2747 + }, + { + "epoch": 0.382916463457117, + "grad_norm": 0.6140549182891846, + "learning_rate": 2.989094805077618e-05, + "loss": 0.10701560974121094, + "step": 2748 + }, + { + "epoch": 0.3830558071483314, + "grad_norm": 0.29268214106559753, + "learning_rate": 2.9882753351940115e-05, + "loss": 0.07107925415039062, + "step": 2749 + }, + { + "epoch": 0.38319515083954575, + "grad_norm": 0.5259759426116943, + "learning_rate": 2.987455645738189e-05, + "loss": 0.09951019287109375, + "step": 2750 + }, + { + "epoch": 0.38333449453076013, + "grad_norm": 0.537269651889801, + "learning_rate": 2.9866357368922657e-05, + "loss": 0.09992599487304688, + "step": 2751 + }, + { + "epoch": 0.3834738382219745, + "grad_norm": 0.5093732476234436, + "learning_rate": 2.985815608838407e-05, + "loss": 0.08361625671386719, + "step": 2752 + }, + { + "epoch": 0.3836131819131889, + "grad_norm": 1.073272466659546, + "learning_rate": 2.984995261758827e-05, + "loss": 0.12824630737304688, + "step": 2753 + }, + { + "epoch": 0.38375252560440326, + "grad_norm": 0.5091570019721985, + "learning_rate": 2.984174695835787e-05, + "loss": 0.10081863403320312, + "step": 2754 + }, + { + "epoch": 0.38389186929561764, + "grad_norm": 0.3003741502761841, + "learning_rate": 2.983353911251599e-05, + "loss": 0.08161163330078125, + "step": 2755 + }, + { + "epoch": 0.384031212986832, + "grad_norm": 0.5256957411766052, + "learning_rate": 2.9825329081886222e-05, + "loss": 0.11307048797607422, + "step": 2756 + }, + { + "epoch": 0.3841705566780464, + "grad_norm": 0.34138381481170654, + "learning_rate": 2.981711686829264e-05, + "loss": 0.07543182373046875, + "step": 2757 + }, + { + "epoch": 0.38430990036926077, + "grad_norm": 0.7325676083564758, + "learning_rate": 2.9808902473559835e-05, + "loss": 0.11411666870117188, + "step": 2758 + }, + { + "epoch": 0.38444924406047515, + "grad_norm": 0.5013800263404846, + "learning_rate": 2.9800685899512828e-05, + "loss": 0.09502983093261719, + "step": 2759 + }, + { + "epoch": 0.3845885877516895, + "grad_norm": 0.6472126841545105, + "learning_rate": 2.9792467147977174e-05, + "loss": 0.11131095886230469, + "step": 2760 + }, + { + "epoch": 0.3847279314429039, + "grad_norm": 0.3696441650390625, + "learning_rate": 2.9784246220778885e-05, + "loss": 0.09356307983398438, + "step": 2761 + }, + { + "epoch": 0.3848672751341183, + "grad_norm": 0.4215986728668213, + "learning_rate": 2.9776023119744462e-05, + "loss": 0.09070587158203125, + "step": 2762 + }, + { + "epoch": 0.38500661882533266, + "grad_norm": 0.33960506319999695, + "learning_rate": 2.976779784670089e-05, + "loss": 0.09393310546875, + "step": 2763 + }, + { + "epoch": 0.38514596251654704, + "grad_norm": 0.42824599146842957, + "learning_rate": 2.9759570403475644e-05, + "loss": 0.10552787780761719, + "step": 2764 + }, + { + "epoch": 0.38528530620776147, + "grad_norm": 0.3146350681781769, + "learning_rate": 2.975134079189667e-05, + "loss": 0.07209205627441406, + "step": 2765 + }, + { + "epoch": 0.38542464989897585, + "grad_norm": 0.4251825511455536, + "learning_rate": 2.9743109013792395e-05, + "loss": 0.09514999389648438, + "step": 2766 + }, + { + "epoch": 0.3855639935901902, + "grad_norm": 0.39905688166618347, + "learning_rate": 2.9734875070991736e-05, + "loss": 0.09475517272949219, + "step": 2767 + }, + { + "epoch": 0.3857033372814046, + "grad_norm": 0.3633705675601959, + "learning_rate": 2.9726638965324088e-05, + "loss": 0.07698249816894531, + "step": 2768 + }, + { + "epoch": 0.385842680972619, + "grad_norm": 0.5275864005088806, + "learning_rate": 2.9718400698619327e-05, + "loss": 0.10350990295410156, + "step": 2769 + }, + { + "epoch": 0.38598202466383336, + "grad_norm": 0.6753994226455688, + "learning_rate": 2.9710160272707803e-05, + "loss": 0.09576606750488281, + "step": 2770 + }, + { + "epoch": 0.38612136835504773, + "grad_norm": 0.3894367814064026, + "learning_rate": 2.9701917689420354e-05, + "loss": 0.0880126953125, + "step": 2771 + }, + { + "epoch": 0.3862607120462621, + "grad_norm": 0.5333966016769409, + "learning_rate": 2.9693672950588292e-05, + "loss": 0.11261558532714844, + "step": 2772 + }, + { + "epoch": 0.3864000557374765, + "grad_norm": 0.4037832021713257, + "learning_rate": 2.9685426058043414e-05, + "loss": 0.08189201354980469, + "step": 2773 + }, + { + "epoch": 0.38653939942869087, + "grad_norm": 0.6135942935943604, + "learning_rate": 2.9677177013617987e-05, + "loss": 0.09828853607177734, + "step": 2774 + }, + { + "epoch": 0.38667874311990524, + "grad_norm": 0.3378565013408661, + "learning_rate": 2.9668925819144755e-05, + "loss": 0.08607769012451172, + "step": 2775 + }, + { + "epoch": 0.3868180868111196, + "grad_norm": 0.6044371128082275, + "learning_rate": 2.966067247645696e-05, + "loss": 0.11027908325195312, + "step": 2776 + }, + { + "epoch": 0.386957430502334, + "grad_norm": 0.682166576385498, + "learning_rate": 2.965241698738829e-05, + "loss": 0.10427284240722656, + "step": 2777 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 0.5190744996070862, + "learning_rate": 2.9644159353772937e-05, + "loss": 0.10151481628417969, + "step": 2778 + }, + { + "epoch": 0.38723611788476275, + "grad_norm": 0.33361655473709106, + "learning_rate": 2.9635899577445558e-05, + "loss": 0.08853912353515625, + "step": 2779 + }, + { + "epoch": 0.38737546157597713, + "grad_norm": 0.42076587677001953, + "learning_rate": 2.9627637660241283e-05, + "loss": 0.10107612609863281, + "step": 2780 + }, + { + "epoch": 0.3875148052671915, + "grad_norm": 1.1679097414016724, + "learning_rate": 2.9619373603995724e-05, + "loss": 0.10711479187011719, + "step": 2781 + }, + { + "epoch": 0.3876541489584059, + "grad_norm": 0.8666403293609619, + "learning_rate": 2.9611107410544958e-05, + "loss": 0.09740447998046875, + "step": 2782 + }, + { + "epoch": 0.38779349264962026, + "grad_norm": 0.45909562706947327, + "learning_rate": 2.9602839081725558e-05, + "loss": 0.09262847900390625, + "step": 2783 + }, + { + "epoch": 0.38793283634083464, + "grad_norm": 0.3647949993610382, + "learning_rate": 2.959456861937455e-05, + "loss": 0.10426902770996094, + "step": 2784 + }, + { + "epoch": 0.3880721800320491, + "grad_norm": 0.3671012818813324, + "learning_rate": 2.958629602532944e-05, + "loss": 0.09206008911132812, + "step": 2785 + }, + { + "epoch": 0.38821152372326345, + "grad_norm": 0.34836500883102417, + "learning_rate": 2.9578021301428212e-05, + "loss": 0.08223152160644531, + "step": 2786 + }, + { + "epoch": 0.38835086741447783, + "grad_norm": 0.5991067886352539, + "learning_rate": 2.9569744449509322e-05, + "loss": 0.0905609130859375, + "step": 2787 + }, + { + "epoch": 0.3884902111056922, + "grad_norm": 0.5513537526130676, + "learning_rate": 2.9561465471411693e-05, + "loss": 0.0977163314819336, + "step": 2788 + }, + { + "epoch": 0.3886295547969066, + "grad_norm": 0.3497450053691864, + "learning_rate": 2.955318436897473e-05, + "loss": 0.06902885437011719, + "step": 2789 + }, + { + "epoch": 0.38876889848812096, + "grad_norm": 0.2626107931137085, + "learning_rate": 2.9544901144038303e-05, + "loss": 0.07303237915039062, + "step": 2790 + }, + { + "epoch": 0.38890824217933534, + "grad_norm": 0.3460049331188202, + "learning_rate": 2.9536615798442755e-05, + "loss": 0.08888053894042969, + "step": 2791 + }, + { + "epoch": 0.3890475858705497, + "grad_norm": 0.6136528849601746, + "learning_rate": 2.9528328334028903e-05, + "loss": 0.0909271240234375, + "step": 2792 + }, + { + "epoch": 0.3891869295617641, + "grad_norm": 0.6220869421958923, + "learning_rate": 2.952003875263803e-05, + "loss": 0.09996414184570312, + "step": 2793 + }, + { + "epoch": 0.38932627325297847, + "grad_norm": 0.6757078170776367, + "learning_rate": 2.9511747056111893e-05, + "loss": 0.11027908325195312, + "step": 2794 + }, + { + "epoch": 0.38946561694419285, + "grad_norm": 0.4822178781032562, + "learning_rate": 2.9503453246292716e-05, + "loss": 0.09409713745117188, + "step": 2795 + }, + { + "epoch": 0.3896049606354072, + "grad_norm": 0.504410445690155, + "learning_rate": 2.9495157325023195e-05, + "loss": 0.11029243469238281, + "step": 2796 + }, + { + "epoch": 0.3897443043266216, + "grad_norm": 1.1279255151748657, + "learning_rate": 2.9486859294146497e-05, + "loss": 0.10991477966308594, + "step": 2797 + }, + { + "epoch": 0.389883648017836, + "grad_norm": 1.133557677268982, + "learning_rate": 2.9478559155506244e-05, + "loss": 0.12289714813232422, + "step": 2798 + }, + { + "epoch": 0.39002299170905036, + "grad_norm": 0.44316402077674866, + "learning_rate": 2.9470256910946555e-05, + "loss": 0.0987091064453125, + "step": 2799 + }, + { + "epoch": 0.39016233540026474, + "grad_norm": 0.6127455830574036, + "learning_rate": 2.946195256231199e-05, + "loss": 0.08694839477539062, + "step": 2800 + }, + { + "epoch": 0.3903016790914791, + "grad_norm": 0.47221896052360535, + "learning_rate": 2.9453646111447582e-05, + "loss": 0.11772537231445312, + "step": 2801 + }, + { + "epoch": 0.3904410227826935, + "grad_norm": 0.4385365843772888, + "learning_rate": 2.944533756019884e-05, + "loss": 0.09111595153808594, + "step": 2802 + }, + { + "epoch": 0.39058036647390787, + "grad_norm": 0.59828120470047, + "learning_rate": 2.9437026910411734e-05, + "loss": 0.09972000122070312, + "step": 2803 + }, + { + "epoch": 0.39071971016512225, + "grad_norm": 0.6515618562698364, + "learning_rate": 2.9428714163932697e-05, + "loss": 0.0914459228515625, + "step": 2804 + }, + { + "epoch": 0.3908590538563367, + "grad_norm": 0.4461486339569092, + "learning_rate": 2.9420399322608637e-05, + "loss": 0.087799072265625, + "step": 2805 + }, + { + "epoch": 0.39099839754755106, + "grad_norm": 0.4231763184070587, + "learning_rate": 2.9412082388286916e-05, + "loss": 0.08214950561523438, + "step": 2806 + }, + { + "epoch": 0.39113774123876544, + "grad_norm": 0.47879841923713684, + "learning_rate": 2.940376336281537e-05, + "loss": 0.10944366455078125, + "step": 2807 + }, + { + "epoch": 0.3912770849299798, + "grad_norm": 0.4841216504573822, + "learning_rate": 2.9395442248042297e-05, + "loss": 0.0946493148803711, + "step": 2808 + }, + { + "epoch": 0.3914164286211942, + "grad_norm": 0.9635735750198364, + "learning_rate": 2.9387119045816453e-05, + "loss": 0.14458084106445312, + "step": 2809 + }, + { + "epoch": 0.39155577231240857, + "grad_norm": 0.7816514372825623, + "learning_rate": 2.9378793757987082e-05, + "loss": 0.12192153930664062, + "step": 2810 + }, + { + "epoch": 0.39169511600362295, + "grad_norm": 0.6650937795639038, + "learning_rate": 2.9370466386403843e-05, + "loss": 0.10123825073242188, + "step": 2811 + }, + { + "epoch": 0.3918344596948373, + "grad_norm": 0.5407100915908813, + "learning_rate": 2.9362136932916914e-05, + "loss": 0.0932159423828125, + "step": 2812 + }, + { + "epoch": 0.3919738033860517, + "grad_norm": 0.5805373787879944, + "learning_rate": 2.93538053993769e-05, + "loss": 0.11596870422363281, + "step": 2813 + }, + { + "epoch": 0.3921131470772661, + "grad_norm": 0.5293424129486084, + "learning_rate": 2.9345471787634873e-05, + "loss": 0.09319686889648438, + "step": 2814 + }, + { + "epoch": 0.39225249076848046, + "grad_norm": 0.44215843081474304, + "learning_rate": 2.933713609954238e-05, + "loss": 0.09727191925048828, + "step": 2815 + }, + { + "epoch": 0.39239183445969483, + "grad_norm": 0.7751300930976868, + "learning_rate": 2.9328798336951415e-05, + "loss": 0.09369087219238281, + "step": 2816 + }, + { + "epoch": 0.3925311781509092, + "grad_norm": 0.6871502995491028, + "learning_rate": 2.9320458501714437e-05, + "loss": 0.09773826599121094, + "step": 2817 + }, + { + "epoch": 0.3926705218421236, + "grad_norm": 0.3540394604206085, + "learning_rate": 2.931211659568437e-05, + "loss": 0.08000946044921875, + "step": 2818 + }, + { + "epoch": 0.39280986553333797, + "grad_norm": 1.134804606437683, + "learning_rate": 2.93037726207146e-05, + "loss": 0.0972757339477539, + "step": 2819 + }, + { + "epoch": 0.39294920922455234, + "grad_norm": 0.5506985783576965, + "learning_rate": 2.929542657865896e-05, + "loss": 0.1025390625, + "step": 2820 + }, + { + "epoch": 0.3930885529157667, + "grad_norm": 0.6221711039543152, + "learning_rate": 2.9287078471371747e-05, + "loss": 0.08147811889648438, + "step": 2821 + }, + { + "epoch": 0.3932278966069811, + "grad_norm": 0.8393847346305847, + "learning_rate": 2.927872830070773e-05, + "loss": 0.11625289916992188, + "step": 2822 + }, + { + "epoch": 0.3933672402981955, + "grad_norm": 0.39326179027557373, + "learning_rate": 2.9270376068522117e-05, + "loss": 0.10148906707763672, + "step": 2823 + }, + { + "epoch": 0.39350658398940985, + "grad_norm": 0.397451788187027, + "learning_rate": 2.9262021776670594e-05, + "loss": 0.09712409973144531, + "step": 2824 + }, + { + "epoch": 0.3936459276806243, + "grad_norm": 0.5791568756103516, + "learning_rate": 2.9253665427009283e-05, + "loss": 0.11098098754882812, + "step": 2825 + }, + { + "epoch": 0.39378527137183866, + "grad_norm": 1.1083165407180786, + "learning_rate": 2.9245307021394787e-05, + "loss": 0.12569808959960938, + "step": 2826 + }, + { + "epoch": 0.39392461506305304, + "grad_norm": 0.48420849442481995, + "learning_rate": 2.9236946561684133e-05, + "loss": 0.11586570739746094, + "step": 2827 + }, + { + "epoch": 0.3940639587542674, + "grad_norm": 0.5278047919273376, + "learning_rate": 2.922858404973484e-05, + "loss": 0.1070108413696289, + "step": 2828 + }, + { + "epoch": 0.3942033024454818, + "grad_norm": 0.5983778238296509, + "learning_rate": 2.922021948740487e-05, + "loss": 0.10889244079589844, + "step": 2829 + }, + { + "epoch": 0.3943426461366962, + "grad_norm": 0.7376692295074463, + "learning_rate": 2.9211852876552624e-05, + "loss": 0.10622215270996094, + "step": 2830 + }, + { + "epoch": 0.39448198982791055, + "grad_norm": 0.4195288419723511, + "learning_rate": 2.9203484219036986e-05, + "loss": 0.103424072265625, + "step": 2831 + }, + { + "epoch": 0.39462133351912493, + "grad_norm": 0.5125700831413269, + "learning_rate": 2.9195113516717267e-05, + "loss": 0.08629417419433594, + "step": 2832 + }, + { + "epoch": 0.3947606772103393, + "grad_norm": 0.4848294258117676, + "learning_rate": 2.9186740771453253e-05, + "loss": 0.09199714660644531, + "step": 2833 + }, + { + "epoch": 0.3949000209015537, + "grad_norm": 0.5505048632621765, + "learning_rate": 2.9178365985105182e-05, + "loss": 0.0869293212890625, + "step": 2834 + }, + { + "epoch": 0.39503936459276806, + "grad_norm": 0.6677141785621643, + "learning_rate": 2.916998915953373e-05, + "loss": 0.10593414306640625, + "step": 2835 + }, + { + "epoch": 0.39517870828398244, + "grad_norm": 0.4136343002319336, + "learning_rate": 2.916161029660004e-05, + "loss": 0.09665298461914062, + "step": 2836 + }, + { + "epoch": 0.3953180519751968, + "grad_norm": 0.4028083086013794, + "learning_rate": 2.915322939816571e-05, + "loss": 0.08940696716308594, + "step": 2837 + }, + { + "epoch": 0.3954573956664112, + "grad_norm": 0.45842161774635315, + "learning_rate": 2.9144846466092773e-05, + "loss": 0.09146785736083984, + "step": 2838 + }, + { + "epoch": 0.39559673935762557, + "grad_norm": 0.6173732280731201, + "learning_rate": 2.9136461502243735e-05, + "loss": 0.09614944458007812, + "step": 2839 + }, + { + "epoch": 0.39573608304883995, + "grad_norm": 0.34206080436706543, + "learning_rate": 2.9128074508481544e-05, + "loss": 0.08624649047851562, + "step": 2840 + }, + { + "epoch": 0.3958754267400543, + "grad_norm": 0.41365528106689453, + "learning_rate": 2.9119685486669587e-05, + "loss": 0.08660697937011719, + "step": 2841 + }, + { + "epoch": 0.3960147704312687, + "grad_norm": 0.5335280299186707, + "learning_rate": 2.911129443867173e-05, + "loss": 0.09888267517089844, + "step": 2842 + }, + { + "epoch": 0.3961541141224831, + "grad_norm": 0.6412641406059265, + "learning_rate": 2.9102901366352254e-05, + "loss": 0.09218597412109375, + "step": 2843 + }, + { + "epoch": 0.39629345781369746, + "grad_norm": 0.3396451473236084, + "learning_rate": 2.909450627157592e-05, + "loss": 0.07420539855957031, + "step": 2844 + }, + { + "epoch": 0.3964328015049119, + "grad_norm": 1.1378116607666016, + "learning_rate": 2.9086109156207926e-05, + "loss": 0.115447998046875, + "step": 2845 + }, + { + "epoch": 0.39657214519612627, + "grad_norm": 0.46904492378234863, + "learning_rate": 2.9077710022113918e-05, + "loss": 0.10770225524902344, + "step": 2846 + }, + { + "epoch": 0.39671148888734065, + "grad_norm": 0.7583391070365906, + "learning_rate": 2.906930887115999e-05, + "loss": 0.10854339599609375, + "step": 2847 + }, + { + "epoch": 0.396850832578555, + "grad_norm": 0.6225755214691162, + "learning_rate": 2.906090570521268e-05, + "loss": 0.09821701049804688, + "step": 2848 + }, + { + "epoch": 0.3969901762697694, + "grad_norm": 0.6595415472984314, + "learning_rate": 2.9052500526138994e-05, + "loss": 0.08878707885742188, + "step": 2849 + }, + { + "epoch": 0.3971295199609838, + "grad_norm": 0.5401531457901001, + "learning_rate": 2.904409333580636e-05, + "loss": 0.08434104919433594, + "step": 2850 + }, + { + "epoch": 0.39726886365219816, + "grad_norm": 0.29161539673805237, + "learning_rate": 2.903568413608267e-05, + "loss": 0.078460693359375, + "step": 2851 + }, + { + "epoch": 0.39740820734341253, + "grad_norm": 0.4754908084869385, + "learning_rate": 2.9027272928836248e-05, + "loss": 0.10128402709960938, + "step": 2852 + }, + { + "epoch": 0.3975475510346269, + "grad_norm": 0.8355169296264648, + "learning_rate": 2.901885971593588e-05, + "loss": 0.09772872924804688, + "step": 2853 + }, + { + "epoch": 0.3976868947258413, + "grad_norm": 0.8549501895904541, + "learning_rate": 2.901044449925079e-05, + "loss": 0.12134337425231934, + "step": 2854 + }, + { + "epoch": 0.39782623841705567, + "grad_norm": 0.46173352003097534, + "learning_rate": 2.9002027280650643e-05, + "loss": 0.09795951843261719, + "step": 2855 + }, + { + "epoch": 0.39796558210827004, + "grad_norm": 0.44166067242622375, + "learning_rate": 2.899360806200555e-05, + "loss": 0.09861946105957031, + "step": 2856 + }, + { + "epoch": 0.3981049257994844, + "grad_norm": 0.4352531433105469, + "learning_rate": 2.8985186845186077e-05, + "loss": 0.0917062759399414, + "step": 2857 + }, + { + "epoch": 0.3982442694906988, + "grad_norm": 0.360923171043396, + "learning_rate": 2.897676363206322e-05, + "loss": 0.0909423828125, + "step": 2858 + }, + { + "epoch": 0.3983836131819132, + "grad_norm": 0.8740816712379456, + "learning_rate": 2.8968338424508426e-05, + "loss": 0.09923553466796875, + "step": 2859 + }, + { + "epoch": 0.39852295687312755, + "grad_norm": 0.3143211305141449, + "learning_rate": 2.895991122439359e-05, + "loss": 0.08576393127441406, + "step": 2860 + }, + { + "epoch": 0.39866230056434193, + "grad_norm": 0.4825614094734192, + "learning_rate": 2.895148203359103e-05, + "loss": 0.101531982421875, + "step": 2861 + }, + { + "epoch": 0.3988016442555563, + "grad_norm": 0.4319736361503601, + "learning_rate": 2.8943050853973536e-05, + "loss": 0.08923149108886719, + "step": 2862 + }, + { + "epoch": 0.3989409879467707, + "grad_norm": 0.638459324836731, + "learning_rate": 2.893461768741431e-05, + "loss": 0.1082763671875, + "step": 2863 + }, + { + "epoch": 0.39908033163798506, + "grad_norm": 0.6601674556732178, + "learning_rate": 2.892618253578702e-05, + "loss": 0.10466575622558594, + "step": 2864 + }, + { + "epoch": 0.3992196753291995, + "grad_norm": 0.4916825294494629, + "learning_rate": 2.8917745400965755e-05, + "loss": 0.09776496887207031, + "step": 2865 + }, + { + "epoch": 0.3993590190204139, + "grad_norm": 0.31457051634788513, + "learning_rate": 2.8909306284825058e-05, + "loss": 0.07593250274658203, + "step": 2866 + }, + { + "epoch": 0.39949836271162825, + "grad_norm": 0.5395392775535583, + "learning_rate": 2.8900865189239907e-05, + "loss": 0.10656356811523438, + "step": 2867 + }, + { + "epoch": 0.39963770640284263, + "grad_norm": 0.39573755860328674, + "learning_rate": 2.889242211608572e-05, + "loss": 0.08907413482666016, + "step": 2868 + }, + { + "epoch": 0.399777050094057, + "grad_norm": 0.3772701025009155, + "learning_rate": 2.8883977067238363e-05, + "loss": 0.07821178436279297, + "step": 2869 + }, + { + "epoch": 0.3999163937852714, + "grad_norm": 0.35247790813446045, + "learning_rate": 2.887553004457412e-05, + "loss": 0.09283447265625, + "step": 2870 + }, + { + "epoch": 0.40005573747648576, + "grad_norm": 0.609028697013855, + "learning_rate": 2.8867081049969738e-05, + "loss": 0.11035537719726562, + "step": 2871 + }, + { + "epoch": 0.40019508116770014, + "grad_norm": 0.6808987855911255, + "learning_rate": 2.8858630085302378e-05, + "loss": 0.12310409545898438, + "step": 2872 + }, + { + "epoch": 0.4003344248589145, + "grad_norm": 0.34903618693351746, + "learning_rate": 2.885017715244966e-05, + "loss": 0.09306621551513672, + "step": 2873 + }, + { + "epoch": 0.4004737685501289, + "grad_norm": 0.4060359597206116, + "learning_rate": 2.884172225328964e-05, + "loss": 0.07036781311035156, + "step": 2874 + }, + { + "epoch": 0.40061311224134327, + "grad_norm": 0.6052448749542236, + "learning_rate": 2.883326538970079e-05, + "loss": 0.12880897521972656, + "step": 2875 + }, + { + "epoch": 0.40075245593255765, + "grad_norm": 0.7020437121391296, + "learning_rate": 2.8824806563562037e-05, + "loss": 0.12180423736572266, + "step": 2876 + }, + { + "epoch": 0.400891799623772, + "grad_norm": 0.5779969096183777, + "learning_rate": 2.8816345776752737e-05, + "loss": 0.08927345275878906, + "step": 2877 + }, + { + "epoch": 0.4010311433149864, + "grad_norm": 0.5955784320831299, + "learning_rate": 2.880788303115269e-05, + "loss": 0.10546684265136719, + "step": 2878 + }, + { + "epoch": 0.4011704870062008, + "grad_norm": 0.5054724812507629, + "learning_rate": 2.8799418328642116e-05, + "loss": 0.10929298400878906, + "step": 2879 + }, + { + "epoch": 0.40130983069741516, + "grad_norm": 0.8599271774291992, + "learning_rate": 2.879095167110169e-05, + "loss": 0.1128997802734375, + "step": 2880 + }, + { + "epoch": 0.40144917438862954, + "grad_norm": 0.9904197454452515, + "learning_rate": 2.8782483060412502e-05, + "loss": 0.11627578735351562, + "step": 2881 + }, + { + "epoch": 0.4015885180798439, + "grad_norm": 0.3755672574043274, + "learning_rate": 2.8774012498456083e-05, + "loss": 0.08917045593261719, + "step": 2882 + }, + { + "epoch": 0.4017278617710583, + "grad_norm": 0.7331463694572449, + "learning_rate": 2.8765539987114403e-05, + "loss": 0.09305000305175781, + "step": 2883 + }, + { + "epoch": 0.40186720546227267, + "grad_norm": 0.45406004786491394, + "learning_rate": 2.8757065528269855e-05, + "loss": 0.10400772094726562, + "step": 2884 + }, + { + "epoch": 0.4020065491534871, + "grad_norm": 0.45659419894218445, + "learning_rate": 2.8748589123805274e-05, + "loss": 0.08726119995117188, + "step": 2885 + }, + { + "epoch": 0.4021458928447015, + "grad_norm": 0.7280967831611633, + "learning_rate": 2.874011077560393e-05, + "loss": 0.11332321166992188, + "step": 2886 + }, + { + "epoch": 0.40228523653591586, + "grad_norm": 0.6195258498191833, + "learning_rate": 2.8731630485549504e-05, + "loss": 0.10857772827148438, + "step": 2887 + }, + { + "epoch": 0.40242458022713024, + "grad_norm": 0.3824032247066498, + "learning_rate": 2.8723148255526138e-05, + "loss": 0.07977867126464844, + "step": 2888 + }, + { + "epoch": 0.4025639239183446, + "grad_norm": 0.5416215062141418, + "learning_rate": 2.8714664087418374e-05, + "loss": 0.10330772399902344, + "step": 2889 + }, + { + "epoch": 0.402703267609559, + "grad_norm": 0.461029589176178, + "learning_rate": 2.8706177983111216e-05, + "loss": 0.0762643814086914, + "step": 2890 + }, + { + "epoch": 0.40284261130077337, + "grad_norm": 0.411589652299881, + "learning_rate": 2.869768994449007e-05, + "loss": 0.09348869323730469, + "step": 2891 + }, + { + "epoch": 0.40298195499198775, + "grad_norm": 0.3849911391735077, + "learning_rate": 2.86891999734408e-05, + "loss": 0.08840751647949219, + "step": 2892 + }, + { + "epoch": 0.4031212986832021, + "grad_norm": 0.5328934788703918, + "learning_rate": 2.868070807184966e-05, + "loss": 0.10837554931640625, + "step": 2893 + }, + { + "epoch": 0.4032606423744165, + "grad_norm": 0.49019646644592285, + "learning_rate": 2.867221424160338e-05, + "loss": 0.10293960571289062, + "step": 2894 + }, + { + "epoch": 0.4033999860656309, + "grad_norm": 0.4809171259403229, + "learning_rate": 2.866371848458908e-05, + "loss": 0.115081787109375, + "step": 2895 + }, + { + "epoch": 0.40353932975684526, + "grad_norm": 0.4847126007080078, + "learning_rate": 2.8655220802694334e-05, + "loss": 0.09662437438964844, + "step": 2896 + }, + { + "epoch": 0.40367867344805963, + "grad_norm": 0.5178582668304443, + "learning_rate": 2.864672119780713e-05, + "loss": 0.0999307632446289, + "step": 2897 + }, + { + "epoch": 0.403818017139274, + "grad_norm": 0.31332525610923767, + "learning_rate": 2.8638219671815873e-05, + "loss": 0.07782173156738281, + "step": 2898 + }, + { + "epoch": 0.4039573608304884, + "grad_norm": 0.5190490484237671, + "learning_rate": 2.8629716226609427e-05, + "loss": 0.10632514953613281, + "step": 2899 + }, + { + "epoch": 0.40409670452170277, + "grad_norm": 0.32359007000923157, + "learning_rate": 2.8621210864077053e-05, + "loss": 0.07534980773925781, + "step": 2900 + }, + { + "epoch": 0.40423604821291714, + "grad_norm": 0.6217821836471558, + "learning_rate": 2.861270358610845e-05, + "loss": 0.09606361389160156, + "step": 2901 + }, + { + "epoch": 0.4043753919041315, + "grad_norm": 0.34392407536506653, + "learning_rate": 2.8604194394593744e-05, + "loss": 0.08053970336914062, + "step": 2902 + }, + { + "epoch": 0.4045147355953459, + "grad_norm": 0.4522004723548889, + "learning_rate": 2.8595683291423476e-05, + "loss": 0.09348773956298828, + "step": 2903 + }, + { + "epoch": 0.4046540792865603, + "grad_norm": 0.7409149408340454, + "learning_rate": 2.858717027848863e-05, + "loss": 0.10047149658203125, + "step": 2904 + }, + { + "epoch": 0.4047934229777747, + "grad_norm": 0.41662681102752686, + "learning_rate": 2.857865535768059e-05, + "loss": 0.10126686096191406, + "step": 2905 + }, + { + "epoch": 0.4049327666689891, + "grad_norm": 0.43190208077430725, + "learning_rate": 2.8570138530891188e-05, + "loss": 0.08818435668945312, + "step": 2906 + }, + { + "epoch": 0.40507211036020346, + "grad_norm": 0.37472298741340637, + "learning_rate": 2.8561619800012657e-05, + "loss": 0.08783912658691406, + "step": 2907 + }, + { + "epoch": 0.40521145405141784, + "grad_norm": 0.4135981500148773, + "learning_rate": 2.8553099166937685e-05, + "loss": 0.0995779037475586, + "step": 2908 + }, + { + "epoch": 0.4053507977426322, + "grad_norm": 0.6982704997062683, + "learning_rate": 2.8544576633559335e-05, + "loss": 0.0909733772277832, + "step": 2909 + }, + { + "epoch": 0.4054901414338466, + "grad_norm": 0.9492422938346863, + "learning_rate": 2.853605220177114e-05, + "loss": 0.08944320678710938, + "step": 2910 + }, + { + "epoch": 0.405629485125061, + "grad_norm": 0.5189287066459656, + "learning_rate": 2.8527525873467022e-05, + "loss": 0.1103515625, + "step": 2911 + }, + { + "epoch": 0.40576882881627535, + "grad_norm": 0.40534958243370056, + "learning_rate": 2.851899765054135e-05, + "loss": 0.0743265151977539, + "step": 2912 + }, + { + "epoch": 0.40590817250748973, + "grad_norm": 0.43130603432655334, + "learning_rate": 2.8510467534888886e-05, + "loss": 0.07879257202148438, + "step": 2913 + }, + { + "epoch": 0.4060475161987041, + "grad_norm": 0.5563517212867737, + "learning_rate": 2.8501935528404833e-05, + "loss": 0.08867835998535156, + "step": 2914 + }, + { + "epoch": 0.4061868598899185, + "grad_norm": 0.6749670505523682, + "learning_rate": 2.849340163298481e-05, + "loss": 0.10833740234375, + "step": 2915 + }, + { + "epoch": 0.40632620358113286, + "grad_norm": 0.4775892496109009, + "learning_rate": 2.848486585052485e-05, + "loss": 0.097412109375, + "step": 2916 + }, + { + "epoch": 0.40646554727234724, + "grad_norm": 0.3751201331615448, + "learning_rate": 2.8476328182921414e-05, + "loss": 0.07197189331054688, + "step": 2917 + }, + { + "epoch": 0.4066048909635616, + "grad_norm": 0.44520506262779236, + "learning_rate": 2.8467788632071367e-05, + "loss": 0.10322952270507812, + "step": 2918 + }, + { + "epoch": 0.406744234654776, + "grad_norm": 0.6472240090370178, + "learning_rate": 2.845924719987202e-05, + "loss": 0.10959434509277344, + "step": 2919 + }, + { + "epoch": 0.40688357834599037, + "grad_norm": 0.6000158786773682, + "learning_rate": 2.8450703888221066e-05, + "loss": 0.08866500854492188, + "step": 2920 + }, + { + "epoch": 0.40702292203720475, + "grad_norm": 0.40244683623313904, + "learning_rate": 2.844215869901664e-05, + "loss": 0.08601570129394531, + "step": 2921 + }, + { + "epoch": 0.4071622657284191, + "grad_norm": 0.5499755144119263, + "learning_rate": 2.8433611634157293e-05, + "loss": 0.09284710884094238, + "step": 2922 + }, + { + "epoch": 0.4073016094196335, + "grad_norm": 0.6300138831138611, + "learning_rate": 2.8425062695541975e-05, + "loss": 0.09605598449707031, + "step": 2923 + }, + { + "epoch": 0.4074409531108479, + "grad_norm": 0.5823957920074463, + "learning_rate": 2.8416511885070085e-05, + "loss": 0.08244514465332031, + "step": 2924 + }, + { + "epoch": 0.4075802968020623, + "grad_norm": 0.43599772453308105, + "learning_rate": 2.84079592046414e-05, + "loss": 0.09096336364746094, + "step": 2925 + }, + { + "epoch": 0.4077196404932767, + "grad_norm": 0.44517889618873596, + "learning_rate": 2.839940465615614e-05, + "loss": 0.09456443786621094, + "step": 2926 + }, + { + "epoch": 0.40785898418449107, + "grad_norm": 0.5021443367004395, + "learning_rate": 2.8390848241514918e-05, + "loss": 0.09317588806152344, + "step": 2927 + }, + { + "epoch": 0.40799832787570545, + "grad_norm": 0.7567723989486694, + "learning_rate": 2.8382289962618793e-05, + "loss": 0.12032699584960938, + "step": 2928 + }, + { + "epoch": 0.4081376715669198, + "grad_norm": 0.589116096496582, + "learning_rate": 2.8373729821369206e-05, + "loss": 0.10871315002441406, + "step": 2929 + }, + { + "epoch": 0.4082770152581342, + "grad_norm": 0.4995080828666687, + "learning_rate": 2.8365167819668027e-05, + "loss": 0.1036977767944336, + "step": 2930 + }, + { + "epoch": 0.4084163589493486, + "grad_norm": 0.54853755235672, + "learning_rate": 2.835660395941754e-05, + "loss": 0.11352920532226562, + "step": 2931 + }, + { + "epoch": 0.40855570264056296, + "grad_norm": 0.5054824352264404, + "learning_rate": 2.8348038242520438e-05, + "loss": 0.08898448944091797, + "step": 2932 + }, + { + "epoch": 0.40869504633177733, + "grad_norm": 0.40900272130966187, + "learning_rate": 2.833947067087983e-05, + "loss": 0.09438467025756836, + "step": 2933 + }, + { + "epoch": 0.4088343900229917, + "grad_norm": 0.5690659880638123, + "learning_rate": 2.833090124639923e-05, + "loss": 0.10432624816894531, + "step": 2934 + }, + { + "epoch": 0.4089737337142061, + "grad_norm": 0.3579818904399872, + "learning_rate": 2.832232997098257e-05, + "loss": 0.08412361145019531, + "step": 2935 + }, + { + "epoch": 0.40911307740542047, + "grad_norm": 0.6443154215812683, + "learning_rate": 2.831375684653419e-05, + "loss": 0.11402416229248047, + "step": 2936 + }, + { + "epoch": 0.40925242109663484, + "grad_norm": 0.812432587146759, + "learning_rate": 2.8305181874958844e-05, + "loss": 0.11894989013671875, + "step": 2937 + }, + { + "epoch": 0.4093917647878492, + "grad_norm": 0.578347384929657, + "learning_rate": 2.82966050581617e-05, + "loss": 0.10814285278320312, + "step": 2938 + }, + { + "epoch": 0.4095311084790636, + "grad_norm": 0.3618258237838745, + "learning_rate": 2.8288026398048326e-05, + "loss": 0.09039592742919922, + "step": 2939 + }, + { + "epoch": 0.409670452170278, + "grad_norm": 0.4175632894039154, + "learning_rate": 2.8279445896524705e-05, + "loss": 0.09069633483886719, + "step": 2940 + }, + { + "epoch": 0.40980979586149235, + "grad_norm": 0.48554718494415283, + "learning_rate": 2.8270863555497227e-05, + "loss": 0.08725166320800781, + "step": 2941 + }, + { + "epoch": 0.40994913955270673, + "grad_norm": 0.46895942091941833, + "learning_rate": 2.82622793768727e-05, + "loss": 0.09845161437988281, + "step": 2942 + }, + { + "epoch": 0.4100884832439211, + "grad_norm": 0.5636808276176453, + "learning_rate": 2.8253693362558322e-05, + "loss": 0.09823799133300781, + "step": 2943 + }, + { + "epoch": 0.4102278269351355, + "grad_norm": 0.4987310469150543, + "learning_rate": 2.8245105514461712e-05, + "loss": 0.09772872924804688, + "step": 2944 + }, + { + "epoch": 0.4103671706263499, + "grad_norm": 0.4420800507068634, + "learning_rate": 2.82365158344909e-05, + "loss": 0.09976482391357422, + "step": 2945 + }, + { + "epoch": 0.4105065143175643, + "grad_norm": 0.7441503405570984, + "learning_rate": 2.822792432455431e-05, + "loss": 0.11904525756835938, + "step": 2946 + }, + { + "epoch": 0.4106458580087787, + "grad_norm": 0.4920496642589569, + "learning_rate": 2.8219330986560783e-05, + "loss": 0.10842132568359375, + "step": 2947 + }, + { + "epoch": 0.41078520169999305, + "grad_norm": 0.5731411576271057, + "learning_rate": 2.821073582241956e-05, + "loss": 0.09579086303710938, + "step": 2948 + }, + { + "epoch": 0.41092454539120743, + "grad_norm": 0.5805268883705139, + "learning_rate": 2.820213883404029e-05, + "loss": 0.09607505798339844, + "step": 2949 + }, + { + "epoch": 0.4110638890824218, + "grad_norm": 0.5276578664779663, + "learning_rate": 2.8193540023333033e-05, + "loss": 0.07748985290527344, + "step": 2950 + }, + { + "epoch": 0.4112032327736362, + "grad_norm": 0.3648580312728882, + "learning_rate": 2.818493939220824e-05, + "loss": 0.10042381286621094, + "step": 2951 + }, + { + "epoch": 0.41134257646485056, + "grad_norm": 0.8306750655174255, + "learning_rate": 2.8176336942576785e-05, + "loss": 0.09855461120605469, + "step": 2952 + }, + { + "epoch": 0.41148192015606494, + "grad_norm": 0.5308519005775452, + "learning_rate": 2.816773267634993e-05, + "loss": 0.09615898132324219, + "step": 2953 + }, + { + "epoch": 0.4116212638472793, + "grad_norm": 0.5333397388458252, + "learning_rate": 2.8159126595439344e-05, + "loss": 0.10750579833984375, + "step": 2954 + }, + { + "epoch": 0.4117606075384937, + "grad_norm": 0.284778356552124, + "learning_rate": 2.8150518701757104e-05, + "loss": 0.06998634338378906, + "step": 2955 + }, + { + "epoch": 0.4118999512297081, + "grad_norm": 0.5066236257553101, + "learning_rate": 2.814190899721569e-05, + "loss": 0.09974861145019531, + "step": 2956 + }, + { + "epoch": 0.41203929492092245, + "grad_norm": 0.5990965962409973, + "learning_rate": 2.8133297483727972e-05, + "loss": 0.08843231201171875, + "step": 2957 + }, + { + "epoch": 0.4121786386121368, + "grad_norm": 0.4448465406894684, + "learning_rate": 2.8124684163207252e-05, + "loss": 0.07489204406738281, + "step": 2958 + }, + { + "epoch": 0.4123179823033512, + "grad_norm": 0.5233722925186157, + "learning_rate": 2.8116069037567187e-05, + "loss": 0.09121227264404297, + "step": 2959 + }, + { + "epoch": 0.4124573259945656, + "grad_norm": 1.1697279214859009, + "learning_rate": 2.8107452108721887e-05, + "loss": 0.11814117431640625, + "step": 2960 + }, + { + "epoch": 0.41259666968577996, + "grad_norm": 0.422964483499527, + "learning_rate": 2.809883337858582e-05, + "loss": 0.09369468688964844, + "step": 2961 + }, + { + "epoch": 0.41273601337699434, + "grad_norm": 0.8539126515388489, + "learning_rate": 2.8090212849073877e-05, + "loss": 0.09987640380859375, + "step": 2962 + }, + { + "epoch": 0.4128753570682087, + "grad_norm": 0.6519345045089722, + "learning_rate": 2.8081590522101342e-05, + "loss": 0.09702396392822266, + "step": 2963 + }, + { + "epoch": 0.4130147007594231, + "grad_norm": 0.6482611298561096, + "learning_rate": 2.8072966399583897e-05, + "loss": 0.09321784973144531, + "step": 2964 + }, + { + "epoch": 0.41315404445063747, + "grad_norm": 0.5604673624038696, + "learning_rate": 2.8064340483437625e-05, + "loss": 0.10479736328125, + "step": 2965 + }, + { + "epoch": 0.4132933881418519, + "grad_norm": 0.4709319472312927, + "learning_rate": 2.8055712775579012e-05, + "loss": 0.08739089965820312, + "step": 2966 + }, + { + "epoch": 0.4134327318330663, + "grad_norm": 0.5740588307380676, + "learning_rate": 2.8047083277924935e-05, + "loss": 0.09753608703613281, + "step": 2967 + }, + { + "epoch": 0.41357207552428066, + "grad_norm": 0.4532027542591095, + "learning_rate": 2.803845199239267e-05, + "loss": 0.09313011169433594, + "step": 2968 + }, + { + "epoch": 0.41371141921549504, + "grad_norm": 0.436908483505249, + "learning_rate": 2.8029818920899902e-05, + "loss": 0.08924674987792969, + "step": 2969 + }, + { + "epoch": 0.4138507629067094, + "grad_norm": 0.59698885679245, + "learning_rate": 2.8021184065364684e-05, + "loss": 0.07694435119628906, + "step": 2970 + }, + { + "epoch": 0.4139901065979238, + "grad_norm": 0.5712708830833435, + "learning_rate": 2.8012547427705497e-05, + "loss": 0.098846435546875, + "step": 2971 + }, + { + "epoch": 0.41412945028913817, + "grad_norm": 0.3588418662548065, + "learning_rate": 2.80039090098412e-05, + "loss": 0.10082817077636719, + "step": 2972 + }, + { + "epoch": 0.41426879398035255, + "grad_norm": 0.38131189346313477, + "learning_rate": 2.7995268813691052e-05, + "loss": 0.08627891540527344, + "step": 2973 + }, + { + "epoch": 0.4144081376715669, + "grad_norm": 0.5846775770187378, + "learning_rate": 2.7986626841174717e-05, + "loss": 0.10328483581542969, + "step": 2974 + }, + { + "epoch": 0.4145474813627813, + "grad_norm": 0.41094911098480225, + "learning_rate": 2.7977983094212224e-05, + "loss": 0.09392166137695312, + "step": 2975 + }, + { + "epoch": 0.4146868250539957, + "grad_norm": 0.5093223452568054, + "learning_rate": 2.7969337574724033e-05, + "loss": 0.08341789245605469, + "step": 2976 + }, + { + "epoch": 0.41482616874521006, + "grad_norm": 0.4492948651313782, + "learning_rate": 2.7960690284630976e-05, + "loss": 0.07980918884277344, + "step": 2977 + }, + { + "epoch": 0.41496551243642443, + "grad_norm": 0.4902838468551636, + "learning_rate": 2.7952041225854283e-05, + "loss": 0.11735153198242188, + "step": 2978 + }, + { + "epoch": 0.4151048561276388, + "grad_norm": 0.6056779623031616, + "learning_rate": 2.7943390400315577e-05, + "loss": 0.11848068237304688, + "step": 2979 + }, + { + "epoch": 0.4152441998188532, + "grad_norm": 0.45564889907836914, + "learning_rate": 2.793473780993688e-05, + "loss": 0.10610771179199219, + "step": 2980 + }, + { + "epoch": 0.41538354351006757, + "grad_norm": 0.4448404908180237, + "learning_rate": 2.792608345664059e-05, + "loss": 0.08258342742919922, + "step": 2981 + }, + { + "epoch": 0.41552288720128194, + "grad_norm": 0.4752736985683441, + "learning_rate": 2.791742734234951e-05, + "loss": 0.10558795928955078, + "step": 2982 + }, + { + "epoch": 0.4156622308924963, + "grad_norm": 0.6635417342185974, + "learning_rate": 2.7908769468986837e-05, + "loss": 0.09200286865234375, + "step": 2983 + }, + { + "epoch": 0.4158015745837107, + "grad_norm": 0.37455153465270996, + "learning_rate": 2.7900109838476138e-05, + "loss": 0.09337043762207031, + "step": 2984 + }, + { + "epoch": 0.4159409182749251, + "grad_norm": 0.5683290958404541, + "learning_rate": 2.789144845274141e-05, + "loss": 0.09326171875, + "step": 2985 + }, + { + "epoch": 0.4160802619661395, + "grad_norm": 0.37762451171875, + "learning_rate": 2.7882785313706996e-05, + "loss": 0.08819389343261719, + "step": 2986 + }, + { + "epoch": 0.4162196056573539, + "grad_norm": 0.5106504559516907, + "learning_rate": 2.787412042329765e-05, + "loss": 0.08465003967285156, + "step": 2987 + }, + { + "epoch": 0.41635894934856826, + "grad_norm": 0.27853456139564514, + "learning_rate": 2.7865453783438517e-05, + "loss": 0.0756072998046875, + "step": 2988 + }, + { + "epoch": 0.41649829303978264, + "grad_norm": 0.32106247544288635, + "learning_rate": 2.785678539605512e-05, + "loss": 0.07726287841796875, + "step": 2989 + }, + { + "epoch": 0.416637636730997, + "grad_norm": 0.4411569833755493, + "learning_rate": 2.7848115263073386e-05, + "loss": 0.09566879272460938, + "step": 2990 + }, + { + "epoch": 0.4167769804222114, + "grad_norm": 0.7195849418640137, + "learning_rate": 2.7839443386419613e-05, + "loss": 0.09708404541015625, + "step": 2991 + }, + { + "epoch": 0.4169163241134258, + "grad_norm": 0.5881362557411194, + "learning_rate": 2.7830769768020504e-05, + "loss": 0.10743904113769531, + "step": 2992 + }, + { + "epoch": 0.41705566780464015, + "grad_norm": 0.778492271900177, + "learning_rate": 2.782209440980312e-05, + "loss": 0.09187507629394531, + "step": 2993 + }, + { + "epoch": 0.41719501149585453, + "grad_norm": 0.26583850383758545, + "learning_rate": 2.781341731369495e-05, + "loss": 0.07072830200195312, + "step": 2994 + }, + { + "epoch": 0.4173343551870689, + "grad_norm": 0.5877175331115723, + "learning_rate": 2.780473848162383e-05, + "loss": 0.09725379943847656, + "step": 2995 + }, + { + "epoch": 0.4174736988782833, + "grad_norm": 0.6680020689964294, + "learning_rate": 2.779605791551801e-05, + "loss": 0.10860252380371094, + "step": 2996 + }, + { + "epoch": 0.41761304256949766, + "grad_norm": 0.46664339303970337, + "learning_rate": 2.778737561730611e-05, + "loss": 0.102874755859375, + "step": 2997 + }, + { + "epoch": 0.41775238626071204, + "grad_norm": 0.6650407910346985, + "learning_rate": 2.7778691588917127e-05, + "loss": 0.0956888198852539, + "step": 2998 + }, + { + "epoch": 0.4178917299519264, + "grad_norm": 0.5131734013557434, + "learning_rate": 2.777000583228047e-05, + "loss": 0.09196090698242188, + "step": 2999 + }, + { + "epoch": 0.4180310736431408, + "grad_norm": 0.7522026896476746, + "learning_rate": 2.776131834932591e-05, + "loss": 0.0962371826171875, + "step": 3000 + }, + { + "epoch": 0.41817041733435517, + "grad_norm": 0.6152926087379456, + "learning_rate": 2.7752629141983605e-05, + "loss": 0.08402442932128906, + "step": 3001 + }, + { + "epoch": 0.41830976102556955, + "grad_norm": 0.38077259063720703, + "learning_rate": 2.77439382121841e-05, + "loss": 0.06903266906738281, + "step": 3002 + }, + { + "epoch": 0.4184491047167839, + "grad_norm": 0.40338584780693054, + "learning_rate": 2.773524556185832e-05, + "loss": 0.08277130126953125, + "step": 3003 + }, + { + "epoch": 0.4185884484079983, + "grad_norm": 0.2582605183124542, + "learning_rate": 2.7726551192937577e-05, + "loss": 0.0741729736328125, + "step": 3004 + }, + { + "epoch": 0.4187277920992127, + "grad_norm": 0.4311891794204712, + "learning_rate": 2.7717855107353557e-05, + "loss": 0.09609794616699219, + "step": 3005 + }, + { + "epoch": 0.4188671357904271, + "grad_norm": 0.37057438492774963, + "learning_rate": 2.770915730703834e-05, + "loss": 0.093841552734375, + "step": 3006 + }, + { + "epoch": 0.4190064794816415, + "grad_norm": 0.5573936104774475, + "learning_rate": 2.7700457793924357e-05, + "loss": 0.09706497192382812, + "step": 3007 + }, + { + "epoch": 0.41914582317285587, + "grad_norm": 0.7417018413543701, + "learning_rate": 2.7691756569944473e-05, + "loss": 0.10038185119628906, + "step": 3008 + }, + { + "epoch": 0.41928516686407025, + "grad_norm": 0.549315869808197, + "learning_rate": 2.7683053637031874e-05, + "loss": 0.09817314147949219, + "step": 3009 + }, + { + "epoch": 0.4194245105552846, + "grad_norm": 0.446477472782135, + "learning_rate": 2.7674348997120174e-05, + "loss": 0.09652328491210938, + "step": 3010 + }, + { + "epoch": 0.419563854246499, + "grad_norm": 0.6101611852645874, + "learning_rate": 2.7665642652143327e-05, + "loss": 0.08949470520019531, + "step": 3011 + }, + { + "epoch": 0.4197031979377134, + "grad_norm": 0.6454870104789734, + "learning_rate": 2.7656934604035694e-05, + "loss": 0.09646797180175781, + "step": 3012 + }, + { + "epoch": 0.41984254162892776, + "grad_norm": 0.6989848017692566, + "learning_rate": 2.7648224854732005e-05, + "loss": 0.10060501098632812, + "step": 3013 + }, + { + "epoch": 0.41998188532014213, + "grad_norm": 0.33093732595443726, + "learning_rate": 2.7639513406167363e-05, + "loss": 0.07634925842285156, + "step": 3014 + }, + { + "epoch": 0.4201212290113565, + "grad_norm": 0.49371328949928284, + "learning_rate": 2.763080026027726e-05, + "loss": 0.09195899963378906, + "step": 3015 + }, + { + "epoch": 0.4202605727025709, + "grad_norm": 0.4125482141971588, + "learning_rate": 2.762208541899755e-05, + "loss": 0.08239555358886719, + "step": 3016 + }, + { + "epoch": 0.42039991639378527, + "grad_norm": 0.33943620324134827, + "learning_rate": 2.761336888426448e-05, + "loss": 0.08311843872070312, + "step": 3017 + }, + { + "epoch": 0.42053926008499964, + "grad_norm": 0.7574641704559326, + "learning_rate": 2.7604650658014648e-05, + "loss": 0.08667945861816406, + "step": 3018 + }, + { + "epoch": 0.420678603776214, + "grad_norm": 0.5255314111709595, + "learning_rate": 2.7595930742185068e-05, + "loss": 0.0995626449584961, + "step": 3019 + }, + { + "epoch": 0.4208179474674284, + "grad_norm": 0.6734025478363037, + "learning_rate": 2.758720913871309e-05, + "loss": 0.11497783660888672, + "step": 3020 + }, + { + "epoch": 0.4209572911586428, + "grad_norm": 0.6795871257781982, + "learning_rate": 2.7578485849536464e-05, + "loss": 0.09479141235351562, + "step": 3021 + }, + { + "epoch": 0.42109663484985715, + "grad_norm": 0.5725025534629822, + "learning_rate": 2.7569760876593298e-05, + "loss": 0.10504341125488281, + "step": 3022 + }, + { + "epoch": 0.42123597854107153, + "grad_norm": 0.46840617060661316, + "learning_rate": 2.7561034221822085e-05, + "loss": 0.09742355346679688, + "step": 3023 + }, + { + "epoch": 0.4213753222322859, + "grad_norm": 1.137412190437317, + "learning_rate": 2.7552305887161693e-05, + "loss": 0.14860916137695312, + "step": 3024 + }, + { + "epoch": 0.4215146659235003, + "grad_norm": 0.9785850644111633, + "learning_rate": 2.754357587455135e-05, + "loss": 0.10292243957519531, + "step": 3025 + }, + { + "epoch": 0.4216540096147147, + "grad_norm": 0.5311426520347595, + "learning_rate": 2.7534844185930674e-05, + "loss": 0.09931373596191406, + "step": 3026 + }, + { + "epoch": 0.4217933533059291, + "grad_norm": 0.5054702162742615, + "learning_rate": 2.7526110823239647e-05, + "loss": 0.11260032653808594, + "step": 3027 + }, + { + "epoch": 0.4219326969971435, + "grad_norm": 0.5442692637443542, + "learning_rate": 2.7517375788418613e-05, + "loss": 0.10688591003417969, + "step": 3028 + }, + { + "epoch": 0.42207204068835785, + "grad_norm": 0.4213983416557312, + "learning_rate": 2.7508639083408306e-05, + "loss": 0.08884620666503906, + "step": 3029 + }, + { + "epoch": 0.42221138437957223, + "grad_norm": 0.7086731195449829, + "learning_rate": 2.749990071014982e-05, + "loss": 0.10064220428466797, + "step": 3030 + }, + { + "epoch": 0.4223507280707866, + "grad_norm": 0.8342176079750061, + "learning_rate": 2.749116067058462e-05, + "loss": 0.11626434326171875, + "step": 3031 + }, + { + "epoch": 0.422490071762001, + "grad_norm": 0.9133973717689514, + "learning_rate": 2.748241896665455e-05, + "loss": 0.10024070739746094, + "step": 3032 + }, + { + "epoch": 0.42262941545321536, + "grad_norm": 0.3978542983531952, + "learning_rate": 2.7473675600301807e-05, + "loss": 0.10305404663085938, + "step": 3033 + }, + { + "epoch": 0.42276875914442974, + "grad_norm": 0.39516597986221313, + "learning_rate": 2.7464930573468973e-05, + "loss": 0.07706260681152344, + "step": 3034 + }, + { + "epoch": 0.4229081028356441, + "grad_norm": 0.618717610836029, + "learning_rate": 2.7456183888098995e-05, + "loss": 0.07825088500976562, + "step": 3035 + }, + { + "epoch": 0.4230474465268585, + "grad_norm": 0.4538498818874359, + "learning_rate": 2.7447435546135186e-05, + "loss": 0.099090576171875, + "step": 3036 + }, + { + "epoch": 0.4231867902180729, + "grad_norm": 0.7334125638008118, + "learning_rate": 2.7438685549521228e-05, + "loss": 0.12238311767578125, + "step": 3037 + }, + { + "epoch": 0.42332613390928725, + "grad_norm": 0.7368573546409607, + "learning_rate": 2.742993390020116e-05, + "loss": 0.09494590759277344, + "step": 3038 + }, + { + "epoch": 0.4234654776005016, + "grad_norm": 0.5485541820526123, + "learning_rate": 2.742118060011941e-05, + "loss": 0.09022903442382812, + "step": 3039 + }, + { + "epoch": 0.423604821291716, + "grad_norm": 0.4637262523174286, + "learning_rate": 2.7412425651220767e-05, + "loss": 0.08284187316894531, + "step": 3040 + }, + { + "epoch": 0.4237441649829304, + "grad_norm": 0.6901918053627014, + "learning_rate": 2.7403669055450363e-05, + "loss": 0.10400009155273438, + "step": 3041 + }, + { + "epoch": 0.42388350867414476, + "grad_norm": 0.5572826266288757, + "learning_rate": 2.739491081475373e-05, + "loss": 0.09828376770019531, + "step": 3042 + }, + { + "epoch": 0.42402285236535914, + "grad_norm": 0.38089942932128906, + "learning_rate": 2.738615093107674e-05, + "loss": 0.10228347778320312, + "step": 3043 + }, + { + "epoch": 0.4241621960565735, + "grad_norm": 0.5142936110496521, + "learning_rate": 2.7377389406365642e-05, + "loss": 0.10459136962890625, + "step": 3044 + }, + { + "epoch": 0.4243015397477879, + "grad_norm": 0.4282490909099579, + "learning_rate": 2.7368626242567046e-05, + "loss": 0.0811920166015625, + "step": 3045 + }, + { + "epoch": 0.4244408834390023, + "grad_norm": 0.4213114380836487, + "learning_rate": 2.735986144162793e-05, + "loss": 0.08380317687988281, + "step": 3046 + }, + { + "epoch": 0.4245802271302167, + "grad_norm": 0.40901559591293335, + "learning_rate": 2.735109500549563e-05, + "loss": 0.08199310302734375, + "step": 3047 + }, + { + "epoch": 0.4247195708214311, + "grad_norm": 0.5306272506713867, + "learning_rate": 2.7342326936117847e-05, + "loss": 0.09329795837402344, + "step": 3048 + }, + { + "epoch": 0.42485891451264546, + "grad_norm": 0.4718775153160095, + "learning_rate": 2.7333557235442648e-05, + "loss": 0.084442138671875, + "step": 3049 + }, + { + "epoch": 0.42499825820385984, + "grad_norm": 0.544550895690918, + "learning_rate": 2.732478590541846e-05, + "loss": 0.08545494079589844, + "step": 3050 + }, + { + "epoch": 0.4251376018950742, + "grad_norm": 0.5700148940086365, + "learning_rate": 2.7316012947994067e-05, + "loss": 0.108673095703125, + "step": 3051 + }, + { + "epoch": 0.4252769455862886, + "grad_norm": 0.4546658396720886, + "learning_rate": 2.730723836511863e-05, + "loss": 0.07529973983764648, + "step": 3052 + }, + { + "epoch": 0.42541628927750297, + "grad_norm": 0.4239502549171448, + "learning_rate": 2.729846215874165e-05, + "loss": 0.08328437805175781, + "step": 3053 + }, + { + "epoch": 0.42555563296871735, + "grad_norm": 0.5185831785202026, + "learning_rate": 2.728968433081301e-05, + "loss": 0.10678863525390625, + "step": 3054 + }, + { + "epoch": 0.4256949766599317, + "grad_norm": 0.5435839295387268, + "learning_rate": 2.728090488328293e-05, + "loss": 0.08280467987060547, + "step": 3055 + }, + { + "epoch": 0.4258343203511461, + "grad_norm": 0.3610723614692688, + "learning_rate": 2.727212381810202e-05, + "loss": 0.08401012420654297, + "step": 3056 + }, + { + "epoch": 0.4259736640423605, + "grad_norm": 0.5807370543479919, + "learning_rate": 2.7263341137221217e-05, + "loss": 0.10148239135742188, + "step": 3057 + }, + { + "epoch": 0.42611300773357486, + "grad_norm": 0.6016828417778015, + "learning_rate": 2.725455684259185e-05, + "loss": 0.10858154296875, + "step": 3058 + }, + { + "epoch": 0.42625235142478923, + "grad_norm": 0.424111008644104, + "learning_rate": 2.724577093616556e-05, + "loss": 0.08702659606933594, + "step": 3059 + }, + { + "epoch": 0.4263916951160036, + "grad_norm": 0.4403296411037445, + "learning_rate": 2.72369834198944e-05, + "loss": 0.08836936950683594, + "step": 3060 + }, + { + "epoch": 0.426531038807218, + "grad_norm": 0.3959251046180725, + "learning_rate": 2.7228194295730747e-05, + "loss": 0.08930587768554688, + "step": 3061 + }, + { + "epoch": 0.42667038249843237, + "grad_norm": 0.5231762528419495, + "learning_rate": 2.7219403565627342e-05, + "loss": 0.11045646667480469, + "step": 3062 + }, + { + "epoch": 0.42680972618964674, + "grad_norm": 0.750094473361969, + "learning_rate": 2.721061123153729e-05, + "loss": 0.09348106384277344, + "step": 3063 + }, + { + "epoch": 0.4269490698808611, + "grad_norm": 0.4318065345287323, + "learning_rate": 2.720181729541404e-05, + "loss": 0.09076499938964844, + "step": 3064 + }, + { + "epoch": 0.4270884135720755, + "grad_norm": 0.7475475072860718, + "learning_rate": 2.719302175921141e-05, + "loss": 0.10673713684082031, + "step": 3065 + }, + { + "epoch": 0.42722775726328993, + "grad_norm": 0.7964162230491638, + "learning_rate": 2.7184224624883566e-05, + "loss": 0.12247848510742188, + "step": 3066 + }, + { + "epoch": 0.4273671009545043, + "grad_norm": 0.3446725010871887, + "learning_rate": 2.7175425894385026e-05, + "loss": 0.08147621154785156, + "step": 3067 + }, + { + "epoch": 0.4275064446457187, + "grad_norm": 0.4002397656440735, + "learning_rate": 2.7166625569670664e-05, + "loss": 0.0934906005859375, + "step": 3068 + }, + { + "epoch": 0.42764578833693306, + "grad_norm": 0.40373876690864563, + "learning_rate": 2.715782365269573e-05, + "loss": 0.07409477233886719, + "step": 3069 + }, + { + "epoch": 0.42778513202814744, + "grad_norm": 0.4212893843650818, + "learning_rate": 2.714902014541579e-05, + "loss": 0.0972585678100586, + "step": 3070 + }, + { + "epoch": 0.4279244757193618, + "grad_norm": 0.3323911726474762, + "learning_rate": 2.7140215049786783e-05, + "loss": 0.0923004150390625, + "step": 3071 + }, + { + "epoch": 0.4280638194105762, + "grad_norm": 0.4203546941280365, + "learning_rate": 2.7131408367765017e-05, + "loss": 0.07325935363769531, + "step": 3072 + }, + { + "epoch": 0.4282031631017906, + "grad_norm": 0.5236432552337646, + "learning_rate": 2.7122600101307113e-05, + "loss": 0.11879539489746094, + "step": 3073 + }, + { + "epoch": 0.42834250679300495, + "grad_norm": 0.3955090641975403, + "learning_rate": 2.7113790252370093e-05, + "loss": 0.0804595947265625, + "step": 3074 + }, + { + "epoch": 0.42848185048421933, + "grad_norm": 0.5941450595855713, + "learning_rate": 2.710497882291127e-05, + "loss": 0.09859275817871094, + "step": 3075 + }, + { + "epoch": 0.4286211941754337, + "grad_norm": 0.5268476009368896, + "learning_rate": 2.7096165814888373e-05, + "loss": 0.12664031982421875, + "step": 3076 + }, + { + "epoch": 0.4287605378666481, + "grad_norm": 0.6365874409675598, + "learning_rate": 2.7087351230259442e-05, + "loss": 0.10497283935546875, + "step": 3077 + }, + { + "epoch": 0.42889988155786246, + "grad_norm": 0.5890157222747803, + "learning_rate": 2.7078535070982873e-05, + "loss": 0.09531974792480469, + "step": 3078 + }, + { + "epoch": 0.42903922524907684, + "grad_norm": 0.48273563385009766, + "learning_rate": 2.7069717339017415e-05, + "loss": 0.08882713317871094, + "step": 3079 + }, + { + "epoch": 0.4291785689402912, + "grad_norm": 0.41555437445640564, + "learning_rate": 2.706089803632217e-05, + "loss": 0.09592056274414062, + "step": 3080 + }, + { + "epoch": 0.4293179126315056, + "grad_norm": 0.3641863763332367, + "learning_rate": 2.7052077164856584e-05, + "loss": 0.07898998260498047, + "step": 3081 + }, + { + "epoch": 0.42945725632271997, + "grad_norm": 0.5315077900886536, + "learning_rate": 2.7043254726580457e-05, + "loss": 0.10401248931884766, + "step": 3082 + }, + { + "epoch": 0.42959660001393435, + "grad_norm": 0.5078031420707703, + "learning_rate": 2.7034430723453925e-05, + "loss": 0.09507942199707031, + "step": 3083 + }, + { + "epoch": 0.4297359437051487, + "grad_norm": 0.7888258099555969, + "learning_rate": 2.7025605157437483e-05, + "loss": 0.09615325927734375, + "step": 3084 + }, + { + "epoch": 0.4298752873963631, + "grad_norm": 0.7578043341636658, + "learning_rate": 2.701677803049198e-05, + "loss": 0.10435009002685547, + "step": 3085 + }, + { + "epoch": 0.43001463108757754, + "grad_norm": 0.34007367491722107, + "learning_rate": 2.700794934457859e-05, + "loss": 0.09197139739990234, + "step": 3086 + }, + { + "epoch": 0.4301539747787919, + "grad_norm": 0.5391762852668762, + "learning_rate": 2.6999119101658854e-05, + "loss": 0.08833503723144531, + "step": 3087 + }, + { + "epoch": 0.4302933184700063, + "grad_norm": 0.6036341190338135, + "learning_rate": 2.699028730369464e-05, + "loss": 0.09738922119140625, + "step": 3088 + }, + { + "epoch": 0.43043266216122067, + "grad_norm": 0.6305001378059387, + "learning_rate": 2.6981453952648178e-05, + "loss": 0.09507560729980469, + "step": 3089 + }, + { + "epoch": 0.43057200585243505, + "grad_norm": 0.45831966400146484, + "learning_rate": 2.6972619050482044e-05, + "loss": 0.07948017120361328, + "step": 3090 + }, + { + "epoch": 0.4307113495436494, + "grad_norm": 0.5339564681053162, + "learning_rate": 2.6963782599159135e-05, + "loss": 0.0931243896484375, + "step": 3091 + }, + { + "epoch": 0.4308506932348638, + "grad_norm": 0.6365578174591064, + "learning_rate": 2.6954944600642724e-05, + "loss": 0.10805892944335938, + "step": 3092 + }, + { + "epoch": 0.4309900369260782, + "grad_norm": 0.3705732524394989, + "learning_rate": 2.6946105056896406e-05, + "loss": 0.08967018127441406, + "step": 3093 + }, + { + "epoch": 0.43112938061729256, + "grad_norm": 0.6176947951316833, + "learning_rate": 2.693726396988413e-05, + "loss": 0.07723426818847656, + "step": 3094 + }, + { + "epoch": 0.43126872430850693, + "grad_norm": 0.48498016595840454, + "learning_rate": 2.6928421341570178e-05, + "loss": 0.09575462341308594, + "step": 3095 + }, + { + "epoch": 0.4314080679997213, + "grad_norm": 0.529654324054718, + "learning_rate": 2.691957717391918e-05, + "loss": 0.096832275390625, + "step": 3096 + }, + { + "epoch": 0.4315474116909357, + "grad_norm": 0.4283432066440582, + "learning_rate": 2.6910731468896112e-05, + "loss": 0.0773630142211914, + "step": 3097 + }, + { + "epoch": 0.43168675538215007, + "grad_norm": 0.8923866152763367, + "learning_rate": 2.690188422846629e-05, + "loss": 0.10509109497070312, + "step": 3098 + }, + { + "epoch": 0.43182609907336444, + "grad_norm": 0.4889918267726898, + "learning_rate": 2.6893035454595363e-05, + "loss": 0.10801124572753906, + "step": 3099 + }, + { + "epoch": 0.4319654427645788, + "grad_norm": 0.6441252827644348, + "learning_rate": 2.688418514924932e-05, + "loss": 0.10915184020996094, + "step": 3100 + }, + { + "epoch": 0.4321047864557932, + "grad_norm": 0.4568740427494049, + "learning_rate": 2.6875333314394517e-05, + "loss": 0.09547996520996094, + "step": 3101 + }, + { + "epoch": 0.4322441301470076, + "grad_norm": 1.3106317520141602, + "learning_rate": 2.6866479951997616e-05, + "loss": 0.11777400970458984, + "step": 3102 + }, + { + "epoch": 0.43238347383822195, + "grad_norm": 0.7486590147018433, + "learning_rate": 2.685762506402563e-05, + "loss": 0.11155080795288086, + "step": 3103 + }, + { + "epoch": 0.43252281752943633, + "grad_norm": 0.8793076872825623, + "learning_rate": 2.6848768652445924e-05, + "loss": 0.12741386890411377, + "step": 3104 + }, + { + "epoch": 0.4326621612206507, + "grad_norm": 0.8365741968154907, + "learning_rate": 2.6839910719226173e-05, + "loss": 0.09282112121582031, + "step": 3105 + }, + { + "epoch": 0.43280150491186514, + "grad_norm": 0.5619701147079468, + "learning_rate": 2.683105126633443e-05, + "loss": 0.09993362426757812, + "step": 3106 + }, + { + "epoch": 0.4329408486030795, + "grad_norm": 0.3525545597076416, + "learning_rate": 2.6822190295739038e-05, + "loss": 0.08100128173828125, + "step": 3107 + }, + { + "epoch": 0.4330801922942939, + "grad_norm": 0.6947034001350403, + "learning_rate": 2.6813327809408723e-05, + "loss": 0.09427261352539062, + "step": 3108 + }, + { + "epoch": 0.4332195359855083, + "grad_norm": 0.7060475945472717, + "learning_rate": 2.680446380931252e-05, + "loss": 0.10474014282226562, + "step": 3109 + }, + { + "epoch": 0.43335887967672265, + "grad_norm": 0.7375085353851318, + "learning_rate": 2.6795598297419806e-05, + "loss": 0.08777999877929688, + "step": 3110 + }, + { + "epoch": 0.43349822336793703, + "grad_norm": 0.69596928358078, + "learning_rate": 2.6786731275700297e-05, + "loss": 0.09242057800292969, + "step": 3111 + }, + { + "epoch": 0.4336375670591514, + "grad_norm": 0.950225830078125, + "learning_rate": 2.6777862746124045e-05, + "loss": 0.088897705078125, + "step": 3112 + }, + { + "epoch": 0.4337769107503658, + "grad_norm": 0.9346666932106018, + "learning_rate": 2.6768992710661428e-05, + "loss": 0.11571407318115234, + "step": 3113 + }, + { + "epoch": 0.43391625444158016, + "grad_norm": 0.32204678654670715, + "learning_rate": 2.676012117128317e-05, + "loss": 0.06927680969238281, + "step": 3114 + }, + { + "epoch": 0.43405559813279454, + "grad_norm": 0.9486885666847229, + "learning_rate": 2.6751248129960323e-05, + "loss": 0.0976104736328125, + "step": 3115 + }, + { + "epoch": 0.4341949418240089, + "grad_norm": 0.8771523237228394, + "learning_rate": 2.6742373588664276e-05, + "loss": 0.09846687316894531, + "step": 3116 + }, + { + "epoch": 0.4343342855152233, + "grad_norm": 0.5330292582511902, + "learning_rate": 2.673349754936675e-05, + "loss": 0.07909870147705078, + "step": 3117 + }, + { + "epoch": 0.4344736292064377, + "grad_norm": 1.220893144607544, + "learning_rate": 2.6724620014039794e-05, + "loss": 0.10825157165527344, + "step": 3118 + }, + { + "epoch": 0.43461297289765205, + "grad_norm": 0.9182577729225159, + "learning_rate": 2.67157409846558e-05, + "loss": 0.10835647583007812, + "step": 3119 + }, + { + "epoch": 0.4347523165888664, + "grad_norm": 0.43931835889816284, + "learning_rate": 2.670686046318748e-05, + "loss": 0.09704017639160156, + "step": 3120 + }, + { + "epoch": 0.4348916602800808, + "grad_norm": 0.5912973880767822, + "learning_rate": 2.669797845160788e-05, + "loss": 0.09370803833007812, + "step": 3121 + }, + { + "epoch": 0.4350310039712952, + "grad_norm": 0.3448405861854553, + "learning_rate": 2.66890949518904e-05, + "loss": 0.08853530883789062, + "step": 3122 + }, + { + "epoch": 0.43517034766250956, + "grad_norm": 0.5537985563278198, + "learning_rate": 2.6680209966008727e-05, + "loss": 0.10497260093688965, + "step": 3123 + }, + { + "epoch": 0.43530969135372394, + "grad_norm": 0.7242429852485657, + "learning_rate": 2.6671323495936913e-05, + "loss": 0.11418533325195312, + "step": 3124 + }, + { + "epoch": 0.4354490350449383, + "grad_norm": 0.5532090067863464, + "learning_rate": 2.666243554364932e-05, + "loss": 0.1037750244140625, + "step": 3125 + }, + { + "epoch": 0.43558837873615275, + "grad_norm": 0.8382121920585632, + "learning_rate": 2.6653546111120664e-05, + "loss": 0.09795570373535156, + "step": 3126 + }, + { + "epoch": 0.4357277224273671, + "grad_norm": 0.9270533919334412, + "learning_rate": 2.664465520032596e-05, + "loss": 0.08738517761230469, + "step": 3127 + }, + { + "epoch": 0.4358670661185815, + "grad_norm": 0.543664813041687, + "learning_rate": 2.6635762813240574e-05, + "loss": 0.1306171417236328, + "step": 3128 + }, + { + "epoch": 0.4360064098097959, + "grad_norm": 0.3884970545768738, + "learning_rate": 2.662686895184019e-05, + "loss": 0.08842277526855469, + "step": 3129 + }, + { + "epoch": 0.43614575350101026, + "grad_norm": 0.5974640250205994, + "learning_rate": 2.6617973618100817e-05, + "loss": 0.07721900939941406, + "step": 3130 + }, + { + "epoch": 0.43628509719222464, + "grad_norm": 0.4646095335483551, + "learning_rate": 2.6609076813998795e-05, + "loss": 0.08674049377441406, + "step": 3131 + }, + { + "epoch": 0.436424440883439, + "grad_norm": 0.40435728430747986, + "learning_rate": 2.6600178541510792e-05, + "loss": 0.089630126953125, + "step": 3132 + }, + { + "epoch": 0.4365637845746534, + "grad_norm": 0.571613609790802, + "learning_rate": 2.65912788026138e-05, + "loss": 0.0951690673828125, + "step": 3133 + }, + { + "epoch": 0.43670312826586777, + "grad_norm": 0.33197516202926636, + "learning_rate": 2.6582377599285143e-05, + "loss": 0.08372688293457031, + "step": 3134 + }, + { + "epoch": 0.43684247195708215, + "grad_norm": 0.29005926847457886, + "learning_rate": 2.6573474933502466e-05, + "loss": 0.08405113220214844, + "step": 3135 + }, + { + "epoch": 0.4369818156482965, + "grad_norm": 0.3667845129966736, + "learning_rate": 2.6564570807243728e-05, + "loss": 0.07433700561523438, + "step": 3136 + }, + { + "epoch": 0.4371211593395109, + "grad_norm": 0.5476770401000977, + "learning_rate": 2.655566522248723e-05, + "loss": 0.08976984024047852, + "step": 3137 + }, + { + "epoch": 0.4372605030307253, + "grad_norm": 0.5674693584442139, + "learning_rate": 2.6546758181211593e-05, + "loss": 0.10907554626464844, + "step": 3138 + }, + { + "epoch": 0.43739984672193966, + "grad_norm": 0.5082097053527832, + "learning_rate": 2.653784968539574e-05, + "loss": 0.0919952392578125, + "step": 3139 + }, + { + "epoch": 0.43753919041315403, + "grad_norm": 0.7675885558128357, + "learning_rate": 2.652893973701896e-05, + "loss": 0.11803722381591797, + "step": 3140 + }, + { + "epoch": 0.4376785341043684, + "grad_norm": 0.4279431700706482, + "learning_rate": 2.652002833806082e-05, + "loss": 0.08559989929199219, + "step": 3141 + }, + { + "epoch": 0.4378178777955828, + "grad_norm": 0.5346590280532837, + "learning_rate": 2.6511115490501244e-05, + "loss": 0.0888833999633789, + "step": 3142 + }, + { + "epoch": 0.43795722148679717, + "grad_norm": 0.6689817309379578, + "learning_rate": 2.650220119632046e-05, + "loss": 0.09102821350097656, + "step": 3143 + }, + { + "epoch": 0.43809656517801154, + "grad_norm": 0.2536977231502533, + "learning_rate": 2.649328545749901e-05, + "loss": 0.06326580047607422, + "step": 3144 + }, + { + "epoch": 0.4382359088692259, + "grad_norm": 0.3289286494255066, + "learning_rate": 2.648436827601778e-05, + "loss": 0.09313011169433594, + "step": 3145 + }, + { + "epoch": 0.4383752525604403, + "grad_norm": 0.3906937837600708, + "learning_rate": 2.6475449653857964e-05, + "loss": 0.08383369445800781, + "step": 3146 + }, + { + "epoch": 0.43851459625165473, + "grad_norm": 0.5098361968994141, + "learning_rate": 2.6466529593001065e-05, + "loss": 0.09640312194824219, + "step": 3147 + }, + { + "epoch": 0.4386539399428691, + "grad_norm": 0.47138336300849915, + "learning_rate": 2.6457608095428925e-05, + "loss": 0.08377838134765625, + "step": 3148 + }, + { + "epoch": 0.4387932836340835, + "grad_norm": 0.4178135097026825, + "learning_rate": 2.64486851631237e-05, + "loss": 0.0731269121170044, + "step": 3149 + }, + { + "epoch": 0.43893262732529786, + "grad_norm": 0.45695051550865173, + "learning_rate": 2.6439760798067854e-05, + "loss": 0.08374214172363281, + "step": 3150 + }, + { + "epoch": 0.43907197101651224, + "grad_norm": 0.3975287973880768, + "learning_rate": 2.6430835002244183e-05, + "loss": 0.0998382568359375, + "step": 3151 + }, + { + "epoch": 0.4392113147077266, + "grad_norm": 0.4727335274219513, + "learning_rate": 2.6421907777635793e-05, + "loss": 0.08861923217773438, + "step": 3152 + }, + { + "epoch": 0.439350658398941, + "grad_norm": 0.7764759063720703, + "learning_rate": 2.641297912622611e-05, + "loss": 0.11166000366210938, + "step": 3153 + }, + { + "epoch": 0.4394900020901554, + "grad_norm": 0.618724524974823, + "learning_rate": 2.640404904999887e-05, + "loss": 0.107330322265625, + "step": 3154 + }, + { + "epoch": 0.43962934578136975, + "grad_norm": 0.6484370827674866, + "learning_rate": 2.639511755093814e-05, + "loss": 0.12183570861816406, + "step": 3155 + }, + { + "epoch": 0.43976868947258413, + "grad_norm": 0.756770133972168, + "learning_rate": 2.63861846310283e-05, + "loss": 0.10442924499511719, + "step": 3156 + }, + { + "epoch": 0.4399080331637985, + "grad_norm": 0.42841291427612305, + "learning_rate": 2.6377250292254023e-05, + "loss": 0.09116172790527344, + "step": 3157 + }, + { + "epoch": 0.4400473768550129, + "grad_norm": 0.4780941903591156, + "learning_rate": 2.6368314536600337e-05, + "loss": 0.10268878936767578, + "step": 3158 + }, + { + "epoch": 0.44018672054622726, + "grad_norm": 0.4483542740345001, + "learning_rate": 2.6359377366052546e-05, + "loss": 0.08422088623046875, + "step": 3159 + }, + { + "epoch": 0.44032606423744164, + "grad_norm": 0.594098687171936, + "learning_rate": 2.6350438782596293e-05, + "loss": 0.08356285095214844, + "step": 3160 + }, + { + "epoch": 0.440465407928656, + "grad_norm": 0.5443497896194458, + "learning_rate": 2.6341498788217527e-05, + "loss": 0.11600112915039062, + "step": 3161 + }, + { + "epoch": 0.4406047516198704, + "grad_norm": 0.44148004055023193, + "learning_rate": 2.6332557384902506e-05, + "loss": 0.09584808349609375, + "step": 3162 + }, + { + "epoch": 0.44074409531108477, + "grad_norm": 0.6426171660423279, + "learning_rate": 2.6323614574637812e-05, + "loss": 0.09318923950195312, + "step": 3163 + }, + { + "epoch": 0.44088343900229915, + "grad_norm": 0.5102519392967224, + "learning_rate": 2.6314670359410332e-05, + "loss": 0.09684562683105469, + "step": 3164 + }, + { + "epoch": 0.4410227826935135, + "grad_norm": 0.48343363404273987, + "learning_rate": 2.630572474120726e-05, + "loss": 0.09850311279296875, + "step": 3165 + }, + { + "epoch": 0.4411621263847279, + "grad_norm": 0.3599223792552948, + "learning_rate": 2.6296777722016108e-05, + "loss": 0.09379768371582031, + "step": 3166 + }, + { + "epoch": 0.44130147007594234, + "grad_norm": 1.0520060062408447, + "learning_rate": 2.6287829303824713e-05, + "loss": 0.12682723999023438, + "step": 3167 + }, + { + "epoch": 0.4414408137671567, + "grad_norm": 0.589067816734314, + "learning_rate": 2.6278879488621197e-05, + "loss": 0.11132049560546875, + "step": 3168 + }, + { + "epoch": 0.4415801574583711, + "grad_norm": 0.5577420592308044, + "learning_rate": 2.626992827839401e-05, + "loss": 0.08998298645019531, + "step": 3169 + }, + { + "epoch": 0.44171950114958547, + "grad_norm": 0.5797557234764099, + "learning_rate": 2.62609756751319e-05, + "loss": 0.09960436820983887, + "step": 3170 + }, + { + "epoch": 0.44185884484079985, + "grad_norm": 0.24352103471755981, + "learning_rate": 2.6252021680823937e-05, + "loss": 0.07390111684799194, + "step": 3171 + }, + { + "epoch": 0.4419981885320142, + "grad_norm": 0.3343530297279358, + "learning_rate": 2.6243066297459495e-05, + "loss": 0.08007049560546875, + "step": 3172 + }, + { + "epoch": 0.4421375322232286, + "grad_norm": 0.6590198874473572, + "learning_rate": 2.623410952702825e-05, + "loss": 0.1015939712524414, + "step": 3173 + }, + { + "epoch": 0.442276875914443, + "grad_norm": 0.36289915442466736, + "learning_rate": 2.62251513715202e-05, + "loss": 0.08614635467529297, + "step": 3174 + }, + { + "epoch": 0.44241621960565736, + "grad_norm": 0.3955261707305908, + "learning_rate": 2.6216191832925634e-05, + "loss": 0.10087203979492188, + "step": 3175 + }, + { + "epoch": 0.44255556329687173, + "grad_norm": 0.31133216619491577, + "learning_rate": 2.620723091323516e-05, + "loss": 0.07803916931152344, + "step": 3176 + }, + { + "epoch": 0.4426949069880861, + "grad_norm": 0.5812323689460754, + "learning_rate": 2.6198268614439694e-05, + "loss": 0.09415245056152344, + "step": 3177 + }, + { + "epoch": 0.4428342506793005, + "grad_norm": 0.4936281740665436, + "learning_rate": 2.618930493853045e-05, + "loss": 0.08430290222167969, + "step": 3178 + }, + { + "epoch": 0.44297359437051487, + "grad_norm": 0.5252881050109863, + "learning_rate": 2.618033988749895e-05, + "loss": 0.10199165344238281, + "step": 3179 + }, + { + "epoch": 0.44311293806172924, + "grad_norm": 0.6684341430664062, + "learning_rate": 2.6171373463337028e-05, + "loss": 0.10571670532226562, + "step": 3180 + }, + { + "epoch": 0.4432522817529436, + "grad_norm": 0.40588587522506714, + "learning_rate": 2.616240566803682e-05, + "loss": 0.076324462890625, + "step": 3181 + }, + { + "epoch": 0.443391625444158, + "grad_norm": 1.4893497228622437, + "learning_rate": 2.6153436503590765e-05, + "loss": 0.11571979522705078, + "step": 3182 + }, + { + "epoch": 0.4435309691353724, + "grad_norm": 0.5820866823196411, + "learning_rate": 2.6144465971991596e-05, + "loss": 0.10046005249023438, + "step": 3183 + }, + { + "epoch": 0.44367031282658675, + "grad_norm": 0.6250027418136597, + "learning_rate": 2.6135494075232366e-05, + "loss": 0.1060791015625, + "step": 3184 + }, + { + "epoch": 0.44380965651780113, + "grad_norm": 0.6204400062561035, + "learning_rate": 2.612652081530644e-05, + "loss": 0.11275863647460938, + "step": 3185 + }, + { + "epoch": 0.4439490002090155, + "grad_norm": 0.5420244932174683, + "learning_rate": 2.6117546194207454e-05, + "loss": 0.10990333557128906, + "step": 3186 + }, + { + "epoch": 0.44408834390022994, + "grad_norm": 0.44902363419532776, + "learning_rate": 2.6108570213929366e-05, + "loss": 0.07988739013671875, + "step": 3187 + }, + { + "epoch": 0.4442276875914443, + "grad_norm": 0.3468252122402191, + "learning_rate": 2.609959287646645e-05, + "loss": 0.08945465087890625, + "step": 3188 + }, + { + "epoch": 0.4443670312826587, + "grad_norm": 0.7524549961090088, + "learning_rate": 2.6090614183813236e-05, + "loss": 0.09752082824707031, + "step": 3189 + }, + { + "epoch": 0.4445063749738731, + "grad_norm": 0.6745143532752991, + "learning_rate": 2.6081634137964615e-05, + "loss": 0.08884048461914062, + "step": 3190 + }, + { + "epoch": 0.44464571866508745, + "grad_norm": 0.5213266015052795, + "learning_rate": 2.607265274091573e-05, + "loss": 0.09619522094726562, + "step": 3191 + }, + { + "epoch": 0.44478506235630183, + "grad_norm": 0.8775123357772827, + "learning_rate": 2.6063669994662043e-05, + "loss": 0.13463211059570312, + "step": 3192 + }, + { + "epoch": 0.4449244060475162, + "grad_norm": 0.37777620553970337, + "learning_rate": 2.605468590119932e-05, + "loss": 0.07517147064208984, + "step": 3193 + }, + { + "epoch": 0.4450637497387306, + "grad_norm": 0.46200868487358093, + "learning_rate": 2.6045700462523625e-05, + "loss": 0.10442256927490234, + "step": 3194 + }, + { + "epoch": 0.44520309342994496, + "grad_norm": 0.3402801752090454, + "learning_rate": 2.6036713680631312e-05, + "loss": 0.088104248046875, + "step": 3195 + }, + { + "epoch": 0.44534243712115934, + "grad_norm": 0.5169655680656433, + "learning_rate": 2.6027725557519037e-05, + "loss": 0.09799003601074219, + "step": 3196 + }, + { + "epoch": 0.4454817808123737, + "grad_norm": 0.5235153436660767, + "learning_rate": 2.601873609518376e-05, + "loss": 0.08462142944335938, + "step": 3197 + }, + { + "epoch": 0.4456211245035881, + "grad_norm": 0.49239978194236755, + "learning_rate": 2.600974529562273e-05, + "loss": 0.10195159912109375, + "step": 3198 + }, + { + "epoch": 0.4457604681948025, + "grad_norm": 0.31724223494529724, + "learning_rate": 2.6000753160833506e-05, + "loss": 0.07742500305175781, + "step": 3199 + }, + { + "epoch": 0.44589981188601685, + "grad_norm": 0.39351484179496765, + "learning_rate": 2.599175969281392e-05, + "loss": 0.08991241455078125, + "step": 3200 + }, + { + "epoch": 0.44603915557723123, + "grad_norm": 0.7242065072059631, + "learning_rate": 2.5982764893562137e-05, + "loss": 0.09882354736328125, + "step": 3201 + }, + { + "epoch": 0.4461784992684456, + "grad_norm": 0.7772037386894226, + "learning_rate": 2.5973768765076578e-05, + "loss": 0.09413337707519531, + "step": 3202 + }, + { + "epoch": 0.44631784295966, + "grad_norm": 0.6306560039520264, + "learning_rate": 2.5964771309355978e-05, + "loss": 0.1153411865234375, + "step": 3203 + }, + { + "epoch": 0.44645718665087436, + "grad_norm": 0.5283454656600952, + "learning_rate": 2.595577252839938e-05, + "loss": 0.07585716247558594, + "step": 3204 + }, + { + "epoch": 0.44659653034208874, + "grad_norm": 0.74545818567276, + "learning_rate": 2.594677242420609e-05, + "loss": 0.10965728759765625, + "step": 3205 + }, + { + "epoch": 0.4467358740333031, + "grad_norm": 0.5258922576904297, + "learning_rate": 2.593777099877574e-05, + "loss": 0.09752273559570312, + "step": 3206 + }, + { + "epoch": 0.44687521772451755, + "grad_norm": 0.5895053148269653, + "learning_rate": 2.592876825410823e-05, + "loss": 0.09647560119628906, + "step": 3207 + }, + { + "epoch": 0.4470145614157319, + "grad_norm": 0.5870022773742676, + "learning_rate": 2.5919764192203777e-05, + "loss": 0.08635330200195312, + "step": 3208 + }, + { + "epoch": 0.4471539051069463, + "grad_norm": 0.35497647523880005, + "learning_rate": 2.591075881506287e-05, + "loss": 0.09655570983886719, + "step": 3209 + }, + { + "epoch": 0.4472932487981607, + "grad_norm": 0.26941120624542236, + "learning_rate": 2.5901752124686294e-05, + "loss": 0.07969093322753906, + "step": 3210 + }, + { + "epoch": 0.44743259248937506, + "grad_norm": 0.39046117663383484, + "learning_rate": 2.5892744123075138e-05, + "loss": 0.08931541442871094, + "step": 3211 + }, + { + "epoch": 0.44757193618058944, + "grad_norm": 0.5818946957588196, + "learning_rate": 2.5883734812230773e-05, + "loss": 0.10043525695800781, + "step": 3212 + }, + { + "epoch": 0.4477112798718038, + "grad_norm": 0.7138299942016602, + "learning_rate": 2.587472419415486e-05, + "loss": 0.08931922912597656, + "step": 3213 + }, + { + "epoch": 0.4478506235630182, + "grad_norm": 0.49413320422172546, + "learning_rate": 2.5865712270849354e-05, + "loss": 0.10782814025878906, + "step": 3214 + }, + { + "epoch": 0.44798996725423257, + "grad_norm": 0.3767920732498169, + "learning_rate": 2.5856699044316496e-05, + "loss": 0.083343505859375, + "step": 3215 + }, + { + "epoch": 0.44812931094544695, + "grad_norm": 0.6204838156700134, + "learning_rate": 2.5847684516558817e-05, + "loss": 0.1017913818359375, + "step": 3216 + }, + { + "epoch": 0.4482686546366613, + "grad_norm": 0.4761483371257782, + "learning_rate": 2.583866868957915e-05, + "loss": 0.0932464599609375, + "step": 3217 + }, + { + "epoch": 0.4484079983278757, + "grad_norm": 0.7103255987167358, + "learning_rate": 2.5829651565380598e-05, + "loss": 0.1274547576904297, + "step": 3218 + }, + { + "epoch": 0.4485473420190901, + "grad_norm": 0.5061867833137512, + "learning_rate": 2.5820633145966564e-05, + "loss": 0.1288299560546875, + "step": 3219 + }, + { + "epoch": 0.44868668571030446, + "grad_norm": 0.47703713178634644, + "learning_rate": 2.581161343334073e-05, + "loss": 0.11072158813476562, + "step": 3220 + }, + { + "epoch": 0.44882602940151883, + "grad_norm": 0.7783265709877014, + "learning_rate": 2.5802592429507067e-05, + "loss": 0.12170791625976562, + "step": 3221 + }, + { + "epoch": 0.4489653730927332, + "grad_norm": 0.6543957591056824, + "learning_rate": 2.579357013646985e-05, + "loss": 0.09692955017089844, + "step": 3222 + }, + { + "epoch": 0.4491047167839476, + "grad_norm": 0.49769967794418335, + "learning_rate": 2.578454655623361e-05, + "loss": 0.10016441345214844, + "step": 3223 + }, + { + "epoch": 0.44924406047516197, + "grad_norm": 0.5144950151443481, + "learning_rate": 2.5775521690803197e-05, + "loss": 0.09799385070800781, + "step": 3224 + }, + { + "epoch": 0.44938340416637634, + "grad_norm": 0.39365506172180176, + "learning_rate": 2.5766495542183717e-05, + "loss": 0.07912778854370117, + "step": 3225 + }, + { + "epoch": 0.4495227478575907, + "grad_norm": 0.6424152255058289, + "learning_rate": 2.575746811238058e-05, + "loss": 0.11851119995117188, + "step": 3226 + }, + { + "epoch": 0.44966209154880515, + "grad_norm": 0.4549945890903473, + "learning_rate": 2.574843940339947e-05, + "loss": 0.08922767639160156, + "step": 3227 + }, + { + "epoch": 0.44980143524001953, + "grad_norm": 0.36948445439338684, + "learning_rate": 2.5739409417246367e-05, + "loss": 0.10172080993652344, + "step": 3228 + }, + { + "epoch": 0.4499407789312339, + "grad_norm": 0.5883519053459167, + "learning_rate": 2.5730378155927524e-05, + "loss": 0.11269950866699219, + "step": 3229 + }, + { + "epoch": 0.4500801226224483, + "grad_norm": 0.535325288772583, + "learning_rate": 2.5721345621449483e-05, + "loss": 0.10906362533569336, + "step": 3230 + }, + { + "epoch": 0.45021946631366266, + "grad_norm": 0.3650776445865631, + "learning_rate": 2.5712311815819063e-05, + "loss": 0.08469009399414062, + "step": 3231 + }, + { + "epoch": 0.45035881000487704, + "grad_norm": 0.43975576758384705, + "learning_rate": 2.570327674104337e-05, + "loss": 0.09416389465332031, + "step": 3232 + }, + { + "epoch": 0.4504981536960914, + "grad_norm": 0.6965985894203186, + "learning_rate": 2.56942403991298e-05, + "loss": 0.11461830139160156, + "step": 3233 + }, + { + "epoch": 0.4506374973873058, + "grad_norm": 0.4539940357208252, + "learning_rate": 2.568520279208601e-05, + "loss": 0.08409881591796875, + "step": 3234 + }, + { + "epoch": 0.4507768410785202, + "grad_norm": 0.5852789878845215, + "learning_rate": 2.5676163921919955e-05, + "loss": 0.09616279602050781, + "step": 3235 + }, + { + "epoch": 0.45091618476973455, + "grad_norm": 0.8538361191749573, + "learning_rate": 2.566712379063987e-05, + "loss": 0.10514259338378906, + "step": 3236 + }, + { + "epoch": 0.45105552846094893, + "grad_norm": 0.436420738697052, + "learning_rate": 2.565808240025425e-05, + "loss": 0.11569404602050781, + "step": 3237 + }, + { + "epoch": 0.4511948721521633, + "grad_norm": 0.28171634674072266, + "learning_rate": 2.5649039752771914e-05, + "loss": 0.07062053680419922, + "step": 3238 + }, + { + "epoch": 0.4513342158433777, + "grad_norm": 0.37141889333724976, + "learning_rate": 2.5639995850201902e-05, + "loss": 0.09654426574707031, + "step": 3239 + }, + { + "epoch": 0.45147355953459206, + "grad_norm": 0.4529375731945038, + "learning_rate": 2.5630950694553582e-05, + "loss": 0.08083534240722656, + "step": 3240 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.6521430611610413, + "learning_rate": 2.5621904287836568e-05, + "loss": 0.11318588256835938, + "step": 3241 + }, + { + "epoch": 0.4517522469170208, + "grad_norm": 0.4172095060348511, + "learning_rate": 2.5612856632060776e-05, + "loss": 0.08664417266845703, + "step": 3242 + }, + { + "epoch": 0.4518915906082352, + "grad_norm": 0.8130524754524231, + "learning_rate": 2.5603807729236387e-05, + "loss": 0.10521507263183594, + "step": 3243 + }, + { + "epoch": 0.45203093429944957, + "grad_norm": 0.43711304664611816, + "learning_rate": 2.559475758137385e-05, + "loss": 0.07695293426513672, + "step": 3244 + }, + { + "epoch": 0.45217027799066395, + "grad_norm": 0.3664785325527191, + "learning_rate": 2.5585706190483914e-05, + "loss": 0.09327316284179688, + "step": 3245 + }, + { + "epoch": 0.4523096216818783, + "grad_norm": 0.5167361497879028, + "learning_rate": 2.5576653558577588e-05, + "loss": 0.1008758544921875, + "step": 3246 + }, + { + "epoch": 0.45244896537309276, + "grad_norm": 0.4255105257034302, + "learning_rate": 2.556759968766615e-05, + "loss": 0.09178543090820312, + "step": 3247 + }, + { + "epoch": 0.45258830906430714, + "grad_norm": 0.33253180980682373, + "learning_rate": 2.5558544579761177e-05, + "loss": 0.09173774719238281, + "step": 3248 + }, + { + "epoch": 0.4527276527555215, + "grad_norm": 0.4915449917316437, + "learning_rate": 2.5549488236874506e-05, + "loss": 0.08561038970947266, + "step": 3249 + }, + { + "epoch": 0.4528669964467359, + "grad_norm": 0.6980950236320496, + "learning_rate": 2.554043066101824e-05, + "loss": 0.10692214965820312, + "step": 3250 + }, + { + "epoch": 0.45300634013795027, + "grad_norm": 0.6341108679771423, + "learning_rate": 2.5531371854204773e-05, + "loss": 0.08897590637207031, + "step": 3251 + }, + { + "epoch": 0.45314568382916465, + "grad_norm": 0.3671814203262329, + "learning_rate": 2.5522311818446762e-05, + "loss": 0.09053230285644531, + "step": 3252 + }, + { + "epoch": 0.453285027520379, + "grad_norm": 0.5929312109947205, + "learning_rate": 2.5513250555757143e-05, + "loss": 0.10123252868652344, + "step": 3253 + }, + { + "epoch": 0.4534243712115934, + "grad_norm": 0.5270496606826782, + "learning_rate": 2.5504188068149126e-05, + "loss": 0.1271514892578125, + "step": 3254 + }, + { + "epoch": 0.4535637149028078, + "grad_norm": 0.5053049325942993, + "learning_rate": 2.5495124357636174e-05, + "loss": 0.08791351318359375, + "step": 3255 + }, + { + "epoch": 0.45370305859402216, + "grad_norm": 0.46914345026016235, + "learning_rate": 2.5486059426232052e-05, + "loss": 0.08690643310546875, + "step": 3256 + }, + { + "epoch": 0.45384240228523653, + "grad_norm": 0.42011186480522156, + "learning_rate": 2.547699327595077e-05, + "loss": 0.1128072738647461, + "step": 3257 + }, + { + "epoch": 0.4539817459764509, + "grad_norm": 0.892158567905426, + "learning_rate": 2.5467925908806622e-05, + "loss": 0.10436630249023438, + "step": 3258 + }, + { + "epoch": 0.4541210896676653, + "grad_norm": 0.47208866477012634, + "learning_rate": 2.5458857326814178e-05, + "loss": 0.07756710052490234, + "step": 3259 + }, + { + "epoch": 0.45426043335887967, + "grad_norm": 0.387165904045105, + "learning_rate": 2.5449787531988258e-05, + "loss": 0.09401893615722656, + "step": 3260 + }, + { + "epoch": 0.45439977705009404, + "grad_norm": 0.4169423580169678, + "learning_rate": 2.5440716526343972e-05, + "loss": 0.08533382415771484, + "step": 3261 + }, + { + "epoch": 0.4545391207413084, + "grad_norm": 0.8554683923721313, + "learning_rate": 2.543164431189669e-05, + "loss": 0.10762977600097656, + "step": 3262 + }, + { + "epoch": 0.4546784644325228, + "grad_norm": 0.40046533942222595, + "learning_rate": 2.5422570890662046e-05, + "loss": 0.09861946105957031, + "step": 3263 + }, + { + "epoch": 0.4548178081237372, + "grad_norm": 0.5320014357566833, + "learning_rate": 2.541349626465595e-05, + "loss": 0.118682861328125, + "step": 3264 + }, + { + "epoch": 0.45495715181495155, + "grad_norm": 0.4893737733364105, + "learning_rate": 2.5404420435894578e-05, + "loss": 0.124359130859375, + "step": 3265 + }, + { + "epoch": 0.45509649550616593, + "grad_norm": 0.3580836057662964, + "learning_rate": 2.539534340639436e-05, + "loss": 0.08568739891052246, + "step": 3266 + }, + { + "epoch": 0.45523583919738037, + "grad_norm": 0.5960180163383484, + "learning_rate": 2.538626517817203e-05, + "loss": 0.09133434295654297, + "step": 3267 + }, + { + "epoch": 0.45537518288859474, + "grad_norm": 0.5862428545951843, + "learning_rate": 2.5377185753244537e-05, + "loss": 0.12951087951660156, + "step": 3268 + }, + { + "epoch": 0.4555145265798091, + "grad_norm": 0.5752269625663757, + "learning_rate": 2.5368105133629143e-05, + "loss": 0.11616134643554688, + "step": 3269 + }, + { + "epoch": 0.4556538702710235, + "grad_norm": 0.3351115584373474, + "learning_rate": 2.5359023321343336e-05, + "loss": 0.08524703979492188, + "step": 3270 + }, + { + "epoch": 0.4557932139622379, + "grad_norm": 0.45565372705459595, + "learning_rate": 2.5349940318404895e-05, + "loss": 0.0809173583984375, + "step": 3271 + }, + { + "epoch": 0.45593255765345225, + "grad_norm": 0.6848745346069336, + "learning_rate": 2.5340856126831864e-05, + "loss": 0.11567497253417969, + "step": 3272 + }, + { + "epoch": 0.45607190134466663, + "grad_norm": 0.37738728523254395, + "learning_rate": 2.5331770748642527e-05, + "loss": 0.09852790832519531, + "step": 3273 + }, + { + "epoch": 0.456211245035881, + "grad_norm": 0.5716538429260254, + "learning_rate": 2.5322684185855458e-05, + "loss": 0.12612342834472656, + "step": 3274 + }, + { + "epoch": 0.4563505887270954, + "grad_norm": 0.40100690722465515, + "learning_rate": 2.5313596440489483e-05, + "loss": 0.10254096984863281, + "step": 3275 + }, + { + "epoch": 0.45648993241830976, + "grad_norm": 0.4428718090057373, + "learning_rate": 2.530450751456369e-05, + "loss": 0.0819549560546875, + "step": 3276 + }, + { + "epoch": 0.45662927610952414, + "grad_norm": 0.3490123450756073, + "learning_rate": 2.5295417410097437e-05, + "loss": 0.08123111724853516, + "step": 3277 + }, + { + "epoch": 0.4567686198007385, + "grad_norm": 0.48249733448028564, + "learning_rate": 2.5286326129110325e-05, + "loss": 0.1125631332397461, + "step": 3278 + }, + { + "epoch": 0.4569079634919529, + "grad_norm": 0.6470041871070862, + "learning_rate": 2.5277233673622236e-05, + "loss": 0.10413360595703125, + "step": 3279 + }, + { + "epoch": 0.4570473071831673, + "grad_norm": 0.5036666989326477, + "learning_rate": 2.526814004565331e-05, + "loss": 0.09638595581054688, + "step": 3280 + }, + { + "epoch": 0.45718665087438165, + "grad_norm": 0.3714774250984192, + "learning_rate": 2.5259045247223933e-05, + "loss": 0.08912086486816406, + "step": 3281 + }, + { + "epoch": 0.45732599456559603, + "grad_norm": 0.8502724766731262, + "learning_rate": 2.524994928035477e-05, + "loss": 0.10067176818847656, + "step": 3282 + }, + { + "epoch": 0.4574653382568104, + "grad_norm": 0.9478815793991089, + "learning_rate": 2.5240852147066742e-05, + "loss": 0.11043071746826172, + "step": 3283 + }, + { + "epoch": 0.4576046819480248, + "grad_norm": 0.5202801823616028, + "learning_rate": 2.5231753849381013e-05, + "loss": 0.12027359008789062, + "step": 3284 + }, + { + "epoch": 0.45774402563923916, + "grad_norm": 0.5965293049812317, + "learning_rate": 2.5222654389319025e-05, + "loss": 0.11305999755859375, + "step": 3285 + }, + { + "epoch": 0.45788336933045354, + "grad_norm": 0.43779319524765015, + "learning_rate": 2.5213553768902466e-05, + "loss": 0.09492301940917969, + "step": 3286 + }, + { + "epoch": 0.45802271302166797, + "grad_norm": 0.3423020541667938, + "learning_rate": 2.520445199015328e-05, + "loss": 0.08784103393554688, + "step": 3287 + }, + { + "epoch": 0.45816205671288235, + "grad_norm": 0.377289742231369, + "learning_rate": 2.5195349055093693e-05, + "loss": 0.07929801940917969, + "step": 3288 + }, + { + "epoch": 0.4583014004040967, + "grad_norm": 0.6323826313018799, + "learning_rate": 2.5186244965746146e-05, + "loss": 0.07410812377929688, + "step": 3289 + }, + { + "epoch": 0.4584407440953111, + "grad_norm": 0.27814796566963196, + "learning_rate": 2.5177139724133376e-05, + "loss": 0.06997108459472656, + "step": 3290 + }, + { + "epoch": 0.4585800877865255, + "grad_norm": 0.7100175023078918, + "learning_rate": 2.5168033332278358e-05, + "loss": 0.10459232330322266, + "step": 3291 + }, + { + "epoch": 0.45871943147773986, + "grad_norm": 0.4747393727302551, + "learning_rate": 2.5158925792204317e-05, + "loss": 0.0877995491027832, + "step": 3292 + }, + { + "epoch": 0.45885877516895424, + "grad_norm": 0.4369846284389496, + "learning_rate": 2.514981710593475e-05, + "loss": 0.09392166137695312, + "step": 3293 + }, + { + "epoch": 0.4589981188601686, + "grad_norm": 0.778135359287262, + "learning_rate": 2.5140707275493394e-05, + "loss": 0.11349868774414062, + "step": 3294 + }, + { + "epoch": 0.459137462551383, + "grad_norm": 0.35562437772750854, + "learning_rate": 2.5131596302904245e-05, + "loss": 0.08746337890625, + "step": 3295 + }, + { + "epoch": 0.45927680624259737, + "grad_norm": 0.47270190715789795, + "learning_rate": 2.5122484190191553e-05, + "loss": 0.08882713317871094, + "step": 3296 + }, + { + "epoch": 0.45941614993381175, + "grad_norm": 0.40804916620254517, + "learning_rate": 2.511337093937982e-05, + "loss": 0.09103775024414062, + "step": 3297 + }, + { + "epoch": 0.4595554936250261, + "grad_norm": 0.34944793581962585, + "learning_rate": 2.510425655249381e-05, + "loss": 0.08496952056884766, + "step": 3298 + }, + { + "epoch": 0.4596948373162405, + "grad_norm": 0.4089270532131195, + "learning_rate": 2.509514103155852e-05, + "loss": 0.09459781646728516, + "step": 3299 + }, + { + "epoch": 0.4598341810074549, + "grad_norm": 0.4143008589744568, + "learning_rate": 2.5086024378599217e-05, + "loss": 0.09789466857910156, + "step": 3300 + }, + { + "epoch": 0.45997352469866926, + "grad_norm": 0.3933107852935791, + "learning_rate": 2.5076906595641422e-05, + "loss": 0.08463096618652344, + "step": 3301 + }, + { + "epoch": 0.46011286838988363, + "grad_norm": 0.741692066192627, + "learning_rate": 2.5067787684710886e-05, + "loss": 0.10690689086914062, + "step": 3302 + }, + { + "epoch": 0.460252212081098, + "grad_norm": 0.36118465662002563, + "learning_rate": 2.5058667647833615e-05, + "loss": 0.08923721313476562, + "step": 3303 + }, + { + "epoch": 0.4603915557723124, + "grad_norm": 0.39506492018699646, + "learning_rate": 2.50495464870359e-05, + "loss": 0.07529640197753906, + "step": 3304 + }, + { + "epoch": 0.46053089946352677, + "grad_norm": 0.3650793731212616, + "learning_rate": 2.5040424204344226e-05, + "loss": 0.08678245544433594, + "step": 3305 + }, + { + "epoch": 0.46067024315474114, + "grad_norm": 0.46479910612106323, + "learning_rate": 2.5031300801785374e-05, + "loss": 0.09381699562072754, + "step": 3306 + }, + { + "epoch": 0.4608095868459556, + "grad_norm": 0.32399627566337585, + "learning_rate": 2.502217628138635e-05, + "loss": 0.07448387145996094, + "step": 3307 + }, + { + "epoch": 0.46094893053716995, + "grad_norm": 0.5059199333190918, + "learning_rate": 2.5013050645174414e-05, + "loss": 0.09250640869140625, + "step": 3308 + }, + { + "epoch": 0.46108827422838433, + "grad_norm": 0.4421602189540863, + "learning_rate": 2.5003923895177073e-05, + "loss": 0.10420608520507812, + "step": 3309 + }, + { + "epoch": 0.4612276179195987, + "grad_norm": 0.8493544459342957, + "learning_rate": 2.499479603342209e-05, + "loss": 0.11213302612304688, + "step": 3310 + }, + { + "epoch": 0.4613669616108131, + "grad_norm": 0.3226427137851715, + "learning_rate": 2.4985667061937458e-05, + "loss": 0.08025360107421875, + "step": 3311 + }, + { + "epoch": 0.46150630530202746, + "grad_norm": 0.5472626090049744, + "learning_rate": 2.4976536982751426e-05, + "loss": 0.11160659790039062, + "step": 3312 + }, + { + "epoch": 0.46164564899324184, + "grad_norm": 0.42966243624687195, + "learning_rate": 2.4967405797892498e-05, + "loss": 0.09105300903320312, + "step": 3313 + }, + { + "epoch": 0.4617849926844562, + "grad_norm": 0.4433431625366211, + "learning_rate": 2.4958273509389406e-05, + "loss": 0.10024642944335938, + "step": 3314 + }, + { + "epoch": 0.4619243363756706, + "grad_norm": 0.5238568186759949, + "learning_rate": 2.4949140119271144e-05, + "loss": 0.08775520324707031, + "step": 3315 + }, + { + "epoch": 0.462063680066885, + "grad_norm": 0.5553523302078247, + "learning_rate": 2.4940005629566927e-05, + "loss": 0.09696197509765625, + "step": 3316 + }, + { + "epoch": 0.46220302375809935, + "grad_norm": 0.4058004319667816, + "learning_rate": 2.4930870042306253e-05, + "loss": 0.08838176727294922, + "step": 3317 + }, + { + "epoch": 0.46234236744931373, + "grad_norm": 0.46710890531539917, + "learning_rate": 2.4921733359518824e-05, + "loss": 0.10240554809570312, + "step": 3318 + }, + { + "epoch": 0.4624817111405281, + "grad_norm": 0.6134762763977051, + "learning_rate": 2.4912595583234608e-05, + "loss": 0.09941482543945312, + "step": 3319 + }, + { + "epoch": 0.4626210548317425, + "grad_norm": 0.3779071867465973, + "learning_rate": 2.4903456715483817e-05, + "loss": 0.09630393981933594, + "step": 3320 + }, + { + "epoch": 0.46276039852295686, + "grad_norm": 0.44013094902038574, + "learning_rate": 2.489431675829688e-05, + "loss": 0.07533454895019531, + "step": 3321 + }, + { + "epoch": 0.46289974221417124, + "grad_norm": 0.4931105673313141, + "learning_rate": 2.488517571370451e-05, + "loss": 0.08557319641113281, + "step": 3322 + }, + { + "epoch": 0.4630390859053856, + "grad_norm": 0.6240761876106262, + "learning_rate": 2.487603358373762e-05, + "loss": 0.1161031723022461, + "step": 3323 + }, + { + "epoch": 0.4631784295966, + "grad_norm": 0.4915570318698883, + "learning_rate": 2.486689037042739e-05, + "loss": 0.09288644790649414, + "step": 3324 + }, + { + "epoch": 0.46331777328781437, + "grad_norm": 0.37695369124412537, + "learning_rate": 2.485774607580523e-05, + "loss": 0.0640249252319336, + "step": 3325 + }, + { + "epoch": 0.46345711697902875, + "grad_norm": 0.7268368005752563, + "learning_rate": 2.4848600701902804e-05, + "loss": 0.12492942810058594, + "step": 3326 + }, + { + "epoch": 0.4635964606702432, + "grad_norm": 0.3536728024482727, + "learning_rate": 2.483945425075199e-05, + "loss": 0.09184551239013672, + "step": 3327 + }, + { + "epoch": 0.46373580436145756, + "grad_norm": 0.44617342948913574, + "learning_rate": 2.4830306724384933e-05, + "loss": 0.09029006958007812, + "step": 3328 + }, + { + "epoch": 0.46387514805267194, + "grad_norm": 0.41971394419670105, + "learning_rate": 2.4821158124834e-05, + "loss": 0.09724235534667969, + "step": 3329 + }, + { + "epoch": 0.4640144917438863, + "grad_norm": 0.3674505352973938, + "learning_rate": 2.4812008454131796e-05, + "loss": 0.08580493927001953, + "step": 3330 + }, + { + "epoch": 0.4641538354351007, + "grad_norm": 0.37196874618530273, + "learning_rate": 2.4802857714311177e-05, + "loss": 0.07110214233398438, + "step": 3331 + }, + { + "epoch": 0.46429317912631507, + "grad_norm": 0.5030943751335144, + "learning_rate": 2.479370590740522e-05, + "loss": 0.103973388671875, + "step": 3332 + }, + { + "epoch": 0.46443252281752945, + "grad_norm": 0.4794493317604065, + "learning_rate": 2.478455303544726e-05, + "loss": 0.08211326599121094, + "step": 3333 + }, + { + "epoch": 0.4645718665087438, + "grad_norm": 0.3043074905872345, + "learning_rate": 2.4775399100470837e-05, + "loss": 0.08359146118164062, + "step": 3334 + }, + { + "epoch": 0.4647112101999582, + "grad_norm": 0.46127453446388245, + "learning_rate": 2.4766244104509775e-05, + "loss": 0.09524059295654297, + "step": 3335 + }, + { + "epoch": 0.4648505538911726, + "grad_norm": 0.578477144241333, + "learning_rate": 2.475708804959808e-05, + "loss": 0.10753917694091797, + "step": 3336 + }, + { + "epoch": 0.46498989758238696, + "grad_norm": 0.5764096975326538, + "learning_rate": 2.474793093777002e-05, + "loss": 0.10181045532226562, + "step": 3337 + }, + { + "epoch": 0.46512924127360133, + "grad_norm": 0.591170608997345, + "learning_rate": 2.473877277106011e-05, + "loss": 0.10773897171020508, + "step": 3338 + }, + { + "epoch": 0.4652685849648157, + "grad_norm": 0.36711546778678894, + "learning_rate": 2.4729613551503074e-05, + "loss": 0.086456298828125, + "step": 3339 + }, + { + "epoch": 0.4654079286560301, + "grad_norm": 0.6173598170280457, + "learning_rate": 2.472045328113389e-05, + "loss": 0.09955978393554688, + "step": 3340 + }, + { + "epoch": 0.46554727234724447, + "grad_norm": 0.6200174689292908, + "learning_rate": 2.4711291961987756e-05, + "loss": 0.10923385620117188, + "step": 3341 + }, + { + "epoch": 0.46568661603845884, + "grad_norm": 0.5437065958976746, + "learning_rate": 2.470212959610011e-05, + "loss": 0.09379386901855469, + "step": 3342 + }, + { + "epoch": 0.4658259597296732, + "grad_norm": 0.40051648020744324, + "learning_rate": 2.4692966185506615e-05, + "loss": 0.09645462036132812, + "step": 3343 + }, + { + "epoch": 0.4659653034208876, + "grad_norm": 0.7002670764923096, + "learning_rate": 2.468380173224318e-05, + "loss": 0.11386585235595703, + "step": 3344 + }, + { + "epoch": 0.466104647112102, + "grad_norm": 1.0244942903518677, + "learning_rate": 2.467463623834593e-05, + "loss": 0.09461212158203125, + "step": 3345 + }, + { + "epoch": 0.46624399080331635, + "grad_norm": 0.7305197715759277, + "learning_rate": 2.4665469705851232e-05, + "loss": 0.10911178588867188, + "step": 3346 + }, + { + "epoch": 0.46638333449453073, + "grad_norm": 0.28653931617736816, + "learning_rate": 2.465630213679568e-05, + "loss": 0.08220100402832031, + "step": 3347 + }, + { + "epoch": 0.46652267818574517, + "grad_norm": 0.6199829578399658, + "learning_rate": 2.4647133533216097e-05, + "loss": 0.10715675354003906, + "step": 3348 + }, + { + "epoch": 0.46666202187695954, + "grad_norm": 0.371624231338501, + "learning_rate": 2.4637963897149545e-05, + "loss": 0.07925033569335938, + "step": 3349 + }, + { + "epoch": 0.4668013655681739, + "grad_norm": 0.4466816186904907, + "learning_rate": 2.4628793230633293e-05, + "loss": 0.09279441833496094, + "step": 3350 + }, + { + "epoch": 0.4669407092593883, + "grad_norm": 0.5631693601608276, + "learning_rate": 2.461962153570487e-05, + "loss": 0.08852624893188477, + "step": 3351 + }, + { + "epoch": 0.4670800529506027, + "grad_norm": 0.5409205555915833, + "learning_rate": 2.4610448814402008e-05, + "loss": 0.10539436340332031, + "step": 3352 + }, + { + "epoch": 0.46721939664181705, + "grad_norm": 0.33326098322868347, + "learning_rate": 2.4601275068762673e-05, + "loss": 0.09109306335449219, + "step": 3353 + }, + { + "epoch": 0.46735874033303143, + "grad_norm": 0.4774569869041443, + "learning_rate": 2.459210030082507e-05, + "loss": 0.09404373168945312, + "step": 3354 + }, + { + "epoch": 0.4674980840242458, + "grad_norm": 0.27693283557891846, + "learning_rate": 2.4582924512627616e-05, + "loss": 0.08259201049804688, + "step": 3355 + }, + { + "epoch": 0.4676374277154602, + "grad_norm": 0.5300906300544739, + "learning_rate": 2.4573747706208966e-05, + "loss": 0.0862436294555664, + "step": 3356 + }, + { + "epoch": 0.46777677140667456, + "grad_norm": 0.5759276151657104, + "learning_rate": 2.4564569883608003e-05, + "loss": 0.09987068176269531, + "step": 3357 + }, + { + "epoch": 0.46791611509788894, + "grad_norm": 0.46436387300491333, + "learning_rate": 2.455539104686382e-05, + "loss": 0.09390449523925781, + "step": 3358 + }, + { + "epoch": 0.4680554587891033, + "grad_norm": 0.3520391881465912, + "learning_rate": 2.4546211198015746e-05, + "loss": 0.0897216796875, + "step": 3359 + }, + { + "epoch": 0.4681948024803177, + "grad_norm": 0.284105122089386, + "learning_rate": 2.4537030339103333e-05, + "loss": 0.07186126708984375, + "step": 3360 + }, + { + "epoch": 0.4683341461715321, + "grad_norm": 0.4556911587715149, + "learning_rate": 2.4527848472166364e-05, + "loss": 0.09504127502441406, + "step": 3361 + }, + { + "epoch": 0.46847348986274645, + "grad_norm": 0.42470958828926086, + "learning_rate": 2.4518665599244836e-05, + "loss": 0.07874107360839844, + "step": 3362 + }, + { + "epoch": 0.46861283355396083, + "grad_norm": 0.496550053358078, + "learning_rate": 2.450948172237898e-05, + "loss": 0.10290145874023438, + "step": 3363 + }, + { + "epoch": 0.4687521772451752, + "grad_norm": 0.3915943205356598, + "learning_rate": 2.450029684360923e-05, + "loss": 0.08662605285644531, + "step": 3364 + }, + { + "epoch": 0.4688915209363896, + "grad_norm": 0.8431066274642944, + "learning_rate": 2.449111096497627e-05, + "loss": 0.1259899139404297, + "step": 3365 + }, + { + "epoch": 0.46903086462760396, + "grad_norm": 0.6047765016555786, + "learning_rate": 2.448192408852098e-05, + "loss": 0.10210609436035156, + "step": 3366 + }, + { + "epoch": 0.46917020831881834, + "grad_norm": 0.4611833095550537, + "learning_rate": 2.4472736216284495e-05, + "loss": 0.07930564880371094, + "step": 3367 + }, + { + "epoch": 0.46930955201003277, + "grad_norm": 0.46716704964637756, + "learning_rate": 2.4463547350308123e-05, + "loss": 0.07221412658691406, + "step": 3368 + }, + { + "epoch": 0.46944889570124715, + "grad_norm": 0.5418816804885864, + "learning_rate": 2.4454357492633444e-05, + "loss": 0.09065723419189453, + "step": 3369 + }, + { + "epoch": 0.4695882393924615, + "grad_norm": 1.2900831699371338, + "learning_rate": 2.4445166645302223e-05, + "loss": 0.12275886535644531, + "step": 3370 + }, + { + "epoch": 0.4697275830836759, + "grad_norm": 0.746807873249054, + "learning_rate": 2.4435974810356455e-05, + "loss": 0.09711837768554688, + "step": 3371 + }, + { + "epoch": 0.4698669267748903, + "grad_norm": 0.45636504888534546, + "learning_rate": 2.4426781989838365e-05, + "loss": 0.081329345703125, + "step": 3372 + }, + { + "epoch": 0.47000627046610466, + "grad_norm": 0.500220000743866, + "learning_rate": 2.4417588185790374e-05, + "loss": 0.09796142578125, + "step": 3373 + }, + { + "epoch": 0.47014561415731904, + "grad_norm": 0.42690160870552063, + "learning_rate": 2.4408393400255146e-05, + "loss": 0.09677886962890625, + "step": 3374 + }, + { + "epoch": 0.4702849578485334, + "grad_norm": 0.6231980323791504, + "learning_rate": 2.4399197635275554e-05, + "loss": 0.08408164978027344, + "step": 3375 + }, + { + "epoch": 0.4704243015397478, + "grad_norm": 0.83072429895401, + "learning_rate": 2.4390000892894677e-05, + "loss": 0.1024465560913086, + "step": 3376 + }, + { + "epoch": 0.47056364523096217, + "grad_norm": 0.6600984930992126, + "learning_rate": 2.4380803175155833e-05, + "loss": 0.08095550537109375, + "step": 3377 + }, + { + "epoch": 0.47070298892217655, + "grad_norm": 0.3903702199459076, + "learning_rate": 2.4371604484102535e-05, + "loss": 0.08209228515625, + "step": 3378 + }, + { + "epoch": 0.4708423326133909, + "grad_norm": 0.4258441627025604, + "learning_rate": 2.4362404821778528e-05, + "loss": 0.09598064422607422, + "step": 3379 + }, + { + "epoch": 0.4709816763046053, + "grad_norm": 0.8938683867454529, + "learning_rate": 2.4353204190227767e-05, + "loss": 0.141693115234375, + "step": 3380 + }, + { + "epoch": 0.4711210199958197, + "grad_norm": 0.40020009875297546, + "learning_rate": 2.4344002591494415e-05, + "loss": 0.10193061828613281, + "step": 3381 + }, + { + "epoch": 0.47126036368703406, + "grad_norm": 0.5885612964630127, + "learning_rate": 2.433480002762286e-05, + "loss": 0.09842491149902344, + "step": 3382 + }, + { + "epoch": 0.47139970737824843, + "grad_norm": 0.3441292643547058, + "learning_rate": 2.4325596500657714e-05, + "loss": 0.08744049072265625, + "step": 3383 + }, + { + "epoch": 0.4715390510694628, + "grad_norm": 0.42527249455451965, + "learning_rate": 2.431639201264377e-05, + "loss": 0.10155487060546875, + "step": 3384 + }, + { + "epoch": 0.4716783947606772, + "grad_norm": 0.3403942584991455, + "learning_rate": 2.4307186565626073e-05, + "loss": 0.08571434020996094, + "step": 3385 + }, + { + "epoch": 0.47181773845189157, + "grad_norm": 0.4996941387653351, + "learning_rate": 2.429798016164986e-05, + "loss": 0.11043739318847656, + "step": 3386 + }, + { + "epoch": 0.47195708214310594, + "grad_norm": 0.52249675989151, + "learning_rate": 2.428877280276057e-05, + "loss": 0.11090087890625, + "step": 3387 + }, + { + "epoch": 0.4720964258343204, + "grad_norm": 0.3414641320705414, + "learning_rate": 2.4279564491003883e-05, + "loss": 0.07569313049316406, + "step": 3388 + }, + { + "epoch": 0.47223576952553475, + "grad_norm": 0.4467654824256897, + "learning_rate": 2.4270355228425664e-05, + "loss": 0.09550094604492188, + "step": 3389 + }, + { + "epoch": 0.47237511321674913, + "grad_norm": 0.7098508477210999, + "learning_rate": 2.4261145017072014e-05, + "loss": 0.09798049926757812, + "step": 3390 + }, + { + "epoch": 0.4725144569079635, + "grad_norm": 0.6298187971115112, + "learning_rate": 2.425193385898922e-05, + "loss": 0.11298179626464844, + "step": 3391 + }, + { + "epoch": 0.4726538005991779, + "grad_norm": 0.4711478650569916, + "learning_rate": 2.42427217562238e-05, + "loss": 0.08747482299804688, + "step": 3392 + }, + { + "epoch": 0.47279314429039226, + "grad_norm": 0.43442270159721375, + "learning_rate": 2.4233508710822466e-05, + "loss": 0.10482215881347656, + "step": 3393 + }, + { + "epoch": 0.47293248798160664, + "grad_norm": 0.4452684819698334, + "learning_rate": 2.4224294724832152e-05, + "loss": 0.10374641418457031, + "step": 3394 + }, + { + "epoch": 0.473071831672821, + "grad_norm": 0.43534260988235474, + "learning_rate": 2.421507980029999e-05, + "loss": 0.09018898010253906, + "step": 3395 + }, + { + "epoch": 0.4732111753640354, + "grad_norm": 0.3990996479988098, + "learning_rate": 2.4205863939273328e-05, + "loss": 0.09092330932617188, + "step": 3396 + }, + { + "epoch": 0.4733505190552498, + "grad_norm": 0.41133472323417664, + "learning_rate": 2.4196647143799723e-05, + "loss": 0.09299278259277344, + "step": 3397 + }, + { + "epoch": 0.47348986274646415, + "grad_norm": 0.49499398469924927, + "learning_rate": 2.4187429415926927e-05, + "loss": 0.09455013275146484, + "step": 3398 + }, + { + "epoch": 0.47362920643767853, + "grad_norm": 0.44198840856552124, + "learning_rate": 2.4178210757702924e-05, + "loss": 0.10278511047363281, + "step": 3399 + }, + { + "epoch": 0.4737685501288929, + "grad_norm": 0.5099485516548157, + "learning_rate": 2.4168991171175872e-05, + "loss": 0.12946701049804688, + "step": 3400 + }, + { + "epoch": 0.4739078938201073, + "grad_norm": 0.5038263201713562, + "learning_rate": 2.415977065839417e-05, + "loss": 0.08662796020507812, + "step": 3401 + }, + { + "epoch": 0.47404723751132166, + "grad_norm": 0.5886743664741516, + "learning_rate": 2.4150549221406395e-05, + "loss": 0.10239124298095703, + "step": 3402 + }, + { + "epoch": 0.47418658120253604, + "grad_norm": 0.5908005833625793, + "learning_rate": 2.4141326862261332e-05, + "loss": 0.11698341369628906, + "step": 3403 + }, + { + "epoch": 0.4743259248937504, + "grad_norm": 0.5662357807159424, + "learning_rate": 2.4132103583008008e-05, + "loss": 0.08936119079589844, + "step": 3404 + }, + { + "epoch": 0.4744652685849648, + "grad_norm": 0.30412134528160095, + "learning_rate": 2.4122879385695587e-05, + "loss": 0.07443809509277344, + "step": 3405 + }, + { + "epoch": 0.47460461227617917, + "grad_norm": 0.39012253284454346, + "learning_rate": 2.41136542723735e-05, + "loss": 0.08555221557617188, + "step": 3406 + }, + { + "epoch": 0.47474395596739355, + "grad_norm": 0.45918262004852295, + "learning_rate": 2.410442824509135e-05, + "loss": 0.09944725036621094, + "step": 3407 + }, + { + "epoch": 0.474883299658608, + "grad_norm": 0.3743506371974945, + "learning_rate": 2.409520130589895e-05, + "loss": 0.08347511291503906, + "step": 3408 + }, + { + "epoch": 0.47502264334982236, + "grad_norm": 0.46894362568855286, + "learning_rate": 2.4085973456846318e-05, + "loss": 0.08277130126953125, + "step": 3409 + }, + { + "epoch": 0.47516198704103674, + "grad_norm": 0.42630839347839355, + "learning_rate": 2.4076744699983663e-05, + "loss": 0.07903480529785156, + "step": 3410 + }, + { + "epoch": 0.4753013307322511, + "grad_norm": 0.5020889043807983, + "learning_rate": 2.4067515037361408e-05, + "loss": 0.09169578552246094, + "step": 3411 + }, + { + "epoch": 0.4754406744234655, + "grad_norm": 0.4270579516887665, + "learning_rate": 2.405828447103018e-05, + "loss": 0.07627105712890625, + "step": 3412 + }, + { + "epoch": 0.47558001811467987, + "grad_norm": 0.47096961736679077, + "learning_rate": 2.4049053003040795e-05, + "loss": 0.08424758911132812, + "step": 3413 + }, + { + "epoch": 0.47571936180589425, + "grad_norm": 0.47854965925216675, + "learning_rate": 2.4039820635444264e-05, + "loss": 0.11084556579589844, + "step": 3414 + }, + { + "epoch": 0.4758587054971086, + "grad_norm": 0.35558754205703735, + "learning_rate": 2.4030587370291835e-05, + "loss": 0.08649826049804688, + "step": 3415 + }, + { + "epoch": 0.475998049188323, + "grad_norm": 0.4289076030254364, + "learning_rate": 2.4021353209634896e-05, + "loss": 0.09354209899902344, + "step": 3416 + }, + { + "epoch": 0.4761373928795374, + "grad_norm": 0.42000216245651245, + "learning_rate": 2.4012118155525094e-05, + "loss": 0.0847463607788086, + "step": 3417 + }, + { + "epoch": 0.47627673657075176, + "grad_norm": 0.37192919850349426, + "learning_rate": 2.4002882210014227e-05, + "loss": 0.08253288269042969, + "step": 3418 + }, + { + "epoch": 0.47641608026196614, + "grad_norm": 0.5545188784599304, + "learning_rate": 2.3993645375154328e-05, + "loss": 0.09913253784179688, + "step": 3419 + }, + { + "epoch": 0.4765554239531805, + "grad_norm": 0.40085142850875854, + "learning_rate": 2.3984407652997607e-05, + "loss": 0.07564163208007812, + "step": 3420 + }, + { + "epoch": 0.4766947676443949, + "grad_norm": 0.32471245527267456, + "learning_rate": 2.397516904559646e-05, + "loss": 0.06889533996582031, + "step": 3421 + }, + { + "epoch": 0.47683411133560927, + "grad_norm": 0.5205085873603821, + "learning_rate": 2.3965929555003512e-05, + "loss": 0.0876455307006836, + "step": 3422 + }, + { + "epoch": 0.47697345502682365, + "grad_norm": 0.6245311498641968, + "learning_rate": 2.3956689183271557e-05, + "loss": 0.09157562255859375, + "step": 3423 + }, + { + "epoch": 0.477112798718038, + "grad_norm": 0.4125975966453552, + "learning_rate": 2.39474479324536e-05, + "loss": 0.09163284301757812, + "step": 3424 + }, + { + "epoch": 0.4772521424092524, + "grad_norm": 0.49080345034599304, + "learning_rate": 2.3938205804602835e-05, + "loss": 0.08495330810546875, + "step": 3425 + }, + { + "epoch": 0.4773914861004668, + "grad_norm": 0.49669715762138367, + "learning_rate": 2.392896280177265e-05, + "loss": 0.09451580047607422, + "step": 3426 + }, + { + "epoch": 0.47753082979168116, + "grad_norm": 0.7934829592704773, + "learning_rate": 2.391971892601663e-05, + "loss": 0.11027717590332031, + "step": 3427 + }, + { + "epoch": 0.4776701734828956, + "grad_norm": 0.5896791219711304, + "learning_rate": 2.3910474179388557e-05, + "loss": 0.13793182373046875, + "step": 3428 + }, + { + "epoch": 0.47780951717410997, + "grad_norm": 0.49039226770401, + "learning_rate": 2.3901228563942397e-05, + "loss": 0.08974838256835938, + "step": 3429 + }, + { + "epoch": 0.47794886086532434, + "grad_norm": 0.41948848962783813, + "learning_rate": 2.389198208173231e-05, + "loss": 0.1057586669921875, + "step": 3430 + }, + { + "epoch": 0.4780882045565387, + "grad_norm": 0.480119526386261, + "learning_rate": 2.3882734734812673e-05, + "loss": 0.0988302230834961, + "step": 3431 + }, + { + "epoch": 0.4782275482477531, + "grad_norm": 0.41554003953933716, + "learning_rate": 2.3873486525238008e-05, + "loss": 0.07796669006347656, + "step": 3432 + }, + { + "epoch": 0.4783668919389675, + "grad_norm": 0.5747083425521851, + "learning_rate": 2.3864237455063083e-05, + "loss": 0.10154914855957031, + "step": 3433 + }, + { + "epoch": 0.47850623563018185, + "grad_norm": 0.4159761667251587, + "learning_rate": 2.3854987526342806e-05, + "loss": 0.09024620056152344, + "step": 3434 + }, + { + "epoch": 0.47864557932139623, + "grad_norm": 0.2847191095352173, + "learning_rate": 2.3845736741132317e-05, + "loss": 0.07617759704589844, + "step": 3435 + }, + { + "epoch": 0.4787849230126106, + "grad_norm": 0.4158693253993988, + "learning_rate": 2.3836485101486928e-05, + "loss": 0.08624458312988281, + "step": 3436 + }, + { + "epoch": 0.478924266703825, + "grad_norm": 0.4103093445301056, + "learning_rate": 2.382723260946213e-05, + "loss": 0.10606002807617188, + "step": 3437 + }, + { + "epoch": 0.47906361039503936, + "grad_norm": 0.3802903890609741, + "learning_rate": 2.3817979267113633e-05, + "loss": 0.09663772583007812, + "step": 3438 + }, + { + "epoch": 0.47920295408625374, + "grad_norm": 0.40499117970466614, + "learning_rate": 2.3808725076497297e-05, + "loss": 0.09331130981445312, + "step": 3439 + }, + { + "epoch": 0.4793422977774681, + "grad_norm": 0.4324609935283661, + "learning_rate": 2.3799470039669212e-05, + "loss": 0.10648727416992188, + "step": 3440 + }, + { + "epoch": 0.4794816414686825, + "grad_norm": 0.328056663274765, + "learning_rate": 2.3790214158685622e-05, + "loss": 0.08068275451660156, + "step": 3441 + }, + { + "epoch": 0.4796209851598969, + "grad_norm": 0.4151003658771515, + "learning_rate": 2.3780957435602984e-05, + "loss": 0.0850362777709961, + "step": 3442 + }, + { + "epoch": 0.47976032885111125, + "grad_norm": 0.3604259788990021, + "learning_rate": 2.377169987247792e-05, + "loss": 0.09549713134765625, + "step": 3443 + }, + { + "epoch": 0.47989967254232563, + "grad_norm": 0.41616639494895935, + "learning_rate": 2.376244147136726e-05, + "loss": 0.08270740509033203, + "step": 3444 + }, + { + "epoch": 0.48003901623354, + "grad_norm": 0.6228440403938293, + "learning_rate": 2.3753182234327994e-05, + "loss": 0.09198760986328125, + "step": 3445 + }, + { + "epoch": 0.4801783599247544, + "grad_norm": 0.900379478931427, + "learning_rate": 2.374392216341733e-05, + "loss": 0.12404823303222656, + "step": 3446 + }, + { + "epoch": 0.48031770361596876, + "grad_norm": 0.36823806166648865, + "learning_rate": 2.3734661260692633e-05, + "loss": 0.07953834533691406, + "step": 3447 + }, + { + "epoch": 0.4804570473071832, + "grad_norm": 0.41421398520469666, + "learning_rate": 2.3725399528211462e-05, + "loss": 0.07906913757324219, + "step": 3448 + }, + { + "epoch": 0.48059639099839757, + "grad_norm": 0.46539440751075745, + "learning_rate": 2.371613696803158e-05, + "loss": 0.09301948547363281, + "step": 3449 + }, + { + "epoch": 0.48073573468961195, + "grad_norm": 0.7037166953086853, + "learning_rate": 2.3706873582210893e-05, + "loss": 0.11251068115234375, + "step": 3450 + }, + { + "epoch": 0.4808750783808263, + "grad_norm": 0.5468170642852783, + "learning_rate": 2.369760937280753e-05, + "loss": 0.08740997314453125, + "step": 3451 + }, + { + "epoch": 0.4810144220720407, + "grad_norm": 0.5132626891136169, + "learning_rate": 2.368834434187979e-05, + "loss": 0.08431529998779297, + "step": 3452 + }, + { + "epoch": 0.4811537657632551, + "grad_norm": 0.49559250473976135, + "learning_rate": 2.3679078491486133e-05, + "loss": 0.08855152130126953, + "step": 3453 + }, + { + "epoch": 0.48129310945446946, + "grad_norm": 0.39275702834129333, + "learning_rate": 2.3669811823685235e-05, + "loss": 0.08515167236328125, + "step": 3454 + }, + { + "epoch": 0.48143245314568384, + "grad_norm": 0.5431119799613953, + "learning_rate": 2.366054434053593e-05, + "loss": 0.11029958724975586, + "step": 3455 + }, + { + "epoch": 0.4815717968368982, + "grad_norm": 0.4709182381629944, + "learning_rate": 2.3651276044097246e-05, + "loss": 0.09728431701660156, + "step": 3456 + }, + { + "epoch": 0.4817111405281126, + "grad_norm": 0.4484216570854187, + "learning_rate": 2.364200693642839e-05, + "loss": 0.08250045776367188, + "step": 3457 + }, + { + "epoch": 0.48185048421932697, + "grad_norm": 0.33168473839759827, + "learning_rate": 2.363273701958873e-05, + "loss": 0.08075904846191406, + "step": 3458 + }, + { + "epoch": 0.48198982791054135, + "grad_norm": 0.35240423679351807, + "learning_rate": 2.3623466295637848e-05, + "loss": 0.08496475219726562, + "step": 3459 + }, + { + "epoch": 0.4821291716017557, + "grad_norm": 0.44464820623397827, + "learning_rate": 2.3614194766635482e-05, + "loss": 0.0902862548828125, + "step": 3460 + }, + { + "epoch": 0.4822685152929701, + "grad_norm": 0.6696125864982605, + "learning_rate": 2.3604922434641545e-05, + "loss": 0.10447025299072266, + "step": 3461 + }, + { + "epoch": 0.4824078589841845, + "grad_norm": 0.5700128674507141, + "learning_rate": 2.3595649301716154e-05, + "loss": 0.09767913818359375, + "step": 3462 + }, + { + "epoch": 0.48254720267539886, + "grad_norm": 0.4356009364128113, + "learning_rate": 2.3586375369919573e-05, + "loss": 0.09535980224609375, + "step": 3463 + }, + { + "epoch": 0.48268654636661323, + "grad_norm": 0.4468086063861847, + "learning_rate": 2.3577100641312258e-05, + "loss": 0.091552734375, + "step": 3464 + }, + { + "epoch": 0.4828258900578276, + "grad_norm": 0.2996504306793213, + "learning_rate": 2.356782511795486e-05, + "loss": 0.08357429504394531, + "step": 3465 + }, + { + "epoch": 0.482965233749042, + "grad_norm": 0.3449593782424927, + "learning_rate": 2.3558548801908164e-05, + "loss": 0.08615493774414062, + "step": 3466 + }, + { + "epoch": 0.48310457744025637, + "grad_norm": 0.5083550214767456, + "learning_rate": 2.3549271695233177e-05, + "loss": 0.09634780883789062, + "step": 3467 + }, + { + "epoch": 0.4832439211314708, + "grad_norm": 0.5325677394866943, + "learning_rate": 2.353999379999104e-05, + "loss": 0.08170700073242188, + "step": 3468 + }, + { + "epoch": 0.4833832648226852, + "grad_norm": 0.9522843360900879, + "learning_rate": 2.3530715118243105e-05, + "loss": 0.11081504821777344, + "step": 3469 + }, + { + "epoch": 0.48352260851389955, + "grad_norm": 0.5108357071876526, + "learning_rate": 2.3521435652050886e-05, + "loss": 0.10079574584960938, + "step": 3470 + }, + { + "epoch": 0.48366195220511393, + "grad_norm": 0.6147168278694153, + "learning_rate": 2.351215540347605e-05, + "loss": 0.0872201919555664, + "step": 3471 + }, + { + "epoch": 0.4838012958963283, + "grad_norm": 0.7486119270324707, + "learning_rate": 2.350287437458047e-05, + "loss": 0.09604644775390625, + "step": 3472 + }, + { + "epoch": 0.4839406395875427, + "grad_norm": 0.5552497506141663, + "learning_rate": 2.349359256742618e-05, + "loss": 0.0828704833984375, + "step": 3473 + }, + { + "epoch": 0.48407998327875706, + "grad_norm": 0.6446892023086548, + "learning_rate": 2.3484309984075376e-05, + "loss": 0.10250473022460938, + "step": 3474 + }, + { + "epoch": 0.48421932696997144, + "grad_norm": 0.4845282733440399, + "learning_rate": 2.3475026626590443e-05, + "loss": 0.10368919372558594, + "step": 3475 + }, + { + "epoch": 0.4843586706611858, + "grad_norm": 0.5121467709541321, + "learning_rate": 2.3465742497033932e-05, + "loss": 0.10377883911132812, + "step": 3476 + }, + { + "epoch": 0.4844980143524002, + "grad_norm": 0.36552274227142334, + "learning_rate": 2.345645759746856e-05, + "loss": 0.09002876281738281, + "step": 3477 + }, + { + "epoch": 0.4846373580436146, + "grad_norm": 0.34301263093948364, + "learning_rate": 2.3447171929957224e-05, + "loss": 0.08536815643310547, + "step": 3478 + }, + { + "epoch": 0.48477670173482895, + "grad_norm": 0.3815698027610779, + "learning_rate": 2.3437885496562986e-05, + "loss": 0.10057640075683594, + "step": 3479 + }, + { + "epoch": 0.48491604542604333, + "grad_norm": 0.4825282394886017, + "learning_rate": 2.3428598299349076e-05, + "loss": 0.08634090423583984, + "step": 3480 + }, + { + "epoch": 0.4850553891172577, + "grad_norm": 0.5544397234916687, + "learning_rate": 2.34193103403789e-05, + "loss": 0.09230613708496094, + "step": 3481 + }, + { + "epoch": 0.4851947328084721, + "grad_norm": 0.4796181321144104, + "learning_rate": 2.341002162171603e-05, + "loss": 0.09939384460449219, + "step": 3482 + }, + { + "epoch": 0.48533407649968646, + "grad_norm": 0.5118287801742554, + "learning_rate": 2.3400732145424216e-05, + "loss": 0.09431171417236328, + "step": 3483 + }, + { + "epoch": 0.48547342019090084, + "grad_norm": 0.40192246437072754, + "learning_rate": 2.339144191356735e-05, + "loss": 0.09296417236328125, + "step": 3484 + }, + { + "epoch": 0.4856127638821152, + "grad_norm": 0.5272851586341858, + "learning_rate": 2.3382150928209523e-05, + "loss": 0.06976699829101562, + "step": 3485 + }, + { + "epoch": 0.4857521075733296, + "grad_norm": 0.7166458964347839, + "learning_rate": 2.3372859191414978e-05, + "loss": 0.12181472778320312, + "step": 3486 + }, + { + "epoch": 0.48589145126454397, + "grad_norm": 0.2928094267845154, + "learning_rate": 2.3363566705248117e-05, + "loss": 0.07884883880615234, + "step": 3487 + }, + { + "epoch": 0.4860307949557584, + "grad_norm": 0.544873833656311, + "learning_rate": 2.3354273471773534e-05, + "loss": 0.09393882751464844, + "step": 3488 + }, + { + "epoch": 0.4861701386469728, + "grad_norm": 0.5764533281326294, + "learning_rate": 2.3344979493055958e-05, + "loss": 0.10088825225830078, + "step": 3489 + }, + { + "epoch": 0.48630948233818716, + "grad_norm": 0.3907257318496704, + "learning_rate": 2.333568477116031e-05, + "loss": 0.07628345489501953, + "step": 3490 + }, + { + "epoch": 0.48644882602940154, + "grad_norm": 0.7189585566520691, + "learning_rate": 2.3326389308151658e-05, + "loss": 0.09816455841064453, + "step": 3491 + }, + { + "epoch": 0.4865881697206159, + "grad_norm": 0.7930517792701721, + "learning_rate": 2.3317093106095246e-05, + "loss": 0.12178707122802734, + "step": 3492 + }, + { + "epoch": 0.4867275134118303, + "grad_norm": 0.32591089606285095, + "learning_rate": 2.330779616705648e-05, + "loss": 0.07304668426513672, + "step": 3493 + }, + { + "epoch": 0.48686685710304467, + "grad_norm": 0.4635683000087738, + "learning_rate": 2.329849849310092e-05, + "loss": 0.09394645690917969, + "step": 3494 + }, + { + "epoch": 0.48700620079425905, + "grad_norm": 0.7572290301322937, + "learning_rate": 2.3289200086294298e-05, + "loss": 0.10734367370605469, + "step": 3495 + }, + { + "epoch": 0.4871455444854734, + "grad_norm": 0.5473294258117676, + "learning_rate": 2.3279900948702516e-05, + "loss": 0.09063720703125, + "step": 3496 + }, + { + "epoch": 0.4872848881766878, + "grad_norm": 0.866917073726654, + "learning_rate": 2.3270601082391623e-05, + "loss": 0.08599853515625, + "step": 3497 + }, + { + "epoch": 0.4874242318679022, + "grad_norm": 0.5999540090560913, + "learning_rate": 2.3261300489427835e-05, + "loss": 0.1033172607421875, + "step": 3498 + }, + { + "epoch": 0.48756357555911656, + "grad_norm": 0.5204306840896606, + "learning_rate": 2.3251999171877538e-05, + "loss": 0.11273956298828125, + "step": 3499 + }, + { + "epoch": 0.48770291925033094, + "grad_norm": 0.30075132846832275, + "learning_rate": 2.3242697131807267e-05, + "loss": 0.07770347595214844, + "step": 3500 + }, + { + "epoch": 0.4878422629415453, + "grad_norm": 0.619268536567688, + "learning_rate": 2.3233394371283727e-05, + "loss": 0.10169029235839844, + "step": 3501 + }, + { + "epoch": 0.4879816066327597, + "grad_norm": 0.42761752009391785, + "learning_rate": 2.322409089237378e-05, + "loss": 0.09831428527832031, + "step": 3502 + }, + { + "epoch": 0.48812095032397407, + "grad_norm": 0.4556981921195984, + "learning_rate": 2.321478669714444e-05, + "loss": 0.09161186218261719, + "step": 3503 + }, + { + "epoch": 0.48826029401518845, + "grad_norm": 0.5282820463180542, + "learning_rate": 2.3205481787662895e-05, + "loss": 0.10009765625, + "step": 3504 + }, + { + "epoch": 0.4883996377064028, + "grad_norm": 0.44491246342658997, + "learning_rate": 2.3196176165996476e-05, + "loss": 0.09662818908691406, + "step": 3505 + }, + { + "epoch": 0.4885389813976172, + "grad_norm": 0.4474603235721588, + "learning_rate": 2.3186869834212682e-05, + "loss": 0.08848953247070312, + "step": 3506 + }, + { + "epoch": 0.4886783250888316, + "grad_norm": 0.4760996103286743, + "learning_rate": 2.3177562794379173e-05, + "loss": 0.09019279479980469, + "step": 3507 + }, + { + "epoch": 0.488817668780046, + "grad_norm": 0.41524240374565125, + "learning_rate": 2.3168255048563753e-05, + "loss": 0.08428764343261719, + "step": 3508 + }, + { + "epoch": 0.4889570124712604, + "grad_norm": 0.8928782939910889, + "learning_rate": 2.3158946598834393e-05, + "loss": 0.09798431396484375, + "step": 3509 + }, + { + "epoch": 0.48909635616247477, + "grad_norm": 0.4506617784500122, + "learning_rate": 2.314963744725922e-05, + "loss": 0.08533000946044922, + "step": 3510 + }, + { + "epoch": 0.48923569985368914, + "grad_norm": 0.2981190085411072, + "learning_rate": 2.314032759590651e-05, + "loss": 0.07483291625976562, + "step": 3511 + }, + { + "epoch": 0.4893750435449035, + "grad_norm": 0.6079007983207703, + "learning_rate": 2.313101704684471e-05, + "loss": 0.11789894104003906, + "step": 3512 + }, + { + "epoch": 0.4895143872361179, + "grad_norm": 0.4057367742061615, + "learning_rate": 2.31217058021424e-05, + "loss": 0.09421348571777344, + "step": 3513 + }, + { + "epoch": 0.4896537309273323, + "grad_norm": 0.6072142720222473, + "learning_rate": 2.3112393863868327e-05, + "loss": 0.11096382141113281, + "step": 3514 + }, + { + "epoch": 0.48979307461854665, + "grad_norm": 0.48413825035095215, + "learning_rate": 2.3103081234091406e-05, + "loss": 0.09276771545410156, + "step": 3515 + }, + { + "epoch": 0.48993241830976103, + "grad_norm": 0.4909416437149048, + "learning_rate": 2.3093767914880668e-05, + "loss": 0.0912628173828125, + "step": 3516 + }, + { + "epoch": 0.4900717620009754, + "grad_norm": 0.6361947059631348, + "learning_rate": 2.308445390830534e-05, + "loss": 0.0942840576171875, + "step": 3517 + }, + { + "epoch": 0.4902111056921898, + "grad_norm": 0.5419065952301025, + "learning_rate": 2.3075139216434762e-05, + "loss": 0.10759162902832031, + "step": 3518 + }, + { + "epoch": 0.49035044938340416, + "grad_norm": 0.4116472899913788, + "learning_rate": 2.3065823841338465e-05, + "loss": 0.08144950866699219, + "step": 3519 + }, + { + "epoch": 0.49048979307461854, + "grad_norm": 0.44421887397766113, + "learning_rate": 2.3056507785086105e-05, + "loss": 0.07254505157470703, + "step": 3520 + }, + { + "epoch": 0.4906291367658329, + "grad_norm": 0.43100857734680176, + "learning_rate": 2.304719104974749e-05, + "loss": 0.07548141479492188, + "step": 3521 + }, + { + "epoch": 0.4907684804570473, + "grad_norm": 0.28159475326538086, + "learning_rate": 2.3037873637392596e-05, + "loss": 0.07655143737792969, + "step": 3522 + }, + { + "epoch": 0.4909078241482617, + "grad_norm": 0.3668830394744873, + "learning_rate": 2.3028555550091536e-05, + "loss": 0.07957935333251953, + "step": 3523 + }, + { + "epoch": 0.49104716783947605, + "grad_norm": 0.437991738319397, + "learning_rate": 2.3019236789914575e-05, + "loss": 0.08981132507324219, + "step": 3524 + }, + { + "epoch": 0.49118651153069043, + "grad_norm": 0.41576653718948364, + "learning_rate": 2.300991735893213e-05, + "loss": 0.10143089294433594, + "step": 3525 + }, + { + "epoch": 0.4913258552219048, + "grad_norm": 0.731276273727417, + "learning_rate": 2.3000597259214765e-05, + "loss": 0.11522674560546875, + "step": 3526 + }, + { + "epoch": 0.4914651989131192, + "grad_norm": 0.8897188305854797, + "learning_rate": 2.2991276492833197e-05, + "loss": 0.11099624633789062, + "step": 3527 + }, + { + "epoch": 0.4916045426043336, + "grad_norm": 0.43240225315093994, + "learning_rate": 2.2981955061858282e-05, + "loss": 0.09190940856933594, + "step": 3528 + }, + { + "epoch": 0.491743886295548, + "grad_norm": 0.5149895548820496, + "learning_rate": 2.297263296836103e-05, + "loss": 0.11329460144042969, + "step": 3529 + }, + { + "epoch": 0.49188322998676237, + "grad_norm": 0.45125943422317505, + "learning_rate": 2.2963310214412596e-05, + "loss": 0.09970283508300781, + "step": 3530 + }, + { + "epoch": 0.49202257367797675, + "grad_norm": 0.3560136556625366, + "learning_rate": 2.2953986802084293e-05, + "loss": 0.07925033569335938, + "step": 3531 + }, + { + "epoch": 0.4921619173691911, + "grad_norm": 0.8238831162452698, + "learning_rate": 2.2944662733447557e-05, + "loss": 0.09444999694824219, + "step": 3532 + }, + { + "epoch": 0.4923012610604055, + "grad_norm": 0.5963654518127441, + "learning_rate": 2.2935338010573998e-05, + "loss": 0.08721446990966797, + "step": 3533 + }, + { + "epoch": 0.4924406047516199, + "grad_norm": 0.8901246190071106, + "learning_rate": 2.292601263553534e-05, + "loss": 0.10432052612304688, + "step": 3534 + }, + { + "epoch": 0.49257994844283426, + "grad_norm": 0.4436335861682892, + "learning_rate": 2.2916686610403477e-05, + "loss": 0.09658622741699219, + "step": 3535 + }, + { + "epoch": 0.49271929213404864, + "grad_norm": 0.32249295711517334, + "learning_rate": 2.2907359937250445e-05, + "loss": 0.08913516998291016, + "step": 3536 + }, + { + "epoch": 0.492858635825263, + "grad_norm": 0.36077895760536194, + "learning_rate": 2.2898032618148403e-05, + "loss": 0.08251953125, + "step": 3537 + }, + { + "epoch": 0.4929979795164774, + "grad_norm": 0.47784245014190674, + "learning_rate": 2.288870465516968e-05, + "loss": 0.10231590270996094, + "step": 3538 + }, + { + "epoch": 0.49313732320769177, + "grad_norm": 0.3970542848110199, + "learning_rate": 2.287937605038673e-05, + "loss": 0.0785369873046875, + "step": 3539 + }, + { + "epoch": 0.49327666689890615, + "grad_norm": 0.4862060248851776, + "learning_rate": 2.2870046805872166e-05, + "loss": 0.0777750015258789, + "step": 3540 + }, + { + "epoch": 0.4934160105901205, + "grad_norm": 0.6668583154678345, + "learning_rate": 2.286071692369872e-05, + "loss": 0.12025737762451172, + "step": 3541 + }, + { + "epoch": 0.4935553542813349, + "grad_norm": 0.5480947494506836, + "learning_rate": 2.2851386405939288e-05, + "loss": 0.12278079986572266, + "step": 3542 + }, + { + "epoch": 0.4936946979725493, + "grad_norm": 0.40643373131752014, + "learning_rate": 2.284205525466689e-05, + "loss": 0.08835601806640625, + "step": 3543 + }, + { + "epoch": 0.49383404166376366, + "grad_norm": 0.8112149834632874, + "learning_rate": 2.2832723471954705e-05, + "loss": 0.11197280883789062, + "step": 3544 + }, + { + "epoch": 0.49397338535497803, + "grad_norm": 0.5163163542747498, + "learning_rate": 2.2823391059876032e-05, + "loss": 0.07828426361083984, + "step": 3545 + }, + { + "epoch": 0.4941127290461924, + "grad_norm": 0.710674524307251, + "learning_rate": 2.2814058020504324e-05, + "loss": 0.1110525131225586, + "step": 3546 + }, + { + "epoch": 0.4942520727374068, + "grad_norm": 0.47131314873695374, + "learning_rate": 2.280472435591318e-05, + "loss": 0.09731483459472656, + "step": 3547 + }, + { + "epoch": 0.49439141642862117, + "grad_norm": 0.49820053577423096, + "learning_rate": 2.2795390068176304e-05, + "loss": 0.11124610900878906, + "step": 3548 + }, + { + "epoch": 0.4945307601198356, + "grad_norm": 0.4696536064147949, + "learning_rate": 2.2786055159367588e-05, + "loss": 0.09602928161621094, + "step": 3549 + }, + { + "epoch": 0.49467010381105, + "grad_norm": 0.41318178176879883, + "learning_rate": 2.277671963156101e-05, + "loss": 0.0880270004272461, + "step": 3550 + }, + { + "epoch": 0.49480944750226435, + "grad_norm": 0.39199894666671753, + "learning_rate": 2.2767383486830728e-05, + "loss": 0.09119606018066406, + "step": 3551 + }, + { + "epoch": 0.49494879119347873, + "grad_norm": 0.41043126583099365, + "learning_rate": 2.275804672725102e-05, + "loss": 0.08877849578857422, + "step": 3552 + }, + { + "epoch": 0.4950881348846931, + "grad_norm": 0.4423809051513672, + "learning_rate": 2.274870935489629e-05, + "loss": 0.080963134765625, + "step": 3553 + }, + { + "epoch": 0.4952274785759075, + "grad_norm": 0.7711588740348816, + "learning_rate": 2.2739371371841103e-05, + "loss": 0.13309860229492188, + "step": 3554 + }, + { + "epoch": 0.49536682226712186, + "grad_norm": 1.1135303974151611, + "learning_rate": 2.2730032780160128e-05, + "loss": 0.126953125, + "step": 3555 + }, + { + "epoch": 0.49550616595833624, + "grad_norm": 0.5013328790664673, + "learning_rate": 2.27206935819282e-05, + "loss": 0.10778236389160156, + "step": 3556 + }, + { + "epoch": 0.4956455096495506, + "grad_norm": 0.788785457611084, + "learning_rate": 2.2711353779220278e-05, + "loss": 0.106109619140625, + "step": 3557 + }, + { + "epoch": 0.495784853340765, + "grad_norm": 0.37558019161224365, + "learning_rate": 2.2702013374111443e-05, + "loss": 0.09319496154785156, + "step": 3558 + }, + { + "epoch": 0.4959241970319794, + "grad_norm": 0.48046520352363586, + "learning_rate": 2.2692672368676925e-05, + "loss": 0.08017349243164062, + "step": 3559 + }, + { + "epoch": 0.49606354072319375, + "grad_norm": 0.47926652431488037, + "learning_rate": 2.2683330764992083e-05, + "loss": 0.08165359497070312, + "step": 3560 + }, + { + "epoch": 0.49620288441440813, + "grad_norm": 0.3233579993247986, + "learning_rate": 2.2673988565132404e-05, + "loss": 0.07995223999023438, + "step": 3561 + }, + { + "epoch": 0.4963422281056225, + "grad_norm": 0.4408590793609619, + "learning_rate": 2.266464577117352e-05, + "loss": 0.07889175415039062, + "step": 3562 + }, + { + "epoch": 0.4964815717968369, + "grad_norm": 0.6880588531494141, + "learning_rate": 2.2655302385191176e-05, + "loss": 0.11297607421875, + "step": 3563 + }, + { + "epoch": 0.49662091548805126, + "grad_norm": 0.5309742093086243, + "learning_rate": 2.2645958409261256e-05, + "loss": 0.10056686401367188, + "step": 3564 + }, + { + "epoch": 0.49676025917926564, + "grad_norm": 0.3764951527118683, + "learning_rate": 2.2636613845459802e-05, + "loss": 0.0796346664428711, + "step": 3565 + }, + { + "epoch": 0.49689960287048, + "grad_norm": 0.49737733602523804, + "learning_rate": 2.262726869586293e-05, + "loss": 0.09114933013916016, + "step": 3566 + }, + { + "epoch": 0.4970389465616944, + "grad_norm": 0.42466995120048523, + "learning_rate": 2.2617922962546946e-05, + "loss": 0.076507568359375, + "step": 3567 + }, + { + "epoch": 0.49717829025290877, + "grad_norm": 0.5668820142745972, + "learning_rate": 2.2608576647588242e-05, + "loss": 0.09534835815429688, + "step": 3568 + }, + { + "epoch": 0.4973176339441232, + "grad_norm": 0.4347916543483734, + "learning_rate": 2.2599229753063368e-05, + "loss": 0.09530830383300781, + "step": 3569 + }, + { + "epoch": 0.4974569776353376, + "grad_norm": 0.43355435132980347, + "learning_rate": 2.2589882281048984e-05, + "loss": 0.09754180908203125, + "step": 3570 + }, + { + "epoch": 0.49759632132655196, + "grad_norm": 0.31946372985839844, + "learning_rate": 2.258053423362188e-05, + "loss": 0.0771942138671875, + "step": 3571 + }, + { + "epoch": 0.49773566501776634, + "grad_norm": 0.4153016209602356, + "learning_rate": 2.2571185612858987e-05, + "loss": 0.08115863800048828, + "step": 3572 + }, + { + "epoch": 0.4978750087089807, + "grad_norm": 0.33662089705467224, + "learning_rate": 2.256183642083735e-05, + "loss": 0.07797813415527344, + "step": 3573 + }, + { + "epoch": 0.4980143524001951, + "grad_norm": 0.46090275049209595, + "learning_rate": 2.2552486659634148e-05, + "loss": 0.1029815673828125, + "step": 3574 + }, + { + "epoch": 0.49815369609140947, + "grad_norm": 0.3972553312778473, + "learning_rate": 2.2543136331326684e-05, + "loss": 0.0904703140258789, + "step": 3575 + }, + { + "epoch": 0.49829303978262385, + "grad_norm": 0.41653814911842346, + "learning_rate": 2.2533785437992392e-05, + "loss": 0.08826255798339844, + "step": 3576 + }, + { + "epoch": 0.4984323834738382, + "grad_norm": 0.5969926714897156, + "learning_rate": 2.2524433981708822e-05, + "loss": 0.10282325744628906, + "step": 3577 + }, + { + "epoch": 0.4985717271650526, + "grad_norm": 0.3728758990764618, + "learning_rate": 2.2515081964553655e-05, + "loss": 0.08409500122070312, + "step": 3578 + }, + { + "epoch": 0.498711070856267, + "grad_norm": 0.6853375434875488, + "learning_rate": 2.2505729388604692e-05, + "loss": 0.11014747619628906, + "step": 3579 + }, + { + "epoch": 0.49885041454748136, + "grad_norm": 0.38151171803474426, + "learning_rate": 2.2496376255939866e-05, + "loss": 0.09194660186767578, + "step": 3580 + }, + { + "epoch": 0.49898975823869574, + "grad_norm": 0.5134612917900085, + "learning_rate": 2.2487022568637236e-05, + "loss": 0.07385635375976562, + "step": 3581 + }, + { + "epoch": 0.4991291019299101, + "grad_norm": 0.5018208026885986, + "learning_rate": 2.247766832877496e-05, + "loss": 0.09558677673339844, + "step": 3582 + }, + { + "epoch": 0.4992684456211245, + "grad_norm": 0.417513370513916, + "learning_rate": 2.2468313538431355e-05, + "loss": 0.07451248168945312, + "step": 3583 + }, + { + "epoch": 0.49940778931233887, + "grad_norm": 0.574848473072052, + "learning_rate": 2.245895819968483e-05, + "loss": 0.12090873718261719, + "step": 3584 + }, + { + "epoch": 0.49954713300355325, + "grad_norm": 0.3500656187534332, + "learning_rate": 2.2449602314613937e-05, + "loss": 0.09210968017578125, + "step": 3585 + }, + { + "epoch": 0.4996864766947676, + "grad_norm": 0.4185464680194855, + "learning_rate": 2.244024588529734e-05, + "loss": 0.10045129060745239, + "step": 3586 + }, + { + "epoch": 0.499825820385982, + "grad_norm": 0.5309174656867981, + "learning_rate": 2.2430888913813807e-05, + "loss": 0.08686542510986328, + "step": 3587 + }, + { + "epoch": 0.4999651640771964, + "grad_norm": 0.41503453254699707, + "learning_rate": 2.242153140224226e-05, + "loss": 0.0800933837890625, + "step": 3588 + }, + { + "epoch": 0.5001045077684108, + "grad_norm": 0.9058471322059631, + "learning_rate": 2.2412173352661722e-05, + "loss": 0.11584091186523438, + "step": 3589 + }, + { + "epoch": 0.5002438514596251, + "grad_norm": 0.5209627747535706, + "learning_rate": 2.2402814767151333e-05, + "loss": 0.10173797607421875, + "step": 3590 + }, + { + "epoch": 0.5003831951508395, + "grad_norm": 0.40867143869400024, + "learning_rate": 2.2393455647790363e-05, + "loss": 0.09959983825683594, + "step": 3591 + }, + { + "epoch": 0.5005225388420539, + "grad_norm": 0.3687116503715515, + "learning_rate": 2.2384095996658188e-05, + "loss": 0.09241104125976562, + "step": 3592 + }, + { + "epoch": 0.5006618825332683, + "grad_norm": 0.8483180999755859, + "learning_rate": 2.2374735815834315e-05, + "loss": 0.11248970031738281, + "step": 3593 + }, + { + "epoch": 0.5008012262244826, + "grad_norm": 0.3946802318096161, + "learning_rate": 2.2365375107398363e-05, + "loss": 0.10139083862304688, + "step": 3594 + }, + { + "epoch": 0.500940569915697, + "grad_norm": 0.4636661410331726, + "learning_rate": 2.2356013873430058e-05, + "loss": 0.08941841125488281, + "step": 3595 + }, + { + "epoch": 0.5010799136069114, + "grad_norm": 0.3882684111595154, + "learning_rate": 2.2346652116009256e-05, + "loss": 0.09932327270507812, + "step": 3596 + }, + { + "epoch": 0.5012192572981258, + "grad_norm": 0.3645038306713104, + "learning_rate": 2.2337289837215937e-05, + "loss": 0.07796859741210938, + "step": 3597 + }, + { + "epoch": 0.5013586009893402, + "grad_norm": 0.4068838953971863, + "learning_rate": 2.232792703913017e-05, + "loss": 0.06998300552368164, + "step": 3598 + }, + { + "epoch": 0.5014979446805546, + "grad_norm": 0.7660645246505737, + "learning_rate": 2.2318563723832173e-05, + "loss": 0.08663558959960938, + "step": 3599 + }, + { + "epoch": 0.501637288371769, + "grad_norm": 0.760151207447052, + "learning_rate": 2.230919989340224e-05, + "loss": 0.08005523681640625, + "step": 3600 + }, + { + "epoch": 0.5017766320629834, + "grad_norm": 0.870476245880127, + "learning_rate": 2.2299835549920822e-05, + "loss": 0.13806724548339844, + "step": 3601 + }, + { + "epoch": 0.5019159757541978, + "grad_norm": 0.5888090133666992, + "learning_rate": 2.2290470695468443e-05, + "loss": 0.09307861328125, + "step": 3602 + }, + { + "epoch": 0.5020553194454122, + "grad_norm": 0.41574978828430176, + "learning_rate": 2.2281105332125765e-05, + "loss": 0.08404064178466797, + "step": 3603 + }, + { + "epoch": 0.5021946631366265, + "grad_norm": 0.36185958981513977, + "learning_rate": 2.2271739461973567e-05, + "loss": 0.07868385314941406, + "step": 3604 + }, + { + "epoch": 0.5023340068278409, + "grad_norm": 0.6560385227203369, + "learning_rate": 2.2262373087092722e-05, + "loss": 0.09868812561035156, + "step": 3605 + }, + { + "epoch": 0.5024733505190553, + "grad_norm": 0.403791218996048, + "learning_rate": 2.2253006209564233e-05, + "loss": 0.09697914123535156, + "step": 3606 + }, + { + "epoch": 0.5026126942102697, + "grad_norm": 0.4524112641811371, + "learning_rate": 2.2243638831469197e-05, + "loss": 0.08316230773925781, + "step": 3607 + }, + { + "epoch": 0.502752037901484, + "grad_norm": 0.47159361839294434, + "learning_rate": 2.2234270954888833e-05, + "loss": 0.09014129638671875, + "step": 3608 + }, + { + "epoch": 0.5028913815926984, + "grad_norm": 0.3454248309135437, + "learning_rate": 2.2224902581904476e-05, + "loss": 0.07519054412841797, + "step": 3609 + }, + { + "epoch": 0.5030307252839128, + "grad_norm": 1.2102253437042236, + "learning_rate": 2.221553371459756e-05, + "loss": 0.11594200134277344, + "step": 3610 + }, + { + "epoch": 0.5031700689751272, + "grad_norm": 0.6301572322845459, + "learning_rate": 2.2206164355049634e-05, + "loss": 0.09257316589355469, + "step": 3611 + }, + { + "epoch": 0.5033094126663415, + "grad_norm": 0.4084467887878418, + "learning_rate": 2.2196794505342358e-05, + "loss": 0.08700752258300781, + "step": 3612 + }, + { + "epoch": 0.5034487563575559, + "grad_norm": 0.5368004441261292, + "learning_rate": 2.2187424167557496e-05, + "loss": 0.11000251770019531, + "step": 3613 + }, + { + "epoch": 0.5035881000487703, + "grad_norm": 0.5082955956459045, + "learning_rate": 2.2178053343776912e-05, + "loss": 0.0936737060546875, + "step": 3614 + }, + { + "epoch": 0.5037274437399847, + "grad_norm": 0.4300031065940857, + "learning_rate": 2.216868203608262e-05, + "loss": 0.08716011047363281, + "step": 3615 + }, + { + "epoch": 0.5038667874311991, + "grad_norm": 0.369706392288208, + "learning_rate": 2.2159310246556675e-05, + "loss": 0.08617210388183594, + "step": 3616 + }, + { + "epoch": 0.5040061311224134, + "grad_norm": 0.49432602524757385, + "learning_rate": 2.2149937977281296e-05, + "loss": 0.10736751556396484, + "step": 3617 + }, + { + "epoch": 0.5041454748136278, + "grad_norm": 0.4001266658306122, + "learning_rate": 2.214056523033879e-05, + "loss": 0.08938980102539062, + "step": 3618 + }, + { + "epoch": 0.5042848185048422, + "grad_norm": 0.6416260004043579, + "learning_rate": 2.2131192007811552e-05, + "loss": 0.09443473815917969, + "step": 3619 + }, + { + "epoch": 0.5044241621960566, + "grad_norm": 1.0697243213653564, + "learning_rate": 2.2121818311782116e-05, + "loss": 0.1009674072265625, + "step": 3620 + }, + { + "epoch": 0.504563505887271, + "grad_norm": 0.43494752049446106, + "learning_rate": 2.211244414433308e-05, + "loss": 0.08030891418457031, + "step": 3621 + }, + { + "epoch": 0.5047028495784853, + "grad_norm": 0.3795432150363922, + "learning_rate": 2.2103069507547187e-05, + "loss": 0.0842592716217041, + "step": 3622 + }, + { + "epoch": 0.5048421932696997, + "grad_norm": 0.6013314723968506, + "learning_rate": 2.2093694403507264e-05, + "loss": 0.11027908325195312, + "step": 3623 + }, + { + "epoch": 0.5049815369609141, + "grad_norm": 0.46374964714050293, + "learning_rate": 2.208431883429625e-05, + "loss": 0.08806228637695312, + "step": 3624 + }, + { + "epoch": 0.5051208806521285, + "grad_norm": 0.3779250681400299, + "learning_rate": 2.207494280199717e-05, + "loss": 0.07620811462402344, + "step": 3625 + }, + { + "epoch": 0.5052602243433428, + "grad_norm": 0.4536884129047394, + "learning_rate": 2.2065566308693173e-05, + "loss": 0.08696746826171875, + "step": 3626 + }, + { + "epoch": 0.5053995680345572, + "grad_norm": 0.38693687319755554, + "learning_rate": 2.2056189356467498e-05, + "loss": 0.08761024475097656, + "step": 3627 + }, + { + "epoch": 0.5055389117257716, + "grad_norm": 0.3991399109363556, + "learning_rate": 2.2046811947403492e-05, + "loss": 0.08442306518554688, + "step": 3628 + }, + { + "epoch": 0.505678255416986, + "grad_norm": 0.6866554617881775, + "learning_rate": 2.2037434083584605e-05, + "loss": 0.10758781433105469, + "step": 3629 + }, + { + "epoch": 0.5058175991082003, + "grad_norm": 0.4063207805156708, + "learning_rate": 2.2028055767094372e-05, + "loss": 0.07865333557128906, + "step": 3630 + }, + { + "epoch": 0.5059569427994147, + "grad_norm": 0.37106505036354065, + "learning_rate": 2.2018677000016463e-05, + "loss": 0.09174156188964844, + "step": 3631 + }, + { + "epoch": 0.5060962864906291, + "grad_norm": 0.49302536249160767, + "learning_rate": 2.2009297784434595e-05, + "loss": 0.0956268310546875, + "step": 3632 + }, + { + "epoch": 0.5062356301818435, + "grad_norm": 0.342551052570343, + "learning_rate": 2.199991812243264e-05, + "loss": 0.09733009338378906, + "step": 3633 + }, + { + "epoch": 0.5063749738730579, + "grad_norm": 0.45235317945480347, + "learning_rate": 2.1990538016094537e-05, + "loss": 0.08396148681640625, + "step": 3634 + }, + { + "epoch": 0.5065143175642722, + "grad_norm": 0.4807332754135132, + "learning_rate": 2.1981157467504332e-05, + "loss": 0.09277725219726562, + "step": 3635 + }, + { + "epoch": 0.5066536612554866, + "grad_norm": 0.629163920879364, + "learning_rate": 2.1971776478746176e-05, + "loss": 0.0998382568359375, + "step": 3636 + }, + { + "epoch": 0.506793004946701, + "grad_norm": 0.4443052411079407, + "learning_rate": 2.196239505190429e-05, + "loss": 0.092742919921875, + "step": 3637 + }, + { + "epoch": 0.5069323486379154, + "grad_norm": 0.3620969355106354, + "learning_rate": 2.195301318906303e-05, + "loss": 0.08141422271728516, + "step": 3638 + }, + { + "epoch": 0.5070716923291299, + "grad_norm": 0.4078946113586426, + "learning_rate": 2.194363089230683e-05, + "loss": 0.08268547058105469, + "step": 3639 + }, + { + "epoch": 0.5072110360203442, + "grad_norm": 0.5177688598632812, + "learning_rate": 2.193424816372022e-05, + "loss": 0.08778762817382812, + "step": 3640 + }, + { + "epoch": 0.5073503797115586, + "grad_norm": 0.5401345491409302, + "learning_rate": 2.1924865005387822e-05, + "loss": 0.09831428527832031, + "step": 3641 + }, + { + "epoch": 0.507489723402773, + "grad_norm": 0.8293035626411438, + "learning_rate": 2.1915481419394373e-05, + "loss": 0.11754989624023438, + "step": 3642 + }, + { + "epoch": 0.5076290670939874, + "grad_norm": 0.4344964027404785, + "learning_rate": 2.190609740782468e-05, + "loss": 0.09390068054199219, + "step": 3643 + }, + { + "epoch": 0.5077684107852017, + "grad_norm": 0.3410934805870056, + "learning_rate": 2.1896712972763658e-05, + "loss": 0.08536911010742188, + "step": 3644 + }, + { + "epoch": 0.5079077544764161, + "grad_norm": 0.45922988653182983, + "learning_rate": 2.1887328116296315e-05, + "loss": 0.09827804565429688, + "step": 3645 + }, + { + "epoch": 0.5080470981676305, + "grad_norm": 0.5690625309944153, + "learning_rate": 2.1877942840507752e-05, + "loss": 0.08932113647460938, + "step": 3646 + }, + { + "epoch": 0.5081864418588449, + "grad_norm": 0.4832628667354584, + "learning_rate": 2.1868557147483176e-05, + "loss": 0.09684371948242188, + "step": 3647 + }, + { + "epoch": 0.5083257855500593, + "grad_norm": 0.5319449305534363, + "learning_rate": 2.1859171039307848e-05, + "loss": 0.10143852233886719, + "step": 3648 + }, + { + "epoch": 0.5084651292412736, + "grad_norm": 0.6148675084114075, + "learning_rate": 2.1849784518067172e-05, + "loss": 0.11048316955566406, + "step": 3649 + }, + { + "epoch": 0.508604472932488, + "grad_norm": 0.29068267345428467, + "learning_rate": 2.1840397585846594e-05, + "loss": 0.07510566711425781, + "step": 3650 + }, + { + "epoch": 0.5087438166237024, + "grad_norm": 0.4682142734527588, + "learning_rate": 2.1831010244731697e-05, + "loss": 0.10075950622558594, + "step": 3651 + }, + { + "epoch": 0.5088831603149168, + "grad_norm": 0.307883083820343, + "learning_rate": 2.182162249680813e-05, + "loss": 0.06961631774902344, + "step": 3652 + }, + { + "epoch": 0.5090225040061311, + "grad_norm": 0.49386391043663025, + "learning_rate": 2.1812234344161623e-05, + "loss": 0.1064605712890625, + "step": 3653 + }, + { + "epoch": 0.5091618476973455, + "grad_norm": 0.4236346483230591, + "learning_rate": 2.1802845788878027e-05, + "loss": 0.08876419067382812, + "step": 3654 + }, + { + "epoch": 0.5093011913885599, + "grad_norm": 0.3525296747684479, + "learning_rate": 2.1793456833043253e-05, + "loss": 0.08240127563476562, + "step": 3655 + }, + { + "epoch": 0.5094405350797743, + "grad_norm": 0.2932608723640442, + "learning_rate": 2.1784067478743317e-05, + "loss": 0.07145118713378906, + "step": 3656 + }, + { + "epoch": 0.5095798787709886, + "grad_norm": 0.43760648369789124, + "learning_rate": 2.177467772806432e-05, + "loss": 0.08058929443359375, + "step": 3657 + }, + { + "epoch": 0.509719222462203, + "grad_norm": 0.4730337858200073, + "learning_rate": 2.1765287583092447e-05, + "loss": 0.08542346954345703, + "step": 3658 + }, + { + "epoch": 0.5098585661534174, + "grad_norm": 0.5720928311347961, + "learning_rate": 2.1755897045913975e-05, + "loss": 0.09202384948730469, + "step": 3659 + }, + { + "epoch": 0.5099979098446318, + "grad_norm": 0.49732205271720886, + "learning_rate": 2.1746506118615267e-05, + "loss": 0.09232902526855469, + "step": 3660 + }, + { + "epoch": 0.5101372535358462, + "grad_norm": 0.3914588689804077, + "learning_rate": 2.173711480328277e-05, + "loss": 0.08130264282226562, + "step": 3661 + }, + { + "epoch": 0.5102765972270605, + "grad_norm": 0.4294493496417999, + "learning_rate": 2.1727723102003023e-05, + "loss": 0.09495925903320312, + "step": 3662 + }, + { + "epoch": 0.5104159409182749, + "grad_norm": 0.3532637357711792, + "learning_rate": 2.1718331016862657e-05, + "loss": 0.090118408203125, + "step": 3663 + }, + { + "epoch": 0.5105552846094893, + "grad_norm": 0.5191905498504639, + "learning_rate": 2.1708938549948354e-05, + "loss": 0.11635017395019531, + "step": 3664 + }, + { + "epoch": 0.5106946283007037, + "grad_norm": 0.38178643584251404, + "learning_rate": 2.1699545703346934e-05, + "loss": 0.08469009399414062, + "step": 3665 + }, + { + "epoch": 0.510833971991918, + "grad_norm": 0.5957871675491333, + "learning_rate": 2.1690152479145254e-05, + "loss": 0.08424949645996094, + "step": 3666 + }, + { + "epoch": 0.5109733156831324, + "grad_norm": 0.39898112416267395, + "learning_rate": 2.1680758879430283e-05, + "loss": 0.07890796661376953, + "step": 3667 + }, + { + "epoch": 0.5111126593743468, + "grad_norm": 0.3596453368663788, + "learning_rate": 2.1671364906289053e-05, + "loss": 0.09108924865722656, + "step": 3668 + }, + { + "epoch": 0.5112520030655612, + "grad_norm": 0.35532811284065247, + "learning_rate": 2.166197056180871e-05, + "loss": 0.09378814697265625, + "step": 3669 + }, + { + "epoch": 0.5113913467567756, + "grad_norm": 0.3006635010242462, + "learning_rate": 2.1652575848076446e-05, + "loss": 0.06777763366699219, + "step": 3670 + }, + { + "epoch": 0.5115306904479899, + "grad_norm": 0.7797800898551941, + "learning_rate": 2.1643180767179558e-05, + "loss": 0.14500045776367188, + "step": 3671 + }, + { + "epoch": 0.5116700341392043, + "grad_norm": 0.32864218950271606, + "learning_rate": 2.163378532120542e-05, + "loss": 0.08877182006835938, + "step": 3672 + }, + { + "epoch": 0.5118093778304187, + "grad_norm": 0.3554946482181549, + "learning_rate": 2.162438951224148e-05, + "loss": 0.08022880554199219, + "step": 3673 + }, + { + "epoch": 0.5119487215216331, + "grad_norm": 0.6575273871421814, + "learning_rate": 2.1614993342375277e-05, + "loss": 0.10730743408203125, + "step": 3674 + }, + { + "epoch": 0.5120880652128474, + "grad_norm": 0.3827268183231354, + "learning_rate": 2.1605596813694426e-05, + "loss": 0.07926368713378906, + "step": 3675 + }, + { + "epoch": 0.5122274089040618, + "grad_norm": 0.4101925194263458, + "learning_rate": 2.1596199928286618e-05, + "loss": 0.08885765075683594, + "step": 3676 + }, + { + "epoch": 0.5123667525952762, + "grad_norm": 0.5414935946464539, + "learning_rate": 2.1586802688239627e-05, + "loss": 0.10391759872436523, + "step": 3677 + }, + { + "epoch": 0.5125060962864906, + "grad_norm": 0.3827451765537262, + "learning_rate": 2.1577405095641307e-05, + "loss": 0.08294486999511719, + "step": 3678 + }, + { + "epoch": 0.5126454399777051, + "grad_norm": 0.37426990270614624, + "learning_rate": 2.156800715257959e-05, + "loss": 0.10097503662109375, + "step": 3679 + }, + { + "epoch": 0.5127847836689194, + "grad_norm": 0.4701220393180847, + "learning_rate": 2.1558608861142472e-05, + "loss": 0.10257339477539062, + "step": 3680 + }, + { + "epoch": 0.5129241273601338, + "grad_norm": 0.4991037845611572, + "learning_rate": 2.1549210223418063e-05, + "loss": 0.09327507019042969, + "step": 3681 + }, + { + "epoch": 0.5130634710513482, + "grad_norm": 0.41940033435821533, + "learning_rate": 2.15398112414945e-05, + "loss": 0.1027679443359375, + "step": 3682 + }, + { + "epoch": 0.5132028147425626, + "grad_norm": 0.3218013644218445, + "learning_rate": 2.1530411917460037e-05, + "loss": 0.08744239807128906, + "step": 3683 + }, + { + "epoch": 0.513342158433777, + "grad_norm": 0.3609306514263153, + "learning_rate": 2.1521012253402987e-05, + "loss": 0.08336448669433594, + "step": 3684 + }, + { + "epoch": 0.5134815021249913, + "grad_norm": 0.3523862957954407, + "learning_rate": 2.151161225141174e-05, + "loss": 0.08104896545410156, + "step": 3685 + }, + { + "epoch": 0.5136208458162057, + "grad_norm": 0.5452578663825989, + "learning_rate": 2.1502211913574764e-05, + "loss": 0.10735321044921875, + "step": 3686 + }, + { + "epoch": 0.5137601895074201, + "grad_norm": 0.5777164101600647, + "learning_rate": 2.1492811241980595e-05, + "loss": 0.10200703144073486, + "step": 3687 + }, + { + "epoch": 0.5138995331986345, + "grad_norm": 0.3249468505382538, + "learning_rate": 2.1483410238717844e-05, + "loss": 0.08119583129882812, + "step": 3688 + }, + { + "epoch": 0.5140388768898488, + "grad_norm": 0.4005364179611206, + "learning_rate": 2.147400890587521e-05, + "loss": 0.08219337463378906, + "step": 3689 + }, + { + "epoch": 0.5141782205810632, + "grad_norm": 0.416293203830719, + "learning_rate": 2.146460724554145e-05, + "loss": 0.09008598327636719, + "step": 3690 + }, + { + "epoch": 0.5143175642722776, + "grad_norm": 0.35787221789360046, + "learning_rate": 2.1455205259805396e-05, + "loss": 0.0961749255657196, + "step": 3691 + }, + { + "epoch": 0.514456907963492, + "grad_norm": 0.4129320979118347, + "learning_rate": 2.1445802950755956e-05, + "loss": 0.09149551391601562, + "step": 3692 + }, + { + "epoch": 0.5145962516547063, + "grad_norm": 0.750744640827179, + "learning_rate": 2.143640032048211e-05, + "loss": 0.08007621765136719, + "step": 3693 + }, + { + "epoch": 0.5147355953459207, + "grad_norm": 0.4491068124771118, + "learning_rate": 2.1426997371072905e-05, + "loss": 0.0926361083984375, + "step": 3694 + }, + { + "epoch": 0.5148749390371351, + "grad_norm": 0.31393682956695557, + "learning_rate": 2.141759410461746e-05, + "loss": 0.09554481506347656, + "step": 3695 + }, + { + "epoch": 0.5150142827283495, + "grad_norm": 0.4454667866230011, + "learning_rate": 2.140819052320497e-05, + "loss": 0.1003265380859375, + "step": 3696 + }, + { + "epoch": 0.5151536264195639, + "grad_norm": 0.468928724527359, + "learning_rate": 2.1398786628924705e-05, + "loss": 0.11100196838378906, + "step": 3697 + }, + { + "epoch": 0.5152929701107782, + "grad_norm": 0.7017589807510376, + "learning_rate": 2.1389382423865973e-05, + "loss": 0.10811614990234375, + "step": 3698 + }, + { + "epoch": 0.5154323138019926, + "grad_norm": 0.4212442636489868, + "learning_rate": 2.137997791011819e-05, + "loss": 0.07799148559570312, + "step": 3699 + }, + { + "epoch": 0.515571657493207, + "grad_norm": 0.42102673649787903, + "learning_rate": 2.1370573089770823e-05, + "loss": 0.10882234573364258, + "step": 3700 + }, + { + "epoch": 0.5157110011844214, + "grad_norm": 0.5088742971420288, + "learning_rate": 2.136116796491341e-05, + "loss": 0.11180877685546875, + "step": 3701 + }, + { + "epoch": 0.5158503448756357, + "grad_norm": 0.2795776426792145, + "learning_rate": 2.1351762537635553e-05, + "loss": 0.07112693786621094, + "step": 3702 + }, + { + "epoch": 0.5159896885668501, + "grad_norm": 0.48182690143585205, + "learning_rate": 2.134235681002691e-05, + "loss": 0.08132362365722656, + "step": 3703 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.747715950012207, + "learning_rate": 2.1332950784177235e-05, + "loss": 0.09625244140625, + "step": 3704 + }, + { + "epoch": 0.5162683759492789, + "grad_norm": 0.5448724031448364, + "learning_rate": 2.1323544462176325e-05, + "loss": 0.09053230285644531, + "step": 3705 + }, + { + "epoch": 0.5164077196404933, + "grad_norm": 0.589582622051239, + "learning_rate": 2.131413784611406e-05, + "loss": 0.08566570281982422, + "step": 3706 + }, + { + "epoch": 0.5165470633317076, + "grad_norm": 0.5089551210403442, + "learning_rate": 2.1304730938080364e-05, + "loss": 0.11092758178710938, + "step": 3707 + }, + { + "epoch": 0.516686407022922, + "grad_norm": 0.5156731009483337, + "learning_rate": 2.129532374016524e-05, + "loss": 0.09236717224121094, + "step": 3708 + }, + { + "epoch": 0.5168257507141364, + "grad_norm": 0.41378918290138245, + "learning_rate": 2.128591625445876e-05, + "loss": 0.08879280090332031, + "step": 3709 + }, + { + "epoch": 0.5169650944053508, + "grad_norm": 0.7643335461616516, + "learning_rate": 2.127650848305104e-05, + "loss": 0.11204719543457031, + "step": 3710 + }, + { + "epoch": 0.5171044380965651, + "grad_norm": 0.38326606154441833, + "learning_rate": 2.1267100428032276e-05, + "loss": 0.09032058715820312, + "step": 3711 + }, + { + "epoch": 0.5172437817877795, + "grad_norm": 0.6845988035202026, + "learning_rate": 2.1257692091492724e-05, + "loss": 0.09787940979003906, + "step": 3712 + }, + { + "epoch": 0.5173831254789939, + "grad_norm": 0.5907015204429626, + "learning_rate": 2.1248283475522712e-05, + "loss": 0.09325218200683594, + "step": 3713 + }, + { + "epoch": 0.5175224691702083, + "grad_norm": 0.4220942556858063, + "learning_rate": 2.1238874582212602e-05, + "loss": 0.09609222412109375, + "step": 3714 + }, + { + "epoch": 0.5176618128614227, + "grad_norm": 0.3649367392063141, + "learning_rate": 2.1229465413652854e-05, + "loss": 0.08612632751464844, + "step": 3715 + }, + { + "epoch": 0.517801156552637, + "grad_norm": 0.5303810238838196, + "learning_rate": 2.122005597193395e-05, + "loss": 0.08838939666748047, + "step": 3716 + }, + { + "epoch": 0.5179405002438514, + "grad_norm": 0.40369632840156555, + "learning_rate": 2.1210646259146466e-05, + "loss": 0.08483695983886719, + "step": 3717 + }, + { + "epoch": 0.5180798439350658, + "grad_norm": 0.34404662251472473, + "learning_rate": 2.1201236277381028e-05, + "loss": 0.08151054382324219, + "step": 3718 + }, + { + "epoch": 0.5182191876262803, + "grad_norm": 0.4764569103717804, + "learning_rate": 2.119182602872831e-05, + "loss": 0.08526229858398438, + "step": 3719 + }, + { + "epoch": 0.5183585313174947, + "grad_norm": 0.5915489792823792, + "learning_rate": 2.1182415515279056e-05, + "loss": 0.10617828369140625, + "step": 3720 + }, + { + "epoch": 0.518497875008709, + "grad_norm": 0.3148222267627716, + "learning_rate": 2.117300473912407e-05, + "loss": 0.08155441284179688, + "step": 3721 + }, + { + "epoch": 0.5186372186999234, + "grad_norm": 0.34973978996276855, + "learning_rate": 2.1163593702354213e-05, + "loss": 0.08449423313140869, + "step": 3722 + }, + { + "epoch": 0.5187765623911378, + "grad_norm": 0.6425710320472717, + "learning_rate": 2.11541824070604e-05, + "loss": 0.09778785705566406, + "step": 3723 + }, + { + "epoch": 0.5189159060823522, + "grad_norm": 0.5917773842811584, + "learning_rate": 2.114477085533361e-05, + "loss": 0.09911346435546875, + "step": 3724 + }, + { + "epoch": 0.5190552497735665, + "grad_norm": 0.4796721339225769, + "learning_rate": 2.1135359049264868e-05, + "loss": 0.07940673828125, + "step": 3725 + }, + { + "epoch": 0.5191945934647809, + "grad_norm": 0.31829744577407837, + "learning_rate": 2.1125946990945264e-05, + "loss": 0.08165740966796875, + "step": 3726 + }, + { + "epoch": 0.5193339371559953, + "grad_norm": 0.4833441376686096, + "learning_rate": 2.111653468246595e-05, + "loss": 0.09968185424804688, + "step": 3727 + }, + { + "epoch": 0.5194732808472097, + "grad_norm": 0.4405209720134735, + "learning_rate": 2.1107122125918112e-05, + "loss": 0.08397483825683594, + "step": 3728 + }, + { + "epoch": 0.519612624538424, + "grad_norm": 1.1745721101760864, + "learning_rate": 2.1097709323393026e-05, + "loss": 0.12460136413574219, + "step": 3729 + }, + { + "epoch": 0.5197519682296384, + "grad_norm": 0.49217137694358826, + "learning_rate": 2.1088296276981978e-05, + "loss": 0.08648824691772461, + "step": 3730 + }, + { + "epoch": 0.5198913119208528, + "grad_norm": 0.5688654184341431, + "learning_rate": 2.1078882988776352e-05, + "loss": 0.10580778121948242, + "step": 3731 + }, + { + "epoch": 0.5200306556120672, + "grad_norm": 1.0472456216812134, + "learning_rate": 2.1069469460867547e-05, + "loss": 0.130279541015625, + "step": 3732 + }, + { + "epoch": 0.5201699993032816, + "grad_norm": 0.4732559025287628, + "learning_rate": 2.106005569534705e-05, + "loss": 0.08551216125488281, + "step": 3733 + }, + { + "epoch": 0.5203093429944959, + "grad_norm": 0.4382063150405884, + "learning_rate": 2.105064169430638e-05, + "loss": 0.09051704406738281, + "step": 3734 + }, + { + "epoch": 0.5204486866857103, + "grad_norm": 0.4585205316543579, + "learning_rate": 2.1041227459837112e-05, + "loss": 0.07781696319580078, + "step": 3735 + }, + { + "epoch": 0.5205880303769247, + "grad_norm": 0.4260081946849823, + "learning_rate": 2.103181299403088e-05, + "loss": 0.0804147720336914, + "step": 3736 + }, + { + "epoch": 0.5207273740681391, + "grad_norm": 0.5267416834831238, + "learning_rate": 2.1022398298979345e-05, + "loss": 0.09594058990478516, + "step": 3737 + }, + { + "epoch": 0.5208667177593534, + "grad_norm": 0.722057044506073, + "learning_rate": 2.1012983376774255e-05, + "loss": 0.10251235961914062, + "step": 3738 + }, + { + "epoch": 0.5210060614505678, + "grad_norm": 0.34725332260131836, + "learning_rate": 2.100356822950739e-05, + "loss": 0.07402801513671875, + "step": 3739 + }, + { + "epoch": 0.5211454051417822, + "grad_norm": 0.37294360995292664, + "learning_rate": 2.099415285927057e-05, + "loss": 0.08238601684570312, + "step": 3740 + }, + { + "epoch": 0.5212847488329966, + "grad_norm": 0.4789356589317322, + "learning_rate": 2.0984737268155686e-05, + "loss": 0.09409523010253906, + "step": 3741 + }, + { + "epoch": 0.521424092524211, + "grad_norm": 0.38946589827537537, + "learning_rate": 2.097532145825466e-05, + "loss": 0.09158992767333984, + "step": 3742 + }, + { + "epoch": 0.5215634362154253, + "grad_norm": 0.29835647344589233, + "learning_rate": 2.0965905431659475e-05, + "loss": 0.07580757141113281, + "step": 3743 + }, + { + "epoch": 0.5217027799066397, + "grad_norm": 0.34200409054756165, + "learning_rate": 2.0956489190462156e-05, + "loss": 0.0792236328125, + "step": 3744 + }, + { + "epoch": 0.5218421235978541, + "grad_norm": 0.41288667917251587, + "learning_rate": 2.094707273675477e-05, + "loss": 0.10360431671142578, + "step": 3745 + }, + { + "epoch": 0.5219814672890685, + "grad_norm": 0.6773748993873596, + "learning_rate": 2.0937656072629444e-05, + "loss": 0.09508705139160156, + "step": 3746 + }, + { + "epoch": 0.5221208109802828, + "grad_norm": 0.5020942687988281, + "learning_rate": 2.0928239200178355e-05, + "loss": 0.09840202331542969, + "step": 3747 + }, + { + "epoch": 0.5222601546714972, + "grad_norm": 0.6772862076759338, + "learning_rate": 2.0918822121493697e-05, + "loss": 0.10821342468261719, + "step": 3748 + }, + { + "epoch": 0.5223994983627116, + "grad_norm": 0.42922666668891907, + "learning_rate": 2.0909404838667746e-05, + "loss": 0.08841133117675781, + "step": 3749 + }, + { + "epoch": 0.522538842053926, + "grad_norm": 0.5074878931045532, + "learning_rate": 2.08999873537928e-05, + "loss": 0.10151481628417969, + "step": 3750 + }, + { + "epoch": 0.5226781857451404, + "grad_norm": 0.612980306148529, + "learning_rate": 2.089056966896122e-05, + "loss": 0.09383773803710938, + "step": 3751 + }, + { + "epoch": 0.5228175294363547, + "grad_norm": 0.5071472525596619, + "learning_rate": 2.088115178626539e-05, + "loss": 0.09774589538574219, + "step": 3752 + }, + { + "epoch": 0.5229568731275691, + "grad_norm": 0.35658007860183716, + "learning_rate": 2.0871733707797738e-05, + "loss": 0.08358955383300781, + "step": 3753 + }, + { + "epoch": 0.5230962168187835, + "grad_norm": 0.3966161012649536, + "learning_rate": 2.0862315435650766e-05, + "loss": 0.09838485717773438, + "step": 3754 + }, + { + "epoch": 0.5232355605099979, + "grad_norm": 0.5904475450515747, + "learning_rate": 2.085289697191699e-05, + "loss": 0.09019088745117188, + "step": 3755 + }, + { + "epoch": 0.5233749042012122, + "grad_norm": 0.36541062593460083, + "learning_rate": 2.0843478318688978e-05, + "loss": 0.09548377990722656, + "step": 3756 + }, + { + "epoch": 0.5235142478924266, + "grad_norm": 0.469536155462265, + "learning_rate": 2.083405947805934e-05, + "loss": 0.08511543273925781, + "step": 3757 + }, + { + "epoch": 0.523653591583641, + "grad_norm": 0.4039689004421234, + "learning_rate": 2.082464045212073e-05, + "loss": 0.09076881408691406, + "step": 3758 + }, + { + "epoch": 0.5237929352748554, + "grad_norm": 0.3504207730293274, + "learning_rate": 2.0815221242965835e-05, + "loss": 0.09127044677734375, + "step": 3759 + }, + { + "epoch": 0.5239322789660699, + "grad_norm": 0.48429232835769653, + "learning_rate": 2.0805801852687396e-05, + "loss": 0.09802055358886719, + "step": 3760 + }, + { + "epoch": 0.5240716226572842, + "grad_norm": 0.47880664467811584, + "learning_rate": 2.0796382283378183e-05, + "loss": 0.08037185668945312, + "step": 3761 + }, + { + "epoch": 0.5242109663484986, + "grad_norm": 0.6842708587646484, + "learning_rate": 2.0786962537131e-05, + "loss": 0.10823822021484375, + "step": 3762 + }, + { + "epoch": 0.524350310039713, + "grad_norm": 0.5283544659614563, + "learning_rate": 2.0777542616038718e-05, + "loss": 0.08578348159790039, + "step": 3763 + }, + { + "epoch": 0.5244896537309274, + "grad_norm": 0.42669686675071716, + "learning_rate": 2.0768122522194208e-05, + "loss": 0.10999870300292969, + "step": 3764 + }, + { + "epoch": 0.5246289974221418, + "grad_norm": 0.3537612557411194, + "learning_rate": 2.0758702257690418e-05, + "loss": 0.08132171630859375, + "step": 3765 + }, + { + "epoch": 0.5247683411133561, + "grad_norm": 0.524139404296875, + "learning_rate": 2.0749281824620306e-05, + "loss": 0.07874107360839844, + "step": 3766 + }, + { + "epoch": 0.5249076848045705, + "grad_norm": 0.5306811928749084, + "learning_rate": 2.073986122507688e-05, + "loss": 0.07938957214355469, + "step": 3767 + }, + { + "epoch": 0.5250470284957849, + "grad_norm": 0.5473001599311829, + "learning_rate": 2.0730440461153183e-05, + "loss": 0.10400867462158203, + "step": 3768 + }, + { + "epoch": 0.5251863721869993, + "grad_norm": 0.47956952452659607, + "learning_rate": 2.0721019534942285e-05, + "loss": 0.08798027038574219, + "step": 3769 + }, + { + "epoch": 0.5253257158782136, + "grad_norm": 0.7060845494270325, + "learning_rate": 2.071159844853731e-05, + "loss": 0.09305953979492188, + "step": 3770 + }, + { + "epoch": 0.525465059569428, + "grad_norm": 0.4688463509082794, + "learning_rate": 2.070217720403141e-05, + "loss": 0.1017608642578125, + "step": 3771 + }, + { + "epoch": 0.5256044032606424, + "grad_norm": 0.5257765054702759, + "learning_rate": 2.0692755803517764e-05, + "loss": 0.10313606262207031, + "step": 3772 + }, + { + "epoch": 0.5257437469518568, + "grad_norm": 0.36687567830085754, + "learning_rate": 2.0683334249089593e-05, + "loss": 0.0745382308959961, + "step": 3773 + }, + { + "epoch": 0.5258830906430711, + "grad_norm": 0.49871858954429626, + "learning_rate": 2.067391254284015e-05, + "loss": 0.09165382385253906, + "step": 3774 + }, + { + "epoch": 0.5260224343342855, + "grad_norm": 0.5035704970359802, + "learning_rate": 2.066449068686273e-05, + "loss": 0.11998367309570312, + "step": 3775 + }, + { + "epoch": 0.5261617780254999, + "grad_norm": 0.5320033431053162, + "learning_rate": 2.065506868325065e-05, + "loss": 0.08712482452392578, + "step": 3776 + }, + { + "epoch": 0.5263011217167143, + "grad_norm": 0.5815989971160889, + "learning_rate": 2.0645646534097262e-05, + "loss": 0.10517120361328125, + "step": 3777 + }, + { + "epoch": 0.5264404654079287, + "grad_norm": 0.44674915075302124, + "learning_rate": 2.0636224241495954e-05, + "loss": 0.08629226684570312, + "step": 3778 + }, + { + "epoch": 0.526579809099143, + "grad_norm": 0.6215022206306458, + "learning_rate": 2.0626801807540148e-05, + "loss": 0.08412551879882812, + "step": 3779 + }, + { + "epoch": 0.5267191527903574, + "grad_norm": 0.4710548222064972, + "learning_rate": 2.0617379234323285e-05, + "loss": 0.08379745483398438, + "step": 3780 + }, + { + "epoch": 0.5268584964815718, + "grad_norm": 0.42005425691604614, + "learning_rate": 2.060795652393886e-05, + "loss": 0.116058349609375, + "step": 3781 + }, + { + "epoch": 0.5269978401727862, + "grad_norm": 0.41082364320755005, + "learning_rate": 2.0598533678480367e-05, + "loss": 0.098602294921875, + "step": 3782 + }, + { + "epoch": 0.5271371838640005, + "grad_norm": 0.5941078662872314, + "learning_rate": 2.0589110700041357e-05, + "loss": 0.08664798736572266, + "step": 3783 + }, + { + "epoch": 0.5272765275552149, + "grad_norm": 0.3257017433643341, + "learning_rate": 2.0579687590715404e-05, + "loss": 0.08169364929199219, + "step": 3784 + }, + { + "epoch": 0.5274158712464293, + "grad_norm": 0.6959720253944397, + "learning_rate": 2.0570264352596096e-05, + "loss": 0.10268211364746094, + "step": 3785 + }, + { + "epoch": 0.5275552149376437, + "grad_norm": 0.40468597412109375, + "learning_rate": 2.0560840987777074e-05, + "loss": 0.09455490112304688, + "step": 3786 + }, + { + "epoch": 0.5276945586288581, + "grad_norm": 0.3662451207637787, + "learning_rate": 2.0551417498351985e-05, + "loss": 0.08351516723632812, + "step": 3787 + }, + { + "epoch": 0.5278339023200724, + "grad_norm": 0.6665486097335815, + "learning_rate": 2.0541993886414516e-05, + "loss": 0.07935047149658203, + "step": 3788 + }, + { + "epoch": 0.5279732460112868, + "grad_norm": 0.4079313278198242, + "learning_rate": 2.0532570154058385e-05, + "loss": 0.0872039794921875, + "step": 3789 + }, + { + "epoch": 0.5281125897025012, + "grad_norm": 0.4404827654361725, + "learning_rate": 2.0523146303377318e-05, + "loss": 0.09342384338378906, + "step": 3790 + }, + { + "epoch": 0.5282519333937156, + "grad_norm": 0.3949267268180847, + "learning_rate": 2.0513722336465092e-05, + "loss": 0.07919979095458984, + "step": 3791 + }, + { + "epoch": 0.52839127708493, + "grad_norm": 0.35659992694854736, + "learning_rate": 2.0504298255415488e-05, + "loss": 0.07785272598266602, + "step": 3792 + }, + { + "epoch": 0.5285306207761443, + "grad_norm": 0.44609546661376953, + "learning_rate": 2.0494874062322324e-05, + "loss": 0.09398460388183594, + "step": 3793 + }, + { + "epoch": 0.5286699644673587, + "grad_norm": 0.4474690854549408, + "learning_rate": 2.0485449759279442e-05, + "loss": 0.08215713500976562, + "step": 3794 + }, + { + "epoch": 0.5288093081585731, + "grad_norm": 0.407185435295105, + "learning_rate": 2.047602534838071e-05, + "loss": 0.09047508239746094, + "step": 3795 + }, + { + "epoch": 0.5289486518497875, + "grad_norm": 0.5444326996803284, + "learning_rate": 2.0466600831720006e-05, + "loss": 0.12375259399414062, + "step": 3796 + }, + { + "epoch": 0.5290879955410018, + "grad_norm": 0.5019745826721191, + "learning_rate": 2.0457176211391257e-05, + "loss": 0.11878395080566406, + "step": 3797 + }, + { + "epoch": 0.5292273392322162, + "grad_norm": 0.4941440224647522, + "learning_rate": 2.0447751489488387e-05, + "loss": 0.11546993255615234, + "step": 3798 + }, + { + "epoch": 0.5293666829234306, + "grad_norm": 0.4588523805141449, + "learning_rate": 2.0438326668105364e-05, + "loss": 0.11458969116210938, + "step": 3799 + }, + { + "epoch": 0.5295060266146451, + "grad_norm": 0.4175533056259155, + "learning_rate": 2.0428901749336157e-05, + "loss": 0.09730148315429688, + "step": 3800 + }, + { + "epoch": 0.5296453703058595, + "grad_norm": 0.4422464966773987, + "learning_rate": 2.0419476735274774e-05, + "loss": 0.08552742004394531, + "step": 3801 + }, + { + "epoch": 0.5297847139970738, + "grad_norm": 0.6904864311218262, + "learning_rate": 2.0410051628015247e-05, + "loss": 0.10288810729980469, + "step": 3802 + }, + { + "epoch": 0.5299240576882882, + "grad_norm": 0.3207360506057739, + "learning_rate": 2.0400626429651595e-05, + "loss": 0.08298587799072266, + "step": 3803 + }, + { + "epoch": 0.5300634013795026, + "grad_norm": 0.7301559448242188, + "learning_rate": 2.0391201142277905e-05, + "loss": 0.11353874206542969, + "step": 3804 + }, + { + "epoch": 0.530202745070717, + "grad_norm": 0.5728006362915039, + "learning_rate": 2.038177576798825e-05, + "loss": 0.09410476684570312, + "step": 3805 + }, + { + "epoch": 0.5303420887619313, + "grad_norm": 0.4525412321090698, + "learning_rate": 2.0372350308876732e-05, + "loss": 0.09236907958984375, + "step": 3806 + }, + { + "epoch": 0.5304814324531457, + "grad_norm": 0.3329944610595703, + "learning_rate": 2.0362924767037485e-05, + "loss": 0.08283233642578125, + "step": 3807 + }, + { + "epoch": 0.5306207761443601, + "grad_norm": 0.5734912753105164, + "learning_rate": 2.0353499144564636e-05, + "loss": 0.09320068359375, + "step": 3808 + }, + { + "epoch": 0.5307601198355745, + "grad_norm": 0.3559733033180237, + "learning_rate": 2.0344073443552347e-05, + "loss": 0.08942222595214844, + "step": 3809 + }, + { + "epoch": 0.5308994635267889, + "grad_norm": 0.5292993187904358, + "learning_rate": 2.0334647666094796e-05, + "loss": 0.0975503921508789, + "step": 3810 + }, + { + "epoch": 0.5310388072180032, + "grad_norm": 0.573083221912384, + "learning_rate": 2.0325221814286173e-05, + "loss": 0.09752464294433594, + "step": 3811 + }, + { + "epoch": 0.5311781509092176, + "grad_norm": 0.4975278973579407, + "learning_rate": 2.031579589022068e-05, + "loss": 0.0892629623413086, + "step": 3812 + }, + { + "epoch": 0.531317494600432, + "grad_norm": 0.4234524071216583, + "learning_rate": 2.0306369895992564e-05, + "loss": 0.07439231872558594, + "step": 3813 + }, + { + "epoch": 0.5314568382916464, + "grad_norm": 0.3236529231071472, + "learning_rate": 2.029694383369604e-05, + "loss": 0.07442140579223633, + "step": 3814 + }, + { + "epoch": 0.5315961819828607, + "grad_norm": 0.45216605067253113, + "learning_rate": 2.028751770542538e-05, + "loss": 0.0748443603515625, + "step": 3815 + }, + { + "epoch": 0.5317355256740751, + "grad_norm": 0.5155174732208252, + "learning_rate": 2.0278091513274848e-05, + "loss": 0.10266399383544922, + "step": 3816 + }, + { + "epoch": 0.5318748693652895, + "grad_norm": 0.45154058933258057, + "learning_rate": 2.0268665259338736e-05, + "loss": 0.10002899169921875, + "step": 3817 + }, + { + "epoch": 0.5320142130565039, + "grad_norm": 0.6019696593284607, + "learning_rate": 2.025923894571134e-05, + "loss": 0.10690832138061523, + "step": 3818 + }, + { + "epoch": 0.5321535567477182, + "grad_norm": 0.6723864078521729, + "learning_rate": 2.0249812574486957e-05, + "loss": 0.09382438659667969, + "step": 3819 + }, + { + "epoch": 0.5322929004389326, + "grad_norm": 0.4975101947784424, + "learning_rate": 2.024038614775993e-05, + "loss": 0.08702850341796875, + "step": 3820 + }, + { + "epoch": 0.532432244130147, + "grad_norm": 0.5248335599899292, + "learning_rate": 2.0230959667624587e-05, + "loss": 0.09557342529296875, + "step": 3821 + }, + { + "epoch": 0.5325715878213614, + "grad_norm": 0.4089205265045166, + "learning_rate": 2.022153313617528e-05, + "loss": 0.09801483154296875, + "step": 3822 + }, + { + "epoch": 0.5327109315125758, + "grad_norm": 0.5337443351745605, + "learning_rate": 2.0212106555506364e-05, + "loss": 0.08561992645263672, + "step": 3823 + }, + { + "epoch": 0.5328502752037901, + "grad_norm": 0.5521273016929626, + "learning_rate": 2.0202679927712224e-05, + "loss": 0.1131439208984375, + "step": 3824 + }, + { + "epoch": 0.5329896188950045, + "grad_norm": 0.5300192832946777, + "learning_rate": 2.0193253254887223e-05, + "loss": 0.09160041809082031, + "step": 3825 + }, + { + "epoch": 0.5331289625862189, + "grad_norm": 0.6440755724906921, + "learning_rate": 2.018382653912576e-05, + "loss": 0.11888504028320312, + "step": 3826 + }, + { + "epoch": 0.5332683062774333, + "grad_norm": 0.3908245861530304, + "learning_rate": 2.0174399782522242e-05, + "loss": 0.08638763427734375, + "step": 3827 + }, + { + "epoch": 0.5334076499686476, + "grad_norm": 0.38359546661376953, + "learning_rate": 2.016497298717107e-05, + "loss": 0.08947181701660156, + "step": 3828 + }, + { + "epoch": 0.533546993659862, + "grad_norm": 0.3294075131416321, + "learning_rate": 2.015554615516667e-05, + "loss": 0.07576751708984375, + "step": 3829 + }, + { + "epoch": 0.5336863373510764, + "grad_norm": 0.3785386085510254, + "learning_rate": 2.014611928860346e-05, + "loss": 0.08358573913574219, + "step": 3830 + }, + { + "epoch": 0.5338256810422908, + "grad_norm": 0.34655556082725525, + "learning_rate": 2.0136692389575892e-05, + "loss": 0.08176040649414062, + "step": 3831 + }, + { + "epoch": 0.5339650247335052, + "grad_norm": 0.3733676075935364, + "learning_rate": 2.012726546017838e-05, + "loss": 0.08509635925292969, + "step": 3832 + }, + { + "epoch": 0.5341043684247195, + "grad_norm": 0.31504836678504944, + "learning_rate": 2.01178385025054e-05, + "loss": 0.08620834350585938, + "step": 3833 + }, + { + "epoch": 0.5342437121159339, + "grad_norm": 0.34724435210227966, + "learning_rate": 2.0108411518651388e-05, + "loss": 0.0804595947265625, + "step": 3834 + }, + { + "epoch": 0.5343830558071483, + "grad_norm": 0.58302903175354, + "learning_rate": 2.0098984510710812e-05, + "loss": 0.10877418518066406, + "step": 3835 + }, + { + "epoch": 0.5345223994983627, + "grad_norm": 0.48237839341163635, + "learning_rate": 2.0089557480778144e-05, + "loss": 0.0941314697265625, + "step": 3836 + }, + { + "epoch": 0.534661743189577, + "grad_norm": 0.37936997413635254, + "learning_rate": 2.0080130430947842e-05, + "loss": 0.07761383056640625, + "step": 3837 + }, + { + "epoch": 0.5348010868807914, + "grad_norm": 0.37377962470054626, + "learning_rate": 2.007070336331439e-05, + "loss": 0.08625221252441406, + "step": 3838 + }, + { + "epoch": 0.5349404305720058, + "grad_norm": 0.593887984752655, + "learning_rate": 2.0061276279972265e-05, + "loss": 0.12152099609375, + "step": 3839 + }, + { + "epoch": 0.5350797742632203, + "grad_norm": 0.5866414904594421, + "learning_rate": 2.0051849183015953e-05, + "loss": 0.08643150329589844, + "step": 3840 + }, + { + "epoch": 0.5352191179544347, + "grad_norm": 0.5290805101394653, + "learning_rate": 2.004242207453993e-05, + "loss": 0.09164047241210938, + "step": 3841 + }, + { + "epoch": 0.535358461645649, + "grad_norm": 0.5035367608070374, + "learning_rate": 2.0032994956638695e-05, + "loss": 0.11059188842773438, + "step": 3842 + }, + { + "epoch": 0.5354978053368634, + "grad_norm": 0.41084223985671997, + "learning_rate": 2.0023567831406733e-05, + "loss": 0.08424758911132812, + "step": 3843 + }, + { + "epoch": 0.5356371490280778, + "grad_norm": 0.44592222571372986, + "learning_rate": 2.0014140700938532e-05, + "loss": 0.08651351928710938, + "step": 3844 + }, + { + "epoch": 0.5357764927192922, + "grad_norm": 0.6084923148155212, + "learning_rate": 2.0004713567328594e-05, + "loss": 0.08672904968261719, + "step": 3845 + }, + { + "epoch": 0.5359158364105066, + "grad_norm": 0.4169977903366089, + "learning_rate": 1.9995286432671412e-05, + "loss": 0.09386444091796875, + "step": 3846 + }, + { + "epoch": 0.5360551801017209, + "grad_norm": 0.4392768144607544, + "learning_rate": 1.9985859299061474e-05, + "loss": 0.08080577850341797, + "step": 3847 + }, + { + "epoch": 0.5361945237929353, + "grad_norm": 0.6141186952590942, + "learning_rate": 1.9976432168593273e-05, + "loss": 0.11574935913085938, + "step": 3848 + }, + { + "epoch": 0.5363338674841497, + "grad_norm": 0.8063312768936157, + "learning_rate": 1.996700504336131e-05, + "loss": 0.0847005844116211, + "step": 3849 + }, + { + "epoch": 0.5364732111753641, + "grad_norm": 0.6498550772666931, + "learning_rate": 1.9957577925460074e-05, + "loss": 0.0919485092163086, + "step": 3850 + }, + { + "epoch": 0.5366125548665784, + "grad_norm": 0.40159276127815247, + "learning_rate": 1.994815081698406e-05, + "loss": 0.07785797119140625, + "step": 3851 + }, + { + "epoch": 0.5367518985577928, + "grad_norm": 0.36756065487861633, + "learning_rate": 1.9938723720027745e-05, + "loss": 0.08926010131835938, + "step": 3852 + }, + { + "epoch": 0.5368912422490072, + "grad_norm": 0.3285578489303589, + "learning_rate": 1.9929296636685615e-05, + "loss": 0.07071399688720703, + "step": 3853 + }, + { + "epoch": 0.5370305859402216, + "grad_norm": 0.45500823855400085, + "learning_rate": 1.9919869569052164e-05, + "loss": 0.09476470947265625, + "step": 3854 + }, + { + "epoch": 0.537169929631436, + "grad_norm": 0.3871777355670929, + "learning_rate": 1.991044251922186e-05, + "loss": 0.08188796043395996, + "step": 3855 + }, + { + "epoch": 0.5373092733226503, + "grad_norm": 0.7127866148948669, + "learning_rate": 1.9901015489289188e-05, + "loss": 0.10864448547363281, + "step": 3856 + }, + { + "epoch": 0.5374486170138647, + "grad_norm": 0.4170548915863037, + "learning_rate": 1.989158848134862e-05, + "loss": 0.09807205200195312, + "step": 3857 + }, + { + "epoch": 0.5375879607050791, + "grad_norm": 0.6218630075454712, + "learning_rate": 1.988216149749461e-05, + "loss": 0.0956878662109375, + "step": 3858 + }, + { + "epoch": 0.5377273043962935, + "grad_norm": 0.5557578206062317, + "learning_rate": 1.9872734539821626e-05, + "loss": 0.1096334457397461, + "step": 3859 + }, + { + "epoch": 0.5378666480875078, + "grad_norm": 0.4347181022167206, + "learning_rate": 1.9863307610424115e-05, + "loss": 0.08255195617675781, + "step": 3860 + }, + { + "epoch": 0.5380059917787222, + "grad_norm": 0.5236526727676392, + "learning_rate": 1.985388071139654e-05, + "loss": 0.09866619110107422, + "step": 3861 + }, + { + "epoch": 0.5381453354699366, + "grad_norm": 0.4227333068847656, + "learning_rate": 1.984445384483334e-05, + "loss": 0.08665847778320312, + "step": 3862 + }, + { + "epoch": 0.538284679161151, + "grad_norm": 0.5662535429000854, + "learning_rate": 1.9835027012828937e-05, + "loss": 0.1138296127319336, + "step": 3863 + }, + { + "epoch": 0.5384240228523653, + "grad_norm": 0.38915279507637024, + "learning_rate": 1.9825600217477765e-05, + "loss": 0.09114265441894531, + "step": 3864 + }, + { + "epoch": 0.5385633665435797, + "grad_norm": 0.2957589626312256, + "learning_rate": 1.9816173460874243e-05, + "loss": 0.0708627700805664, + "step": 3865 + }, + { + "epoch": 0.5387027102347941, + "grad_norm": 0.7607362866401672, + "learning_rate": 1.980674674511278e-05, + "loss": 0.11103439331054688, + "step": 3866 + }, + { + "epoch": 0.5388420539260085, + "grad_norm": 0.39986756443977356, + "learning_rate": 1.9797320072287786e-05, + "loss": 0.08446788787841797, + "step": 3867 + }, + { + "epoch": 0.5389813976172229, + "grad_norm": 0.3649470806121826, + "learning_rate": 1.9787893444493643e-05, + "loss": 0.07651329040527344, + "step": 3868 + }, + { + "epoch": 0.5391207413084372, + "grad_norm": 0.37930378317832947, + "learning_rate": 1.9778466863824726e-05, + "loss": 0.09537506103515625, + "step": 3869 + }, + { + "epoch": 0.5392600849996516, + "grad_norm": 0.4750712215900421, + "learning_rate": 1.9769040332375416e-05, + "loss": 0.08891105651855469, + "step": 3870 + }, + { + "epoch": 0.539399428690866, + "grad_norm": 0.5996161699295044, + "learning_rate": 1.975961385224007e-05, + "loss": 0.09468245506286621, + "step": 3871 + }, + { + "epoch": 0.5395387723820804, + "grad_norm": 0.45133763551712036, + "learning_rate": 1.9750187425513053e-05, + "loss": 0.09110403060913086, + "step": 3872 + }, + { + "epoch": 0.5396781160732947, + "grad_norm": 0.6086604595184326, + "learning_rate": 1.9740761054288672e-05, + "loss": 0.07718276977539062, + "step": 3873 + }, + { + "epoch": 0.5398174597645091, + "grad_norm": 0.6241793632507324, + "learning_rate": 1.973133474066127e-05, + "loss": 0.10429573059082031, + "step": 3874 + }, + { + "epoch": 0.5399568034557235, + "grad_norm": 0.3263911008834839, + "learning_rate": 1.9721908486725156e-05, + "loss": 0.08120346069335938, + "step": 3875 + }, + { + "epoch": 0.5400961471469379, + "grad_norm": 0.43658682703971863, + "learning_rate": 1.9712482294574622e-05, + "loss": 0.07581329345703125, + "step": 3876 + }, + { + "epoch": 0.5402354908381523, + "grad_norm": 0.5539786219596863, + "learning_rate": 1.9703056166303963e-05, + "loss": 0.10018539428710938, + "step": 3877 + }, + { + "epoch": 0.5403748345293666, + "grad_norm": 0.5393499732017517, + "learning_rate": 1.9693630104007446e-05, + "loss": 0.09111595153808594, + "step": 3878 + }, + { + "epoch": 0.540514178220581, + "grad_norm": 0.41531240940093994, + "learning_rate": 1.9684204109779324e-05, + "loss": 0.09229087829589844, + "step": 3879 + }, + { + "epoch": 0.5406535219117955, + "grad_norm": 0.7364137768745422, + "learning_rate": 1.9674778185713834e-05, + "loss": 0.10933685302734375, + "step": 3880 + }, + { + "epoch": 0.5407928656030099, + "grad_norm": 0.4929050803184509, + "learning_rate": 1.966535233390521e-05, + "loss": 0.08697319030761719, + "step": 3881 + }, + { + "epoch": 0.5409322092942243, + "grad_norm": 0.7462522983551025, + "learning_rate": 1.9655926556447656e-05, + "loss": 0.10753059387207031, + "step": 3882 + }, + { + "epoch": 0.5410715529854386, + "grad_norm": 0.5385599732398987, + "learning_rate": 1.9646500855435374e-05, + "loss": 0.08429145812988281, + "step": 3883 + }, + { + "epoch": 0.541210896676653, + "grad_norm": 0.4893192946910858, + "learning_rate": 1.963707523296252e-05, + "loss": 0.11876869201660156, + "step": 3884 + }, + { + "epoch": 0.5413502403678674, + "grad_norm": 0.36851802468299866, + "learning_rate": 1.962764969112327e-05, + "loss": 0.08907318115234375, + "step": 3885 + }, + { + "epoch": 0.5414895840590818, + "grad_norm": 0.661170244216919, + "learning_rate": 1.9618224232011757e-05, + "loss": 0.09538078308105469, + "step": 3886 + }, + { + "epoch": 0.5416289277502961, + "grad_norm": 0.42861858010292053, + "learning_rate": 1.96087988577221e-05, + "loss": 0.0812234878540039, + "step": 3887 + }, + { + "epoch": 0.5417682714415105, + "grad_norm": 0.719034731388092, + "learning_rate": 1.9599373570348416e-05, + "loss": 0.09964942932128906, + "step": 3888 + }, + { + "epoch": 0.5419076151327249, + "grad_norm": 0.4534355401992798, + "learning_rate": 1.9589948371984766e-05, + "loss": 0.09207916259765625, + "step": 3889 + }, + { + "epoch": 0.5420469588239393, + "grad_norm": 0.37316787242889404, + "learning_rate": 1.958052326472523e-05, + "loss": 0.08906745910644531, + "step": 3890 + }, + { + "epoch": 0.5421863025151537, + "grad_norm": 0.768679678440094, + "learning_rate": 1.957109825066385e-05, + "loss": 0.09772300720214844, + "step": 3891 + }, + { + "epoch": 0.542325646206368, + "grad_norm": 0.3229485750198364, + "learning_rate": 1.956167333189464e-05, + "loss": 0.08542346954345703, + "step": 3892 + }, + { + "epoch": 0.5424649898975824, + "grad_norm": 0.4922178387641907, + "learning_rate": 1.9552248510511616e-05, + "loss": 0.10702133178710938, + "step": 3893 + }, + { + "epoch": 0.5426043335887968, + "grad_norm": 0.5768254399299622, + "learning_rate": 1.954282378860875e-05, + "loss": 0.10248470306396484, + "step": 3894 + }, + { + "epoch": 0.5427436772800112, + "grad_norm": 0.356195867061615, + "learning_rate": 1.9533399168279997e-05, + "loss": 0.08785820007324219, + "step": 3895 + }, + { + "epoch": 0.5428830209712255, + "grad_norm": 0.44582512974739075, + "learning_rate": 1.9523974651619296e-05, + "loss": 0.0779571533203125, + "step": 3896 + }, + { + "epoch": 0.5430223646624399, + "grad_norm": 0.40006589889526367, + "learning_rate": 1.951455024072056e-05, + "loss": 0.105255126953125, + "step": 3897 + }, + { + "epoch": 0.5431617083536543, + "grad_norm": 0.936191737651825, + "learning_rate": 1.950512593767768e-05, + "loss": 0.10133171081542969, + "step": 3898 + }, + { + "epoch": 0.5433010520448687, + "grad_norm": 0.30498120188713074, + "learning_rate": 1.9495701744584522e-05, + "loss": 0.07629776000976562, + "step": 3899 + }, + { + "epoch": 0.543440395736083, + "grad_norm": 0.5847951173782349, + "learning_rate": 1.9486277663534915e-05, + "loss": 0.10423851013183594, + "step": 3900 + }, + { + "epoch": 0.5435797394272974, + "grad_norm": 0.4841534495353699, + "learning_rate": 1.9476853696622686e-05, + "loss": 0.09450721740722656, + "step": 3901 + }, + { + "epoch": 0.5437190831185118, + "grad_norm": 0.438031405210495, + "learning_rate": 1.9467429845941622e-05, + "loss": 0.091461181640625, + "step": 3902 + }, + { + "epoch": 0.5438584268097262, + "grad_norm": 0.6614689826965332, + "learning_rate": 1.9458006113585484e-05, + "loss": 0.10242748260498047, + "step": 3903 + }, + { + "epoch": 0.5439977705009406, + "grad_norm": 0.2723938822746277, + "learning_rate": 1.9448582501648025e-05, + "loss": 0.06807231903076172, + "step": 3904 + }, + { + "epoch": 0.5441371141921549, + "grad_norm": 0.3794356882572174, + "learning_rate": 1.9439159012222936e-05, + "loss": 0.08407402038574219, + "step": 3905 + }, + { + "epoch": 0.5442764578833693, + "grad_norm": 0.4427141845226288, + "learning_rate": 1.9429735647403908e-05, + "loss": 0.09821128845214844, + "step": 3906 + }, + { + "epoch": 0.5444158015745837, + "grad_norm": 1.2526726722717285, + "learning_rate": 1.9420312409284606e-05, + "loss": 0.13948726654052734, + "step": 3907 + }, + { + "epoch": 0.5445551452657981, + "grad_norm": 0.5661587715148926, + "learning_rate": 1.9410889299958643e-05, + "loss": 0.10601806640625, + "step": 3908 + }, + { + "epoch": 0.5446944889570124, + "grad_norm": 0.6678125858306885, + "learning_rate": 1.940146632151964e-05, + "loss": 0.08579063415527344, + "step": 3909 + }, + { + "epoch": 0.5448338326482268, + "grad_norm": 0.5010951161384583, + "learning_rate": 1.939204347606115e-05, + "loss": 0.09733390808105469, + "step": 3910 + }, + { + "epoch": 0.5449731763394412, + "grad_norm": 0.3896265923976898, + "learning_rate": 1.938262076567672e-05, + "loss": 0.09490585327148438, + "step": 3911 + }, + { + "epoch": 0.5451125200306556, + "grad_norm": 0.3438236117362976, + "learning_rate": 1.9373198192459856e-05, + "loss": 0.07776451110839844, + "step": 3912 + }, + { + "epoch": 0.54525186372187, + "grad_norm": 0.4265449345111847, + "learning_rate": 1.936377575850405e-05, + "loss": 0.09739875793457031, + "step": 3913 + }, + { + "epoch": 0.5453912074130843, + "grad_norm": 0.5765805840492249, + "learning_rate": 1.935435346590274e-05, + "loss": 0.10932540893554688, + "step": 3914 + }, + { + "epoch": 0.5455305511042987, + "grad_norm": 0.41328197717666626, + "learning_rate": 1.934493131674936e-05, + "loss": 0.07938385009765625, + "step": 3915 + }, + { + "epoch": 0.5456698947955131, + "grad_norm": 0.44628265500068665, + "learning_rate": 1.9335509313137275e-05, + "loss": 0.09283828735351562, + "step": 3916 + }, + { + "epoch": 0.5458092384867275, + "grad_norm": 0.5423193573951721, + "learning_rate": 1.9326087457159856e-05, + "loss": 0.09473800659179688, + "step": 3917 + }, + { + "epoch": 0.5459485821779418, + "grad_norm": 0.45767688751220703, + "learning_rate": 1.9316665750910414e-05, + "loss": 0.10271263122558594, + "step": 3918 + }, + { + "epoch": 0.5460879258691562, + "grad_norm": 0.5209531784057617, + "learning_rate": 1.930724419648224e-05, + "loss": 0.1098175048828125, + "step": 3919 + }, + { + "epoch": 0.5462272695603707, + "grad_norm": 0.44111838936805725, + "learning_rate": 1.92978227959686e-05, + "loss": 0.09343147277832031, + "step": 3920 + }, + { + "epoch": 0.5463666132515851, + "grad_norm": 0.6808000802993774, + "learning_rate": 1.9288401551462694e-05, + "loss": 0.0933685302734375, + "step": 3921 + }, + { + "epoch": 0.5465059569427995, + "grad_norm": 0.5077112913131714, + "learning_rate": 1.9278980465057722e-05, + "loss": 0.094512939453125, + "step": 3922 + }, + { + "epoch": 0.5466453006340138, + "grad_norm": 0.5418306589126587, + "learning_rate": 1.9269559538846823e-05, + "loss": 0.08624076843261719, + "step": 3923 + }, + { + "epoch": 0.5467846443252282, + "grad_norm": 0.541973888874054, + "learning_rate": 1.9260138774923124e-05, + "loss": 0.11134719848632812, + "step": 3924 + }, + { + "epoch": 0.5469239880164426, + "grad_norm": 0.5082377195358276, + "learning_rate": 1.9250718175379697e-05, + "loss": 0.10887718200683594, + "step": 3925 + }, + { + "epoch": 0.547063331707657, + "grad_norm": 0.4424314498901367, + "learning_rate": 1.924129774230959e-05, + "loss": 0.0870351791381836, + "step": 3926 + }, + { + "epoch": 0.5472026753988714, + "grad_norm": 0.465009480714798, + "learning_rate": 1.9231877477805795e-05, + "loss": 0.07827568054199219, + "step": 3927 + }, + { + "epoch": 0.5473420190900857, + "grad_norm": 0.5090827345848083, + "learning_rate": 1.922245738396129e-05, + "loss": 0.10398101806640625, + "step": 3928 + }, + { + "epoch": 0.5474813627813001, + "grad_norm": 0.37955307960510254, + "learning_rate": 1.9213037462869003e-05, + "loss": 0.08160972595214844, + "step": 3929 + }, + { + "epoch": 0.5476207064725145, + "grad_norm": 0.3401148021221161, + "learning_rate": 1.920361771662183e-05, + "loss": 0.06603717803955078, + "step": 3930 + }, + { + "epoch": 0.5477600501637289, + "grad_norm": 0.4752054810523987, + "learning_rate": 1.9194198147312614e-05, + "loss": 0.08854103088378906, + "step": 3931 + }, + { + "epoch": 0.5478993938549432, + "grad_norm": 0.6338434815406799, + "learning_rate": 1.9184778757034168e-05, + "loss": 0.10433340072631836, + "step": 3932 + }, + { + "epoch": 0.5480387375461576, + "grad_norm": 0.49477842450141907, + "learning_rate": 1.9175359547879275e-05, + "loss": 0.0792999267578125, + "step": 3933 + }, + { + "epoch": 0.548178081237372, + "grad_norm": 0.5897603034973145, + "learning_rate": 1.9165940521940667e-05, + "loss": 0.09330463409423828, + "step": 3934 + }, + { + "epoch": 0.5483174249285864, + "grad_norm": 0.3819257616996765, + "learning_rate": 1.9156521681311025e-05, + "loss": 0.08688163757324219, + "step": 3935 + }, + { + "epoch": 0.5484567686198007, + "grad_norm": 0.40806853771209717, + "learning_rate": 1.914710302808302e-05, + "loss": 0.08562278747558594, + "step": 3936 + }, + { + "epoch": 0.5485961123110151, + "grad_norm": 0.49500414729118347, + "learning_rate": 1.9137684564349244e-05, + "loss": 0.09847068786621094, + "step": 3937 + }, + { + "epoch": 0.5487354560022295, + "grad_norm": 0.38421374559402466, + "learning_rate": 1.912826629220227e-05, + "loss": 0.07198905944824219, + "step": 3938 + }, + { + "epoch": 0.5488747996934439, + "grad_norm": 0.3546938896179199, + "learning_rate": 1.911884821373462e-05, + "loss": 0.07817268371582031, + "step": 3939 + }, + { + "epoch": 0.5490141433846583, + "grad_norm": 0.4334903359413147, + "learning_rate": 1.9109430331038784e-05, + "loss": 0.08175277709960938, + "step": 3940 + }, + { + "epoch": 0.5491534870758726, + "grad_norm": 0.7317392826080322, + "learning_rate": 1.91000126462072e-05, + "loss": 0.08753013610839844, + "step": 3941 + }, + { + "epoch": 0.549292830767087, + "grad_norm": 0.7417848706245422, + "learning_rate": 1.909059516133226e-05, + "loss": 0.0930965393781662, + "step": 3942 + }, + { + "epoch": 0.5494321744583014, + "grad_norm": 0.5378831028938293, + "learning_rate": 1.9081177878506306e-05, + "loss": 0.08031845092773438, + "step": 3943 + }, + { + "epoch": 0.5495715181495158, + "grad_norm": 0.7647280693054199, + "learning_rate": 1.907176079982165e-05, + "loss": 0.10936737060546875, + "step": 3944 + }, + { + "epoch": 0.5497108618407301, + "grad_norm": 0.43204012513160706, + "learning_rate": 1.9062343927370556e-05, + "loss": 0.08027076721191406, + "step": 3945 + }, + { + "epoch": 0.5498502055319445, + "grad_norm": 0.3624517321586609, + "learning_rate": 1.905292726324524e-05, + "loss": 0.07719182968139648, + "step": 3946 + }, + { + "epoch": 0.5499895492231589, + "grad_norm": 0.3079320788383484, + "learning_rate": 1.9043510809537857e-05, + "loss": 0.0757303237915039, + "step": 3947 + }, + { + "epoch": 0.5501288929143733, + "grad_norm": 0.5189117789268494, + "learning_rate": 1.9034094568340532e-05, + "loss": 0.08472824096679688, + "step": 3948 + }, + { + "epoch": 0.5502682366055877, + "grad_norm": 0.5223495960235596, + "learning_rate": 1.9024678541745343e-05, + "loss": 0.08304786682128906, + "step": 3949 + }, + { + "epoch": 0.550407580296802, + "grad_norm": 0.46127355098724365, + "learning_rate": 1.901526273184432e-05, + "loss": 0.07391834259033203, + "step": 3950 + }, + { + "epoch": 0.5505469239880164, + "grad_norm": 0.9633737802505493, + "learning_rate": 1.900584714072943e-05, + "loss": 0.09735870361328125, + "step": 3951 + }, + { + "epoch": 0.5506862676792308, + "grad_norm": 0.9061733484268188, + "learning_rate": 1.8996431770492622e-05, + "loss": 0.10747170448303223, + "step": 3952 + }, + { + "epoch": 0.5508256113704452, + "grad_norm": 0.6742889285087585, + "learning_rate": 1.8987016623225748e-05, + "loss": 0.1061239242553711, + "step": 3953 + }, + { + "epoch": 0.5509649550616595, + "grad_norm": 0.5050815939903259, + "learning_rate": 1.897760170102066e-05, + "loss": 0.09465789794921875, + "step": 3954 + }, + { + "epoch": 0.5511042987528739, + "grad_norm": 0.4081473648548126, + "learning_rate": 1.8968187005969126e-05, + "loss": 0.08364677429199219, + "step": 3955 + }, + { + "epoch": 0.5512436424440883, + "grad_norm": 0.3891538679599762, + "learning_rate": 1.8958772540162887e-05, + "loss": 0.09070777893066406, + "step": 3956 + }, + { + "epoch": 0.5513829861353027, + "grad_norm": 0.3198668956756592, + "learning_rate": 1.8949358305693625e-05, + "loss": 0.07173919677734375, + "step": 3957 + }, + { + "epoch": 0.551522329826517, + "grad_norm": 0.5403586626052856, + "learning_rate": 1.8939944304652952e-05, + "loss": 0.10943603515625, + "step": 3958 + }, + { + "epoch": 0.5516616735177314, + "grad_norm": 0.5084822177886963, + "learning_rate": 1.8930530539132456e-05, + "loss": 0.08002281188964844, + "step": 3959 + }, + { + "epoch": 0.5518010172089458, + "grad_norm": 0.5430189371109009, + "learning_rate": 1.8921117011223655e-05, + "loss": 0.09583091735839844, + "step": 3960 + }, + { + "epoch": 0.5519403609001603, + "grad_norm": 0.47038012742996216, + "learning_rate": 1.8911703723018025e-05, + "loss": 0.09672927856445312, + "step": 3961 + }, + { + "epoch": 0.5520797045913747, + "grad_norm": 0.4247695803642273, + "learning_rate": 1.8902290676606987e-05, + "loss": 0.08978652954101562, + "step": 3962 + }, + { + "epoch": 0.552219048282589, + "grad_norm": 0.6155433058738708, + "learning_rate": 1.8892877874081895e-05, + "loss": 0.10306930541992188, + "step": 3963 + }, + { + "epoch": 0.5523583919738034, + "grad_norm": 0.5347464680671692, + "learning_rate": 1.8883465317534055e-05, + "loss": 0.08373260498046875, + "step": 3964 + }, + { + "epoch": 0.5524977356650178, + "grad_norm": 0.45013922452926636, + "learning_rate": 1.887405300905474e-05, + "loss": 0.0933380126953125, + "step": 3965 + }, + { + "epoch": 0.5526370793562322, + "grad_norm": 0.7650270462036133, + "learning_rate": 1.886464095073514e-05, + "loss": 0.09469032287597656, + "step": 3966 + }, + { + "epoch": 0.5527764230474466, + "grad_norm": 0.5455166697502136, + "learning_rate": 1.88552291446664e-05, + "loss": 0.09389877319335938, + "step": 3967 + }, + { + "epoch": 0.5529157667386609, + "grad_norm": 0.42088404297828674, + "learning_rate": 1.884581759293961e-05, + "loss": 0.08510494232177734, + "step": 3968 + }, + { + "epoch": 0.5530551104298753, + "grad_norm": 0.6539744734764099, + "learning_rate": 1.883640629764579e-05, + "loss": 0.10085487365722656, + "step": 3969 + }, + { + "epoch": 0.5531944541210897, + "grad_norm": 0.34096312522888184, + "learning_rate": 1.8826995260875937e-05, + "loss": 0.06767463684082031, + "step": 3970 + }, + { + "epoch": 0.5533337978123041, + "grad_norm": 0.5645963549613953, + "learning_rate": 1.8817584484720947e-05, + "loss": 0.10230731964111328, + "step": 3971 + }, + { + "epoch": 0.5534731415035185, + "grad_norm": 0.4046439230442047, + "learning_rate": 1.8808173971271695e-05, + "loss": 0.08462142944335938, + "step": 3972 + }, + { + "epoch": 0.5536124851947328, + "grad_norm": 0.5351841449737549, + "learning_rate": 1.8798763722618982e-05, + "loss": 0.07756805419921875, + "step": 3973 + }, + { + "epoch": 0.5537518288859472, + "grad_norm": 0.3881390690803528, + "learning_rate": 1.878935374085354e-05, + "loss": 0.08168601989746094, + "step": 3974 + }, + { + "epoch": 0.5538911725771616, + "grad_norm": 0.6704239845275879, + "learning_rate": 1.8779944028066057e-05, + "loss": 0.08931565284729004, + "step": 3975 + }, + { + "epoch": 0.554030516268376, + "grad_norm": 0.739886462688446, + "learning_rate": 1.8770534586347152e-05, + "loss": 0.10867118835449219, + "step": 3976 + }, + { + "epoch": 0.5541698599595903, + "grad_norm": 0.645499050617218, + "learning_rate": 1.8761125417787398e-05, + "loss": 0.09883308410644531, + "step": 3977 + }, + { + "epoch": 0.5543092036508047, + "grad_norm": 0.5660085678100586, + "learning_rate": 1.8751716524477298e-05, + "loss": 0.10748863220214844, + "step": 3978 + }, + { + "epoch": 0.5544485473420191, + "grad_norm": 0.4772121012210846, + "learning_rate": 1.874230790850728e-05, + "loss": 0.10978126525878906, + "step": 3979 + }, + { + "epoch": 0.5545878910332335, + "grad_norm": 0.3557966649532318, + "learning_rate": 1.8732899571967728e-05, + "loss": 0.08595085144042969, + "step": 3980 + }, + { + "epoch": 0.5547272347244478, + "grad_norm": 0.3782907724380493, + "learning_rate": 1.8723491516948968e-05, + "loss": 0.08669471740722656, + "step": 3981 + }, + { + "epoch": 0.5548665784156622, + "grad_norm": 0.6225922107696533, + "learning_rate": 1.871408374554125e-05, + "loss": 0.09822845458984375, + "step": 3982 + }, + { + "epoch": 0.5550059221068766, + "grad_norm": 0.4296649992465973, + "learning_rate": 1.8704676259834768e-05, + "loss": 0.0765676498413086, + "step": 3983 + }, + { + "epoch": 0.555145265798091, + "grad_norm": 0.2951599955558777, + "learning_rate": 1.8695269061919642e-05, + "loss": 0.07079124450683594, + "step": 3984 + }, + { + "epoch": 0.5552846094893054, + "grad_norm": 0.6063258051872253, + "learning_rate": 1.8685862153885947e-05, + "loss": 0.09183502197265625, + "step": 3985 + }, + { + "epoch": 0.5554239531805197, + "grad_norm": 0.2854485511779785, + "learning_rate": 1.867645553782368e-05, + "loss": 0.07220172882080078, + "step": 3986 + }, + { + "epoch": 0.5555632968717341, + "grad_norm": 0.2925446331501007, + "learning_rate": 1.866704921582277e-05, + "loss": 0.07403564453125, + "step": 3987 + }, + { + "epoch": 0.5557026405629485, + "grad_norm": 0.4304373860359192, + "learning_rate": 1.86576431899731e-05, + "loss": 0.08663558959960938, + "step": 3988 + }, + { + "epoch": 0.5558419842541629, + "grad_norm": 0.47979891300201416, + "learning_rate": 1.864823746236446e-05, + "loss": 0.1127849817276001, + "step": 3989 + }, + { + "epoch": 0.5559813279453772, + "grad_norm": 0.7454348802566528, + "learning_rate": 1.8638832035086598e-05, + "loss": 0.09872055053710938, + "step": 3990 + }, + { + "epoch": 0.5561206716365916, + "grad_norm": 0.5198816061019897, + "learning_rate": 1.862942691022918e-05, + "loss": 0.09939765930175781, + "step": 3991 + }, + { + "epoch": 0.556260015327806, + "grad_norm": 0.683233916759491, + "learning_rate": 1.8620022089881812e-05, + "loss": 0.10719490051269531, + "step": 3992 + }, + { + "epoch": 0.5563993590190204, + "grad_norm": 0.5672513246536255, + "learning_rate": 1.861061757613403e-05, + "loss": 0.0777740478515625, + "step": 3993 + }, + { + "epoch": 0.5565387027102348, + "grad_norm": 0.5565862655639648, + "learning_rate": 1.8601213371075308e-05, + "loss": 0.08920097351074219, + "step": 3994 + }, + { + "epoch": 0.5566780464014491, + "grad_norm": 0.49420806765556335, + "learning_rate": 1.8591809476795034e-05, + "loss": 0.09531879425048828, + "step": 3995 + }, + { + "epoch": 0.5568173900926635, + "grad_norm": 0.5293065309524536, + "learning_rate": 1.8582405895382544e-05, + "loss": 0.10355758666992188, + "step": 3996 + }, + { + "epoch": 0.5569567337838779, + "grad_norm": 0.5251451134681702, + "learning_rate": 1.8573002628927102e-05, + "loss": 0.09431540966033936, + "step": 3997 + }, + { + "epoch": 0.5570960774750923, + "grad_norm": 0.4113677740097046, + "learning_rate": 1.8563599679517898e-05, + "loss": 0.10356330871582031, + "step": 3998 + }, + { + "epoch": 0.5572354211663066, + "grad_norm": 0.4508986473083496, + "learning_rate": 1.8554197049244054e-05, + "loss": 0.09781837463378906, + "step": 3999 + }, + { + "epoch": 0.557374764857521, + "grad_norm": 0.2646358609199524, + "learning_rate": 1.854479474019461e-05, + "loss": 0.0738677978515625, + "step": 4000 + }, + { + "epoch": 0.5575141085487355, + "grad_norm": 0.5183758735656738, + "learning_rate": 1.8535392754458555e-05, + "loss": 0.10062599182128906, + "step": 4001 + }, + { + "epoch": 0.5576534522399499, + "grad_norm": 0.3386704623699188, + "learning_rate": 1.8525991094124795e-05, + "loss": 0.08579254150390625, + "step": 4002 + }, + { + "epoch": 0.5577927959311643, + "grad_norm": 0.6192626357078552, + "learning_rate": 1.8516589761282155e-05, + "loss": 0.10919189453125, + "step": 4003 + }, + { + "epoch": 0.5579321396223786, + "grad_norm": 0.5081757307052612, + "learning_rate": 1.850718875801942e-05, + "loss": 0.09319877624511719, + "step": 4004 + }, + { + "epoch": 0.558071483313593, + "grad_norm": 0.35307544469833374, + "learning_rate": 1.8497788086425243e-05, + "loss": 0.07601356506347656, + "step": 4005 + }, + { + "epoch": 0.5582108270048074, + "grad_norm": 0.4461684823036194, + "learning_rate": 1.8488387748588266e-05, + "loss": 0.0902252197265625, + "step": 4006 + }, + { + "epoch": 0.5583501706960218, + "grad_norm": 0.4268692135810852, + "learning_rate": 1.8478987746597017e-05, + "loss": 0.07765388488769531, + "step": 4007 + }, + { + "epoch": 0.5584895143872362, + "grad_norm": 0.4250284731388092, + "learning_rate": 1.8469588082539963e-05, + "loss": 0.10613441467285156, + "step": 4008 + }, + { + "epoch": 0.5586288580784505, + "grad_norm": 0.4803819954395294, + "learning_rate": 1.8460188758505502e-05, + "loss": 0.08468055725097656, + "step": 4009 + }, + { + "epoch": 0.5587682017696649, + "grad_norm": 0.4014166593551636, + "learning_rate": 1.8450789776581947e-05, + "loss": 0.08220100402832031, + "step": 4010 + }, + { + "epoch": 0.5589075454608793, + "grad_norm": 0.36183327436447144, + "learning_rate": 1.844139113885753e-05, + "loss": 0.08337974548339844, + "step": 4011 + }, + { + "epoch": 0.5590468891520937, + "grad_norm": 0.393807053565979, + "learning_rate": 1.8431992847420418e-05, + "loss": 0.08704376220703125, + "step": 4012 + }, + { + "epoch": 0.559186232843308, + "grad_norm": 0.5992540717124939, + "learning_rate": 1.8422594904358696e-05, + "loss": 0.11244583129882812, + "step": 4013 + }, + { + "epoch": 0.5593255765345224, + "grad_norm": 0.48637402057647705, + "learning_rate": 1.8413197311760377e-05, + "loss": 0.09271812438964844, + "step": 4014 + }, + { + "epoch": 0.5594649202257368, + "grad_norm": 0.3652540445327759, + "learning_rate": 1.8403800071713392e-05, + "loss": 0.08053469657897949, + "step": 4015 + }, + { + "epoch": 0.5596042639169512, + "grad_norm": 0.5475808382034302, + "learning_rate": 1.839440318630558e-05, + "loss": 0.09217262268066406, + "step": 4016 + }, + { + "epoch": 0.5597436076081655, + "grad_norm": 0.3544541001319885, + "learning_rate": 1.838500665762473e-05, + "loss": 0.087982177734375, + "step": 4017 + }, + { + "epoch": 0.5598829512993799, + "grad_norm": 0.4525499939918518, + "learning_rate": 1.8375610487758527e-05, + "loss": 0.08600044250488281, + "step": 4018 + }, + { + "epoch": 0.5600222949905943, + "grad_norm": 0.4619007110595703, + "learning_rate": 1.8366214678794584e-05, + "loss": 0.09036827087402344, + "step": 4019 + }, + { + "epoch": 0.5601616386818087, + "grad_norm": 0.6229040026664734, + "learning_rate": 1.8356819232820452e-05, + "loss": 0.09051895141601562, + "step": 4020 + }, + { + "epoch": 0.5603009823730231, + "grad_norm": 0.7375126481056213, + "learning_rate": 1.834742415192356e-05, + "loss": 0.1298999786376953, + "step": 4021 + }, + { + "epoch": 0.5604403260642374, + "grad_norm": 0.3642440736293793, + "learning_rate": 1.8338029438191298e-05, + "loss": 0.07036781311035156, + "step": 4022 + }, + { + "epoch": 0.5605796697554518, + "grad_norm": 0.7785740494728088, + "learning_rate": 1.832863509371095e-05, + "loss": 0.09807682037353516, + "step": 4023 + }, + { + "epoch": 0.5607190134466662, + "grad_norm": 0.3322637677192688, + "learning_rate": 1.831924112056972e-05, + "loss": 0.08726882934570312, + "step": 4024 + }, + { + "epoch": 0.5608583571378806, + "grad_norm": 0.6264582276344299, + "learning_rate": 1.8309847520854753e-05, + "loss": 0.09667396545410156, + "step": 4025 + }, + { + "epoch": 0.560997700829095, + "grad_norm": 0.6460886597633362, + "learning_rate": 1.8300454296653076e-05, + "loss": 0.10586738586425781, + "step": 4026 + }, + { + "epoch": 0.5611370445203093, + "grad_norm": 0.5254935622215271, + "learning_rate": 1.829106145005165e-05, + "loss": 0.10653495788574219, + "step": 4027 + }, + { + "epoch": 0.5612763882115237, + "grad_norm": 0.41849470138549805, + "learning_rate": 1.828166898313735e-05, + "loss": 0.0901947021484375, + "step": 4028 + }, + { + "epoch": 0.5614157319027381, + "grad_norm": 0.41244274377822876, + "learning_rate": 1.8272276897996977e-05, + "loss": 0.08697319030761719, + "step": 4029 + }, + { + "epoch": 0.5615550755939525, + "grad_norm": 0.369329571723938, + "learning_rate": 1.8262885196717232e-05, + "loss": 0.06991386413574219, + "step": 4030 + }, + { + "epoch": 0.5616944192851668, + "grad_norm": 0.37489715218544006, + "learning_rate": 1.8253493881384743e-05, + "loss": 0.0774383544921875, + "step": 4031 + }, + { + "epoch": 0.5618337629763812, + "grad_norm": 0.48186764121055603, + "learning_rate": 1.8244102954086032e-05, + "loss": 0.10773468017578125, + "step": 4032 + }, + { + "epoch": 0.5619731066675956, + "grad_norm": 0.3754662871360779, + "learning_rate": 1.823471241690756e-05, + "loss": 0.09067344665527344, + "step": 4033 + }, + { + "epoch": 0.56211245035881, + "grad_norm": 0.7459686994552612, + "learning_rate": 1.8225322271935686e-05, + "loss": 0.10732650756835938, + "step": 4034 + }, + { + "epoch": 0.5622517940500243, + "grad_norm": 0.671372652053833, + "learning_rate": 1.8215932521256683e-05, + "loss": 0.11050891876220703, + "step": 4035 + }, + { + "epoch": 0.5623911377412387, + "grad_norm": 0.39009904861450195, + "learning_rate": 1.8206543166956754e-05, + "loss": 0.0920867919921875, + "step": 4036 + }, + { + "epoch": 0.5625304814324531, + "grad_norm": 0.3313426673412323, + "learning_rate": 1.8197154211121976e-05, + "loss": 0.07526779174804688, + "step": 4037 + }, + { + "epoch": 0.5626698251236675, + "grad_norm": 0.4525284767150879, + "learning_rate": 1.818776565583838e-05, + "loss": 0.11981201171875, + "step": 4038 + }, + { + "epoch": 0.5628091688148819, + "grad_norm": 0.5256170034408569, + "learning_rate": 1.8178377503191875e-05, + "loss": 0.09504318237304688, + "step": 4039 + }, + { + "epoch": 0.5629485125060962, + "grad_norm": 0.5539085865020752, + "learning_rate": 1.8168989755268303e-05, + "loss": 0.09396839141845703, + "step": 4040 + }, + { + "epoch": 0.5630878561973107, + "grad_norm": 0.46541649103164673, + "learning_rate": 1.815960241415341e-05, + "loss": 0.09241294860839844, + "step": 4041 + }, + { + "epoch": 0.5632271998885251, + "grad_norm": 0.3055858910083771, + "learning_rate": 1.815021548193284e-05, + "loss": 0.07906341552734375, + "step": 4042 + }, + { + "epoch": 0.5633665435797395, + "grad_norm": 0.3706829845905304, + "learning_rate": 1.814082896069216e-05, + "loss": 0.08203506469726562, + "step": 4043 + }, + { + "epoch": 0.5635058872709539, + "grad_norm": 0.4759671688079834, + "learning_rate": 1.813144285251683e-05, + "loss": 0.0743570327758789, + "step": 4044 + }, + { + "epoch": 0.5636452309621682, + "grad_norm": 0.34544914960861206, + "learning_rate": 1.8122057159492248e-05, + "loss": 0.0784311294555664, + "step": 4045 + }, + { + "epoch": 0.5637845746533826, + "grad_norm": 0.48851457238197327, + "learning_rate": 1.8112671883703688e-05, + "loss": 0.08797264099121094, + "step": 4046 + }, + { + "epoch": 0.563923918344597, + "grad_norm": 0.5779854655265808, + "learning_rate": 1.8103287027236352e-05, + "loss": 0.09328365325927734, + "step": 4047 + }, + { + "epoch": 0.5640632620358114, + "grad_norm": 0.6533632278442383, + "learning_rate": 1.8093902592175328e-05, + "loss": 0.10825729370117188, + "step": 4048 + }, + { + "epoch": 0.5642026057270257, + "grad_norm": 0.41795995831489563, + "learning_rate": 1.8084518580605634e-05, + "loss": 0.082366943359375, + "step": 4049 + }, + { + "epoch": 0.5643419494182401, + "grad_norm": 0.4589305818080902, + "learning_rate": 1.807513499461218e-05, + "loss": 0.09082984924316406, + "step": 4050 + }, + { + "epoch": 0.5644812931094545, + "grad_norm": 0.35624027252197266, + "learning_rate": 1.8065751836279784e-05, + "loss": 0.07785797119140625, + "step": 4051 + }, + { + "epoch": 0.5646206368006689, + "grad_norm": 0.4983833432197571, + "learning_rate": 1.805636910769318e-05, + "loss": 0.09583854675292969, + "step": 4052 + }, + { + "epoch": 0.5647599804918833, + "grad_norm": 0.4770651161670685, + "learning_rate": 1.8046986810936974e-05, + "loss": 0.09445953369140625, + "step": 4053 + }, + { + "epoch": 0.5648993241830976, + "grad_norm": 0.5395882725715637, + "learning_rate": 1.8037604948095714e-05, + "loss": 0.10277462005615234, + "step": 4054 + }, + { + "epoch": 0.565038667874312, + "grad_norm": 0.5076349973678589, + "learning_rate": 1.802822352125383e-05, + "loss": 0.0848226547241211, + "step": 4055 + }, + { + "epoch": 0.5651780115655264, + "grad_norm": 0.37760666012763977, + "learning_rate": 1.8018842532495667e-05, + "loss": 0.07865715026855469, + "step": 4056 + }, + { + "epoch": 0.5653173552567408, + "grad_norm": 0.6300117373466492, + "learning_rate": 1.8009461983905466e-05, + "loss": 0.10439872741699219, + "step": 4057 + }, + { + "epoch": 0.5654566989479551, + "grad_norm": 0.3959280848503113, + "learning_rate": 1.8000081877567362e-05, + "loss": 0.09405326843261719, + "step": 4058 + }, + { + "epoch": 0.5655960426391695, + "grad_norm": 0.35765913128852844, + "learning_rate": 1.799070221556541e-05, + "loss": 0.08412837982177734, + "step": 4059 + }, + { + "epoch": 0.5657353863303839, + "grad_norm": 0.45975497364997864, + "learning_rate": 1.7981322999983547e-05, + "loss": 0.09099197387695312, + "step": 4060 + }, + { + "epoch": 0.5658747300215983, + "grad_norm": 0.4356464445590973, + "learning_rate": 1.7971944232905627e-05, + "loss": 0.09430217742919922, + "step": 4061 + }, + { + "epoch": 0.5660140737128126, + "grad_norm": 0.4116441607475281, + "learning_rate": 1.7962565916415406e-05, + "loss": 0.09390830993652344, + "step": 4062 + }, + { + "epoch": 0.566153417404027, + "grad_norm": 0.5502631664276123, + "learning_rate": 1.7953188052596514e-05, + "loss": 0.09466552734375, + "step": 4063 + }, + { + "epoch": 0.5662927610952414, + "grad_norm": 0.470461368560791, + "learning_rate": 1.7943810643532506e-05, + "loss": 0.11845016479492188, + "step": 4064 + }, + { + "epoch": 0.5664321047864558, + "grad_norm": 0.5570551156997681, + "learning_rate": 1.7934433691306834e-05, + "loss": 0.10101318359375, + "step": 4065 + }, + { + "epoch": 0.5665714484776702, + "grad_norm": 0.4337084889411926, + "learning_rate": 1.7925057198002836e-05, + "loss": 0.1063985824584961, + "step": 4066 + }, + { + "epoch": 0.5667107921688845, + "grad_norm": 0.47824493050575256, + "learning_rate": 1.7915681165703754e-05, + "loss": 0.08579826354980469, + "step": 4067 + }, + { + "epoch": 0.5668501358600989, + "grad_norm": 0.5399807691574097, + "learning_rate": 1.7906305596492747e-05, + "loss": 0.08435440063476562, + "step": 4068 + }, + { + "epoch": 0.5669894795513133, + "grad_norm": 0.37271973490715027, + "learning_rate": 1.7896930492452816e-05, + "loss": 0.07998371124267578, + "step": 4069 + }, + { + "epoch": 0.5671288232425277, + "grad_norm": 0.5471586585044861, + "learning_rate": 1.788755585566693e-05, + "loss": 0.11058616638183594, + "step": 4070 + }, + { + "epoch": 0.567268166933742, + "grad_norm": 0.48955225944519043, + "learning_rate": 1.7878181688217894e-05, + "loss": 0.1107940673828125, + "step": 4071 + }, + { + "epoch": 0.5674075106249564, + "grad_norm": 0.38481634855270386, + "learning_rate": 1.7868807992188448e-05, + "loss": 0.08219623565673828, + "step": 4072 + }, + { + "epoch": 0.5675468543161708, + "grad_norm": 0.5092350244522095, + "learning_rate": 1.7859434769661218e-05, + "loss": 0.07841682434082031, + "step": 4073 + }, + { + "epoch": 0.5676861980073852, + "grad_norm": 0.5130521655082703, + "learning_rate": 1.7850062022718708e-05, + "loss": 0.09077739715576172, + "step": 4074 + }, + { + "epoch": 0.5678255416985996, + "grad_norm": 0.5292364358901978, + "learning_rate": 1.7840689753443328e-05, + "loss": 0.09730768203735352, + "step": 4075 + }, + { + "epoch": 0.5679648853898139, + "grad_norm": 0.5386413335800171, + "learning_rate": 1.7831317963917388e-05, + "loss": 0.1000213623046875, + "step": 4076 + }, + { + "epoch": 0.5681042290810283, + "grad_norm": 0.4760090708732605, + "learning_rate": 1.7821946656223088e-05, + "loss": 0.09200477600097656, + "step": 4077 + }, + { + "epoch": 0.5682435727722427, + "grad_norm": 0.44105637073516846, + "learning_rate": 1.7812575832442518e-05, + "loss": 0.08484077453613281, + "step": 4078 + }, + { + "epoch": 0.5683829164634571, + "grad_norm": 0.46893584728240967, + "learning_rate": 1.7803205494657652e-05, + "loss": 0.08195340633392334, + "step": 4079 + }, + { + "epoch": 0.5685222601546714, + "grad_norm": 0.4833480417728424, + "learning_rate": 1.7793835644950373e-05, + "loss": 0.07451057434082031, + "step": 4080 + }, + { + "epoch": 0.5686616038458859, + "grad_norm": 1.0034250020980835, + "learning_rate": 1.7784466285402445e-05, + "loss": 0.1057138442993164, + "step": 4081 + }, + { + "epoch": 0.5688009475371003, + "grad_norm": 0.45201772451400757, + "learning_rate": 1.777509741809553e-05, + "loss": 0.08274650573730469, + "step": 4082 + }, + { + "epoch": 0.5689402912283147, + "grad_norm": 0.3625153601169586, + "learning_rate": 1.7765729045111177e-05, + "loss": 0.08303451538085938, + "step": 4083 + }, + { + "epoch": 0.5690796349195291, + "grad_norm": 0.5890111923217773, + "learning_rate": 1.775636116853081e-05, + "loss": 0.10124015808105469, + "step": 4084 + }, + { + "epoch": 0.5692189786107434, + "grad_norm": 0.6010305881500244, + "learning_rate": 1.7746993790435777e-05, + "loss": 0.10165786743164062, + "step": 4085 + }, + { + "epoch": 0.5693583223019578, + "grad_norm": 0.4978436529636383, + "learning_rate": 1.773762691290728e-05, + "loss": 0.0810089111328125, + "step": 4086 + }, + { + "epoch": 0.5694976659931722, + "grad_norm": 0.4358383119106293, + "learning_rate": 1.7728260538026432e-05, + "loss": 0.07118797302246094, + "step": 4087 + }, + { + "epoch": 0.5696370096843866, + "grad_norm": 0.5812110900878906, + "learning_rate": 1.7718894667874235e-05, + "loss": 0.09799575805664062, + "step": 4088 + }, + { + "epoch": 0.569776353375601, + "grad_norm": 0.5438780784606934, + "learning_rate": 1.7709529304531567e-05, + "loss": 0.08418750762939453, + "step": 4089 + }, + { + "epoch": 0.5699156970668153, + "grad_norm": 0.4879170060157776, + "learning_rate": 1.7700164450079188e-05, + "loss": 0.07196998596191406, + "step": 4090 + }, + { + "epoch": 0.5700550407580297, + "grad_norm": 0.6351314783096313, + "learning_rate": 1.769080010659776e-05, + "loss": 0.08958053588867188, + "step": 4091 + }, + { + "epoch": 0.5701943844492441, + "grad_norm": 0.7690042853355408, + "learning_rate": 1.768143627616783e-05, + "loss": 0.08167457580566406, + "step": 4092 + }, + { + "epoch": 0.5703337281404585, + "grad_norm": 0.354000449180603, + "learning_rate": 1.7672072960869828e-05, + "loss": 0.078704833984375, + "step": 4093 + }, + { + "epoch": 0.5704730718316728, + "grad_norm": 0.44189393520355225, + "learning_rate": 1.766271016278407e-05, + "loss": 0.09004402160644531, + "step": 4094 + }, + { + "epoch": 0.5706124155228872, + "grad_norm": 0.44245707988739014, + "learning_rate": 1.7653347883990748e-05, + "loss": 0.07258415222167969, + "step": 4095 + }, + { + "epoch": 0.5707517592141016, + "grad_norm": 0.7940633296966553, + "learning_rate": 1.764398612656995e-05, + "loss": 0.07614707946777344, + "step": 4096 + }, + { + "epoch": 0.570891102905316, + "grad_norm": 0.542537271976471, + "learning_rate": 1.7634624892601647e-05, + "loss": 0.09639930725097656, + "step": 4097 + }, + { + "epoch": 0.5710304465965303, + "grad_norm": 0.4960382282733917, + "learning_rate": 1.762526418416569e-05, + "loss": 0.09727668762207031, + "step": 4098 + }, + { + "epoch": 0.5711697902877447, + "grad_norm": 0.5873609185218811, + "learning_rate": 1.7615904003341822e-05, + "loss": 0.0860443115234375, + "step": 4099 + }, + { + "epoch": 0.5713091339789591, + "grad_norm": 0.32736441493034363, + "learning_rate": 1.7606544352209644e-05, + "loss": 0.07240104675292969, + "step": 4100 + }, + { + "epoch": 0.5714484776701735, + "grad_norm": 0.6071589589118958, + "learning_rate": 1.7597185232848673e-05, + "loss": 0.08935546875, + "step": 4101 + }, + { + "epoch": 0.5715878213613879, + "grad_norm": 0.33791062235832214, + "learning_rate": 1.7587826647338285e-05, + "loss": 0.07094383239746094, + "step": 4102 + }, + { + "epoch": 0.5717271650526022, + "grad_norm": 0.4208974540233612, + "learning_rate": 1.757846859775774e-05, + "loss": 0.08786201477050781, + "step": 4103 + }, + { + "epoch": 0.5718665087438166, + "grad_norm": 0.7408671379089355, + "learning_rate": 1.7569111086186196e-05, + "loss": 0.10355377197265625, + "step": 4104 + }, + { + "epoch": 0.572005852435031, + "grad_norm": 0.421549916267395, + "learning_rate": 1.7559754114702672e-05, + "loss": 0.09059762954711914, + "step": 4105 + }, + { + "epoch": 0.5721451961262454, + "grad_norm": 0.4458145797252655, + "learning_rate": 1.755039768538607e-05, + "loss": 0.09075164794921875, + "step": 4106 + }, + { + "epoch": 0.5722845398174597, + "grad_norm": 0.47965437173843384, + "learning_rate": 1.7541041800315173e-05, + "loss": 0.09101676940917969, + "step": 4107 + }, + { + "epoch": 0.5724238835086741, + "grad_norm": 0.4535920321941376, + "learning_rate": 1.7531686461568648e-05, + "loss": 0.07931137084960938, + "step": 4108 + }, + { + "epoch": 0.5725632271998885, + "grad_norm": 0.6551328897476196, + "learning_rate": 1.752233167122504e-05, + "loss": 0.1145782470703125, + "step": 4109 + }, + { + "epoch": 0.5727025708911029, + "grad_norm": 0.4917435348033905, + "learning_rate": 1.7512977431362777e-05, + "loss": 0.08197212219238281, + "step": 4110 + }, + { + "epoch": 0.5728419145823173, + "grad_norm": 0.43892306089401245, + "learning_rate": 1.750362374406014e-05, + "loss": 0.09789371490478516, + "step": 4111 + }, + { + "epoch": 0.5729812582735316, + "grad_norm": 0.4044692814350128, + "learning_rate": 1.749427061139531e-05, + "loss": 0.074676513671875, + "step": 4112 + }, + { + "epoch": 0.573120601964746, + "grad_norm": 0.6153304576873779, + "learning_rate": 1.7484918035446352e-05, + "loss": 0.09347248077392578, + "step": 4113 + }, + { + "epoch": 0.5732599456559604, + "grad_norm": 0.5496296882629395, + "learning_rate": 1.7475566018291185e-05, + "loss": 0.11017608642578125, + "step": 4114 + }, + { + "epoch": 0.5733992893471748, + "grad_norm": 0.34209582209587097, + "learning_rate": 1.7466214562007618e-05, + "loss": 0.08791160583496094, + "step": 4115 + }, + { + "epoch": 0.5735386330383891, + "grad_norm": 0.39444196224212646, + "learning_rate": 1.745686366867332e-05, + "loss": 0.09368324279785156, + "step": 4116 + }, + { + "epoch": 0.5736779767296035, + "grad_norm": 0.33652830123901367, + "learning_rate": 1.7447513340365855e-05, + "loss": 0.08399772644042969, + "step": 4117 + }, + { + "epoch": 0.5738173204208179, + "grad_norm": 0.4277515411376953, + "learning_rate": 1.7438163579162658e-05, + "loss": 0.09148597717285156, + "step": 4118 + }, + { + "epoch": 0.5739566641120323, + "grad_norm": 0.2759608328342438, + "learning_rate": 1.7428814387141016e-05, + "loss": 0.07653141021728516, + "step": 4119 + }, + { + "epoch": 0.5740960078032467, + "grad_norm": 0.4730808436870575, + "learning_rate": 1.741946576637813e-05, + "loss": 0.10274887084960938, + "step": 4120 + }, + { + "epoch": 0.5742353514944611, + "grad_norm": 0.6499531269073486, + "learning_rate": 1.7410117718951026e-05, + "loss": 0.10560989379882812, + "step": 4121 + }, + { + "epoch": 0.5743746951856755, + "grad_norm": 0.4568560719490051, + "learning_rate": 1.740077024693664e-05, + "loss": 0.09757423400878906, + "step": 4122 + }, + { + "epoch": 0.5745140388768899, + "grad_norm": 0.4242611229419708, + "learning_rate": 1.739142335241176e-05, + "loss": 0.09071636199951172, + "step": 4123 + }, + { + "epoch": 0.5746533825681043, + "grad_norm": 0.3685426414012909, + "learning_rate": 1.7382077037453057e-05, + "loss": 0.07782268524169922, + "step": 4124 + }, + { + "epoch": 0.5747927262593187, + "grad_norm": 0.5688881278038025, + "learning_rate": 1.7372731304137072e-05, + "loss": 0.08977699279785156, + "step": 4125 + }, + { + "epoch": 0.574932069950533, + "grad_norm": 0.49836650490760803, + "learning_rate": 1.736338615454021e-05, + "loss": 0.07271194458007812, + "step": 4126 + }, + { + "epoch": 0.5750714136417474, + "grad_norm": 0.4442707300186157, + "learning_rate": 1.7354041590738747e-05, + "loss": 0.07737350463867188, + "step": 4127 + }, + { + "epoch": 0.5752107573329618, + "grad_norm": 0.3628441095352173, + "learning_rate": 1.734469761480883e-05, + "loss": 0.08031272888183594, + "step": 4128 + }, + { + "epoch": 0.5753501010241762, + "grad_norm": 0.3576217591762543, + "learning_rate": 1.733535422882649e-05, + "loss": 0.08732414245605469, + "step": 4129 + }, + { + "epoch": 0.5754894447153905, + "grad_norm": 0.7196424603462219, + "learning_rate": 1.73260114348676e-05, + "loss": 0.113037109375, + "step": 4130 + }, + { + "epoch": 0.5756287884066049, + "grad_norm": 0.3551273047924042, + "learning_rate": 1.7316669235007927e-05, + "loss": 0.07997286319732666, + "step": 4131 + }, + { + "epoch": 0.5757681320978193, + "grad_norm": 0.3790348172187805, + "learning_rate": 1.7307327631323078e-05, + "loss": 0.08164596557617188, + "step": 4132 + }, + { + "epoch": 0.5759074757890337, + "grad_norm": 0.34991633892059326, + "learning_rate": 1.7297986625888563e-05, + "loss": 0.08976554870605469, + "step": 4133 + }, + { + "epoch": 0.576046819480248, + "grad_norm": 0.2901778519153595, + "learning_rate": 1.728864622077973e-05, + "loss": 0.07468128204345703, + "step": 4134 + }, + { + "epoch": 0.5761861631714624, + "grad_norm": 0.33863043785095215, + "learning_rate": 1.72793064180718e-05, + "loss": 0.0752573013305664, + "step": 4135 + }, + { + "epoch": 0.5763255068626768, + "grad_norm": 0.4777882993221283, + "learning_rate": 1.7269967219839882e-05, + "loss": 0.09912872314453125, + "step": 4136 + }, + { + "epoch": 0.5764648505538912, + "grad_norm": 0.3598491847515106, + "learning_rate": 1.7260628628158907e-05, + "loss": 0.0778951644897461, + "step": 4137 + }, + { + "epoch": 0.5766041942451056, + "grad_norm": 0.6063092947006226, + "learning_rate": 1.7251290645103716e-05, + "loss": 0.08121109008789062, + "step": 4138 + }, + { + "epoch": 0.5767435379363199, + "grad_norm": 0.5170284509658813, + "learning_rate": 1.7241953272748987e-05, + "loss": 0.09050941467285156, + "step": 4139 + }, + { + "epoch": 0.5768828816275343, + "grad_norm": 0.4463847875595093, + "learning_rate": 1.7232616513169272e-05, + "loss": 0.08275413513183594, + "step": 4140 + }, + { + "epoch": 0.5770222253187487, + "grad_norm": 0.4464281499385834, + "learning_rate": 1.7223280368438993e-05, + "loss": 0.0855865478515625, + "step": 4141 + }, + { + "epoch": 0.5771615690099631, + "grad_norm": 0.5252760052680969, + "learning_rate": 1.7213944840632422e-05, + "loss": 0.10733175277709961, + "step": 4142 + }, + { + "epoch": 0.5773009127011774, + "grad_norm": 0.6112959384918213, + "learning_rate": 1.7204609931823702e-05, + "loss": 0.1143498420715332, + "step": 4143 + }, + { + "epoch": 0.5774402563923918, + "grad_norm": 0.36236146092414856, + "learning_rate": 1.7195275644086827e-05, + "loss": 0.07628440856933594, + "step": 4144 + }, + { + "epoch": 0.5775796000836062, + "grad_norm": 0.36368685960769653, + "learning_rate": 1.7185941979495676e-05, + "loss": 0.08197784423828125, + "step": 4145 + }, + { + "epoch": 0.5777189437748206, + "grad_norm": 0.504126250743866, + "learning_rate": 1.717660894012397e-05, + "loss": 0.0807647705078125, + "step": 4146 + }, + { + "epoch": 0.577858287466035, + "grad_norm": 0.5811656713485718, + "learning_rate": 1.7167276528045308e-05, + "loss": 0.086090087890625, + "step": 4147 + }, + { + "epoch": 0.5779976311572493, + "grad_norm": 0.6472553610801697, + "learning_rate": 1.7157944745333114e-05, + "loss": 0.10199737548828125, + "step": 4148 + }, + { + "epoch": 0.5781369748484637, + "grad_norm": 0.781489372253418, + "learning_rate": 1.714861359406072e-05, + "loss": 0.10749530792236328, + "step": 4149 + }, + { + "epoch": 0.5782763185396781, + "grad_norm": 0.5809580087661743, + "learning_rate": 1.7139283076301287e-05, + "loss": 0.11397743225097656, + "step": 4150 + }, + { + "epoch": 0.5784156622308925, + "grad_norm": 0.43551144003868103, + "learning_rate": 1.7129953194127837e-05, + "loss": 0.06860959529876709, + "step": 4151 + }, + { + "epoch": 0.5785550059221068, + "grad_norm": 0.5036539435386658, + "learning_rate": 1.712062394961328e-05, + "loss": 0.09179115295410156, + "step": 4152 + }, + { + "epoch": 0.5786943496133212, + "grad_norm": 0.4649757146835327, + "learning_rate": 1.7111295344830324e-05, + "loss": 0.07700920104980469, + "step": 4153 + }, + { + "epoch": 0.5788336933045356, + "grad_norm": 0.5757923722267151, + "learning_rate": 1.7101967381851604e-05, + "loss": 0.09709835052490234, + "step": 4154 + }, + { + "epoch": 0.57897303699575, + "grad_norm": 0.630185604095459, + "learning_rate": 1.709264006274956e-05, + "loss": 0.09353446960449219, + "step": 4155 + }, + { + "epoch": 0.5791123806869644, + "grad_norm": 0.6551588773727417, + "learning_rate": 1.7083313389596523e-05, + "loss": 0.10773849487304688, + "step": 4156 + }, + { + "epoch": 0.5792517243781787, + "grad_norm": 0.5258731842041016, + "learning_rate": 1.7073987364464664e-05, + "loss": 0.09084129333496094, + "step": 4157 + }, + { + "epoch": 0.5793910680693931, + "grad_norm": 0.3422320485115051, + "learning_rate": 1.7064661989426012e-05, + "loss": 0.07803821563720703, + "step": 4158 + }, + { + "epoch": 0.5795304117606075, + "grad_norm": 0.3879438042640686, + "learning_rate": 1.7055337266552446e-05, + "loss": 0.07891082763671875, + "step": 4159 + }, + { + "epoch": 0.5796697554518219, + "grad_norm": 0.36853256821632385, + "learning_rate": 1.704601319791571e-05, + "loss": 0.0758056640625, + "step": 4160 + }, + { + "epoch": 0.5798090991430362, + "grad_norm": 0.4299737811088562, + "learning_rate": 1.7036689785587404e-05, + "loss": 0.07748222351074219, + "step": 4161 + }, + { + "epoch": 0.5799484428342507, + "grad_norm": 0.4307395815849304, + "learning_rate": 1.7027367031638976e-05, + "loss": 0.08256912231445312, + "step": 4162 + }, + { + "epoch": 0.5800877865254651, + "grad_norm": 0.5644723773002625, + "learning_rate": 1.7018044938141728e-05, + "loss": 0.09168243408203125, + "step": 4163 + }, + { + "epoch": 0.5802271302166795, + "grad_norm": 0.3892327845096588, + "learning_rate": 1.700872350716681e-05, + "loss": 0.08499336242675781, + "step": 4164 + }, + { + "epoch": 0.5803664739078939, + "grad_norm": 0.34907740354537964, + "learning_rate": 1.6999402740785238e-05, + "loss": 0.08849143981933594, + "step": 4165 + }, + { + "epoch": 0.5805058175991082, + "grad_norm": 0.37257587909698486, + "learning_rate": 1.6990082641067876e-05, + "loss": 0.07004356384277344, + "step": 4166 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.5404939651489258, + "learning_rate": 1.6980763210085425e-05, + "loss": 0.09870529174804688, + "step": 4167 + }, + { + "epoch": 0.580784504981537, + "grad_norm": 0.5717820525169373, + "learning_rate": 1.6971444449908474e-05, + "loss": 0.08575439453125, + "step": 4168 + }, + { + "epoch": 0.5809238486727514, + "grad_norm": 0.6080612540245056, + "learning_rate": 1.696212636260741e-05, + "loss": 0.09522056579589844, + "step": 4169 + }, + { + "epoch": 0.5810631923639658, + "grad_norm": 0.5903803706169128, + "learning_rate": 1.6952808950252518e-05, + "loss": 0.10212516784667969, + "step": 4170 + }, + { + "epoch": 0.5812025360551801, + "grad_norm": 0.4246671497821808, + "learning_rate": 1.69434922149139e-05, + "loss": 0.09372520446777344, + "step": 4171 + }, + { + "epoch": 0.5813418797463945, + "grad_norm": 0.6402531862258911, + "learning_rate": 1.693417615866154e-05, + "loss": 0.1173095703125, + "step": 4172 + }, + { + "epoch": 0.5814812234376089, + "grad_norm": 0.4164126515388489, + "learning_rate": 1.6924860783565245e-05, + "loss": 0.08985280990600586, + "step": 4173 + }, + { + "epoch": 0.5816205671288233, + "grad_norm": 0.48621663451194763, + "learning_rate": 1.691554609169467e-05, + "loss": 0.10044431686401367, + "step": 4174 + }, + { + "epoch": 0.5817599108200376, + "grad_norm": 0.3325381577014923, + "learning_rate": 1.6906232085119342e-05, + "loss": 0.08514404296875, + "step": 4175 + }, + { + "epoch": 0.581899254511252, + "grad_norm": 0.42928779125213623, + "learning_rate": 1.6896918765908604e-05, + "loss": 0.09528350830078125, + "step": 4176 + }, + { + "epoch": 0.5820385982024664, + "grad_norm": 0.35797443985939026, + "learning_rate": 1.6887606136131673e-05, + "loss": 0.0769805908203125, + "step": 4177 + }, + { + "epoch": 0.5821779418936808, + "grad_norm": 0.3857658803462982, + "learning_rate": 1.687829419785761e-05, + "loss": 0.09619808197021484, + "step": 4178 + }, + { + "epoch": 0.5823172855848952, + "grad_norm": 0.281948447227478, + "learning_rate": 1.68689829531553e-05, + "loss": 0.07663917541503906, + "step": 4179 + }, + { + "epoch": 0.5824566292761095, + "grad_norm": 0.6317964792251587, + "learning_rate": 1.6859672404093494e-05, + "loss": 0.08382797241210938, + "step": 4180 + }, + { + "epoch": 0.5825959729673239, + "grad_norm": 0.7120321989059448, + "learning_rate": 1.6850362552740786e-05, + "loss": 0.08840274810791016, + "step": 4181 + }, + { + "epoch": 0.5827353166585383, + "grad_norm": 0.3220042288303375, + "learning_rate": 1.6841053401165614e-05, + "loss": 0.07934379577636719, + "step": 4182 + }, + { + "epoch": 0.5828746603497527, + "grad_norm": 0.4286179840564728, + "learning_rate": 1.683174495143625e-05, + "loss": 0.08567619323730469, + "step": 4183 + }, + { + "epoch": 0.583014004040967, + "grad_norm": 0.3601298928260803, + "learning_rate": 1.6822437205620834e-05, + "loss": 0.09075736999511719, + "step": 4184 + }, + { + "epoch": 0.5831533477321814, + "grad_norm": 0.32736748456954956, + "learning_rate": 1.681313016578732e-05, + "loss": 0.0657196044921875, + "step": 4185 + }, + { + "epoch": 0.5832926914233958, + "grad_norm": 0.42605799436569214, + "learning_rate": 1.680382383400353e-05, + "loss": 0.10268497467041016, + "step": 4186 + }, + { + "epoch": 0.5834320351146102, + "grad_norm": 0.5769495964050293, + "learning_rate": 1.679451821233711e-05, + "loss": 0.09508514404296875, + "step": 4187 + }, + { + "epoch": 0.5835713788058245, + "grad_norm": 0.36668670177459717, + "learning_rate": 1.6785213302855562e-05, + "loss": 0.08728981018066406, + "step": 4188 + }, + { + "epoch": 0.5837107224970389, + "grad_norm": 0.8630025386810303, + "learning_rate": 1.6775909107626227e-05, + "loss": 0.0892324447631836, + "step": 4189 + }, + { + "epoch": 0.5838500661882533, + "grad_norm": 0.36858829855918884, + "learning_rate": 1.6766605628716277e-05, + "loss": 0.07248687744140625, + "step": 4190 + }, + { + "epoch": 0.5839894098794677, + "grad_norm": 0.5380629897117615, + "learning_rate": 1.675730286819274e-05, + "loss": 0.09572601318359375, + "step": 4191 + }, + { + "epoch": 0.5841287535706821, + "grad_norm": 0.4832984209060669, + "learning_rate": 1.6748000828122465e-05, + "loss": 0.10136222839355469, + "step": 4192 + }, + { + "epoch": 0.5842680972618964, + "grad_norm": 0.4115689992904663, + "learning_rate": 1.673869951057217e-05, + "loss": 0.09781265258789062, + "step": 4193 + }, + { + "epoch": 0.5844074409531108, + "grad_norm": 0.56966233253479, + "learning_rate": 1.6729398917608387e-05, + "loss": 0.10247802734375, + "step": 4194 + }, + { + "epoch": 0.5845467846443252, + "grad_norm": 1.2714968919754028, + "learning_rate": 1.6720099051297494e-05, + "loss": 0.11930274963378906, + "step": 4195 + }, + { + "epoch": 0.5846861283355396, + "grad_norm": 0.6481845378875732, + "learning_rate": 1.6710799913705706e-05, + "loss": 0.08934783935546875, + "step": 4196 + }, + { + "epoch": 0.584825472026754, + "grad_norm": 0.6684801578521729, + "learning_rate": 1.6701501506899087e-05, + "loss": 0.0995941162109375, + "step": 4197 + }, + { + "epoch": 0.5849648157179683, + "grad_norm": 0.5722195506095886, + "learning_rate": 1.6692203832943527e-05, + "loss": 0.08437919616699219, + "step": 4198 + }, + { + "epoch": 0.5851041594091827, + "grad_norm": 0.4085668623447418, + "learning_rate": 1.6682906893904754e-05, + "loss": 0.08298873901367188, + "step": 4199 + }, + { + "epoch": 0.5852435031003971, + "grad_norm": 0.4987976551055908, + "learning_rate": 1.6673610691848346e-05, + "loss": 0.08143424987792969, + "step": 4200 + }, + { + "epoch": 0.5853828467916115, + "grad_norm": 0.3639145791530609, + "learning_rate": 1.6664315228839696e-05, + "loss": 0.08596324920654297, + "step": 4201 + }, + { + "epoch": 0.5855221904828259, + "grad_norm": 0.5220497250556946, + "learning_rate": 1.6655020506944046e-05, + "loss": 0.09870529174804688, + "step": 4202 + }, + { + "epoch": 0.5856615341740403, + "grad_norm": 0.3681846261024475, + "learning_rate": 1.664572652822647e-05, + "loss": 0.09103202819824219, + "step": 4203 + }, + { + "epoch": 0.5858008778652547, + "grad_norm": 0.7237354516983032, + "learning_rate": 1.6636433294751883e-05, + "loss": 0.11651420593261719, + "step": 4204 + }, + { + "epoch": 0.5859402215564691, + "grad_norm": 0.4051128327846527, + "learning_rate": 1.662714080858503e-05, + "loss": 0.09729957580566406, + "step": 4205 + }, + { + "epoch": 0.5860795652476835, + "grad_norm": 0.5508955121040344, + "learning_rate": 1.6617849071790484e-05, + "loss": 0.09328269958496094, + "step": 4206 + }, + { + "epoch": 0.5862189089388978, + "grad_norm": 0.5113581418991089, + "learning_rate": 1.6608558086432655e-05, + "loss": 0.09533500671386719, + "step": 4207 + }, + { + "epoch": 0.5863582526301122, + "grad_norm": 0.32103630900382996, + "learning_rate": 1.6599267854575788e-05, + "loss": 0.08532142639160156, + "step": 4208 + }, + { + "epoch": 0.5864975963213266, + "grad_norm": 0.4968346357345581, + "learning_rate": 1.6589978378283967e-05, + "loss": 0.08466720581054688, + "step": 4209 + }, + { + "epoch": 0.586636940012541, + "grad_norm": 0.9008628726005554, + "learning_rate": 1.6580689659621106e-05, + "loss": 0.11308097839355469, + "step": 4210 + }, + { + "epoch": 0.5867762837037553, + "grad_norm": 0.4897223711013794, + "learning_rate": 1.6571401700650934e-05, + "loss": 0.08689498901367188, + "step": 4211 + }, + { + "epoch": 0.5869156273949697, + "grad_norm": 0.304302841424942, + "learning_rate": 1.6562114503437017e-05, + "loss": 0.07739830017089844, + "step": 4212 + }, + { + "epoch": 0.5870549710861841, + "grad_norm": 0.42277440428733826, + "learning_rate": 1.6552828070042782e-05, + "loss": 0.08588027954101562, + "step": 4213 + }, + { + "epoch": 0.5871943147773985, + "grad_norm": 0.40320080518722534, + "learning_rate": 1.6543542402531446e-05, + "loss": 0.08659076690673828, + "step": 4214 + }, + { + "epoch": 0.5873336584686129, + "grad_norm": 0.7024858593940735, + "learning_rate": 1.6534257502966078e-05, + "loss": 0.1053314208984375, + "step": 4215 + }, + { + "epoch": 0.5874730021598272, + "grad_norm": 0.36533665657043457, + "learning_rate": 1.6524973373409563e-05, + "loss": 0.07319068908691406, + "step": 4216 + }, + { + "epoch": 0.5876123458510416, + "grad_norm": 0.6962522864341736, + "learning_rate": 1.651569001592463e-05, + "loss": 0.09855270385742188, + "step": 4217 + }, + { + "epoch": 0.587751689542256, + "grad_norm": 0.402290016412735, + "learning_rate": 1.6506407432573828e-05, + "loss": 0.08853411674499512, + "step": 4218 + }, + { + "epoch": 0.5878910332334704, + "grad_norm": 0.6580333709716797, + "learning_rate": 1.6497125625419533e-05, + "loss": 0.09057998657226562, + "step": 4219 + }, + { + "epoch": 0.5880303769246847, + "grad_norm": 0.6045043468475342, + "learning_rate": 1.6487844596523955e-05, + "loss": 0.10822772979736328, + "step": 4220 + }, + { + "epoch": 0.5881697206158991, + "grad_norm": 0.46127206087112427, + "learning_rate": 1.6478564347949127e-05, + "loss": 0.09169960021972656, + "step": 4221 + }, + { + "epoch": 0.5883090643071135, + "grad_norm": 0.3184743821620941, + "learning_rate": 1.6469284881756898e-05, + "loss": 0.07824516296386719, + "step": 4222 + }, + { + "epoch": 0.5884484079983279, + "grad_norm": 0.2930125296115875, + "learning_rate": 1.6460006200008963e-05, + "loss": 0.07738065719604492, + "step": 4223 + }, + { + "epoch": 0.5885877516895422, + "grad_norm": 0.5329018831253052, + "learning_rate": 1.645072830476683e-05, + "loss": 0.07637405395507812, + "step": 4224 + }, + { + "epoch": 0.5887270953807566, + "grad_norm": 0.6528330445289612, + "learning_rate": 1.644145119809184e-05, + "loss": 0.1099557876586914, + "step": 4225 + }, + { + "epoch": 0.588866439071971, + "grad_norm": 0.7624825835227966, + "learning_rate": 1.643217488204515e-05, + "loss": 0.08252859115600586, + "step": 4226 + }, + { + "epoch": 0.5890057827631854, + "grad_norm": 0.3408341407775879, + "learning_rate": 1.6422899358687745e-05, + "loss": 0.077301025390625, + "step": 4227 + }, + { + "epoch": 0.5891451264543998, + "grad_norm": 0.7423257827758789, + "learning_rate": 1.641362463008043e-05, + "loss": 0.0803537368774414, + "step": 4228 + }, + { + "epoch": 0.5892844701456141, + "grad_norm": 0.4709376394748688, + "learning_rate": 1.6404350698283853e-05, + "loss": 0.09550762176513672, + "step": 4229 + }, + { + "epoch": 0.5894238138368285, + "grad_norm": 0.8350943922996521, + "learning_rate": 1.6395077565358458e-05, + "loss": 0.12995529174804688, + "step": 4230 + }, + { + "epoch": 0.5895631575280429, + "grad_norm": 0.47992873191833496, + "learning_rate": 1.6385805233364528e-05, + "loss": 0.09257888793945312, + "step": 4231 + }, + { + "epoch": 0.5897025012192573, + "grad_norm": 0.4523908197879791, + "learning_rate": 1.6376533704362155e-05, + "loss": 0.0891876220703125, + "step": 4232 + }, + { + "epoch": 0.5898418449104716, + "grad_norm": 0.41858986020088196, + "learning_rate": 1.6367262980411273e-05, + "loss": 0.09838485717773438, + "step": 4233 + }, + { + "epoch": 0.589981188601686, + "grad_norm": 0.6939138174057007, + "learning_rate": 1.635799306357162e-05, + "loss": 0.10001373291015625, + "step": 4234 + }, + { + "epoch": 0.5901205322929004, + "grad_norm": 0.5092946887016296, + "learning_rate": 1.6348723955902754e-05, + "loss": 0.07869815826416016, + "step": 4235 + }, + { + "epoch": 0.5902598759841148, + "grad_norm": 0.641283392906189, + "learning_rate": 1.6339455659464073e-05, + "loss": 0.09864234924316406, + "step": 4236 + }, + { + "epoch": 0.5903992196753292, + "grad_norm": 0.5899220108985901, + "learning_rate": 1.6330188176314772e-05, + "loss": 0.09642887115478516, + "step": 4237 + }, + { + "epoch": 0.5905385633665435, + "grad_norm": 0.4179365932941437, + "learning_rate": 1.6320921508513874e-05, + "loss": 0.07998943328857422, + "step": 4238 + }, + { + "epoch": 0.5906779070577579, + "grad_norm": 0.40110525488853455, + "learning_rate": 1.6311655658120214e-05, + "loss": 0.09003591537475586, + "step": 4239 + }, + { + "epoch": 0.5908172507489723, + "grad_norm": 0.4901134967803955, + "learning_rate": 1.630239062719247e-05, + "loss": 0.09169387817382812, + "step": 4240 + }, + { + "epoch": 0.5909565944401867, + "grad_norm": 0.36193525791168213, + "learning_rate": 1.6293126417789107e-05, + "loss": 0.07851409912109375, + "step": 4241 + }, + { + "epoch": 0.5910959381314012, + "grad_norm": 0.5199028253555298, + "learning_rate": 1.628386303196843e-05, + "loss": 0.09597969055175781, + "step": 4242 + }, + { + "epoch": 0.5912352818226155, + "grad_norm": 0.4662463665008545, + "learning_rate": 1.627460047178854e-05, + "loss": 0.08762550354003906, + "step": 4243 + }, + { + "epoch": 0.5913746255138299, + "grad_norm": 0.6426933407783508, + "learning_rate": 1.6265338739307374e-05, + "loss": 0.10389900207519531, + "step": 4244 + }, + { + "epoch": 0.5915139692050443, + "grad_norm": 0.5713728666305542, + "learning_rate": 1.6256077836582677e-05, + "loss": 0.08131599426269531, + "step": 4245 + }, + { + "epoch": 0.5916533128962587, + "grad_norm": 0.6286999583244324, + "learning_rate": 1.624681776567201e-05, + "loss": 0.09122467041015625, + "step": 4246 + }, + { + "epoch": 0.591792656587473, + "grad_norm": 0.5051855444908142, + "learning_rate": 1.6237558528632754e-05, + "loss": 0.08390045166015625, + "step": 4247 + }, + { + "epoch": 0.5919320002786874, + "grad_norm": 0.42628613114356995, + "learning_rate": 1.6228300127522083e-05, + "loss": 0.08449363708496094, + "step": 4248 + }, + { + "epoch": 0.5920713439699018, + "grad_norm": 0.44804415106773376, + "learning_rate": 1.6219042564397023e-05, + "loss": 0.08080673217773438, + "step": 4249 + }, + { + "epoch": 0.5922106876611162, + "grad_norm": 0.8212116360664368, + "learning_rate": 1.620978584131438e-05, + "loss": 0.09714889526367188, + "step": 4250 + }, + { + "epoch": 0.5923500313523306, + "grad_norm": 0.4753032922744751, + "learning_rate": 1.620052996033079e-05, + "loss": 0.09211587905883789, + "step": 4251 + }, + { + "epoch": 0.5924893750435449, + "grad_norm": 0.428370863199234, + "learning_rate": 1.619127492350271e-05, + "loss": 0.07761192321777344, + "step": 4252 + }, + { + "epoch": 0.5926287187347593, + "grad_norm": 0.43237364292144775, + "learning_rate": 1.6182020732886377e-05, + "loss": 0.10213088989257812, + "step": 4253 + }, + { + "epoch": 0.5927680624259737, + "grad_norm": 0.39624452590942383, + "learning_rate": 1.6172767390537874e-05, + "loss": 0.07292652130126953, + "step": 4254 + }, + { + "epoch": 0.5929074061171881, + "grad_norm": 0.5561244487762451, + "learning_rate": 1.6163514898513076e-05, + "loss": 0.10494041442871094, + "step": 4255 + }, + { + "epoch": 0.5930467498084024, + "grad_norm": 0.4697701334953308, + "learning_rate": 1.6154263258867683e-05, + "loss": 0.07524776458740234, + "step": 4256 + }, + { + "epoch": 0.5931860934996168, + "grad_norm": 0.6904475092887878, + "learning_rate": 1.6145012473657197e-05, + "loss": 0.12211990356445312, + "step": 4257 + }, + { + "epoch": 0.5933254371908312, + "grad_norm": 0.32625502347946167, + "learning_rate": 1.613576254493693e-05, + "loss": 0.07524728775024414, + "step": 4258 + }, + { + "epoch": 0.5934647808820456, + "grad_norm": 0.43765389919281006, + "learning_rate": 1.6126513474762e-05, + "loss": 0.09572887420654297, + "step": 4259 + }, + { + "epoch": 0.59360412457326, + "grad_norm": 0.5397282242774963, + "learning_rate": 1.6117265265187337e-05, + "loss": 0.10595703125, + "step": 4260 + }, + { + "epoch": 0.5937434682644743, + "grad_norm": 0.3795608580112457, + "learning_rate": 1.6108017918267692e-05, + "loss": 0.08953285217285156, + "step": 4261 + }, + { + "epoch": 0.5938828119556887, + "grad_norm": 0.5667406916618347, + "learning_rate": 1.6098771436057613e-05, + "loss": 0.09344482421875, + "step": 4262 + }, + { + "epoch": 0.5940221556469031, + "grad_norm": 0.35895273089408875, + "learning_rate": 1.6089525820611453e-05, + "loss": 0.07699084281921387, + "step": 4263 + }, + { + "epoch": 0.5941614993381175, + "grad_norm": 0.7554701566696167, + "learning_rate": 1.6080281073983375e-05, + "loss": 0.10429000854492188, + "step": 4264 + }, + { + "epoch": 0.5943008430293318, + "grad_norm": 0.5861360430717468, + "learning_rate": 1.6071037198227353e-05, + "loss": 0.09770584106445312, + "step": 4265 + }, + { + "epoch": 0.5944401867205462, + "grad_norm": 0.5748872756958008, + "learning_rate": 1.606179419539717e-05, + "loss": 0.10071945190429688, + "step": 4266 + }, + { + "epoch": 0.5945795304117606, + "grad_norm": 0.6628357172012329, + "learning_rate": 1.60525520675464e-05, + "loss": 0.09652900695800781, + "step": 4267 + }, + { + "epoch": 0.594718874102975, + "grad_norm": 0.5141552090644836, + "learning_rate": 1.6043310816728453e-05, + "loss": 0.08684539794921875, + "step": 4268 + }, + { + "epoch": 0.5948582177941893, + "grad_norm": 0.8549497723579407, + "learning_rate": 1.6034070444996498e-05, + "loss": 0.09982872009277344, + "step": 4269 + }, + { + "epoch": 0.5949975614854037, + "grad_norm": 0.30955612659454346, + "learning_rate": 1.6024830954403547e-05, + "loss": 0.07477283477783203, + "step": 4270 + }, + { + "epoch": 0.5951369051766181, + "grad_norm": 0.28224754333496094, + "learning_rate": 1.60155923470024e-05, + "loss": 0.08225822448730469, + "step": 4271 + }, + { + "epoch": 0.5952762488678325, + "grad_norm": 0.7289025187492371, + "learning_rate": 1.6006354624845672e-05, + "loss": 0.10581398010253906, + "step": 4272 + }, + { + "epoch": 0.5954155925590469, + "grad_norm": 0.34535107016563416, + "learning_rate": 1.5997117789985776e-05, + "loss": 0.0826263427734375, + "step": 4273 + }, + { + "epoch": 0.5955549362502612, + "grad_norm": 0.31299877166748047, + "learning_rate": 1.5987881844474916e-05, + "loss": 0.0789041519165039, + "step": 4274 + }, + { + "epoch": 0.5956942799414756, + "grad_norm": 0.46027547121047974, + "learning_rate": 1.597864679036511e-05, + "loss": 0.08938789367675781, + "step": 4275 + }, + { + "epoch": 0.59583362363269, + "grad_norm": 0.5807428359985352, + "learning_rate": 1.5969412629708175e-05, + "loss": 0.09460067749023438, + "step": 4276 + }, + { + "epoch": 0.5959729673239044, + "grad_norm": 0.5122325420379639, + "learning_rate": 1.5960179364555736e-05, + "loss": 0.09899330139160156, + "step": 4277 + }, + { + "epoch": 0.5961123110151187, + "grad_norm": 0.5406933426856995, + "learning_rate": 1.5950946996959215e-05, + "loss": 0.11636543273925781, + "step": 4278 + }, + { + "epoch": 0.5962516547063331, + "grad_norm": 0.40240830183029175, + "learning_rate": 1.594171552896983e-05, + "loss": 0.0969083309173584, + "step": 4279 + }, + { + "epoch": 0.5963909983975475, + "grad_norm": 0.43527093529701233, + "learning_rate": 1.5932484962638596e-05, + "loss": 0.09745502471923828, + "step": 4280 + }, + { + "epoch": 0.5965303420887619, + "grad_norm": 0.7289873957633972, + "learning_rate": 1.5923255300016343e-05, + "loss": 0.10216140747070312, + "step": 4281 + }, + { + "epoch": 0.5966696857799764, + "grad_norm": 0.4373548626899719, + "learning_rate": 1.5914026543153692e-05, + "loss": 0.09338760375976562, + "step": 4282 + }, + { + "epoch": 0.5968090294711907, + "grad_norm": 0.4189755916595459, + "learning_rate": 1.5904798694101052e-05, + "loss": 0.09575653076171875, + "step": 4283 + }, + { + "epoch": 0.5969483731624051, + "grad_norm": 0.4911472499370575, + "learning_rate": 1.5895571754908656e-05, + "loss": 0.09208488464355469, + "step": 4284 + }, + { + "epoch": 0.5970877168536195, + "grad_norm": 0.44053131341934204, + "learning_rate": 1.5886345727626506e-05, + "loss": 0.08357906341552734, + "step": 4285 + }, + { + "epoch": 0.5972270605448339, + "grad_norm": 0.37428534030914307, + "learning_rate": 1.587712061430442e-05, + "loss": 0.08483266830444336, + "step": 4286 + }, + { + "epoch": 0.5973664042360483, + "grad_norm": 0.4775887131690979, + "learning_rate": 1.5867896416992002e-05, + "loss": 0.08924102783203125, + "step": 4287 + }, + { + "epoch": 0.5975057479272626, + "grad_norm": 0.4304426908493042, + "learning_rate": 1.5858673137738664e-05, + "loss": 0.09410858154296875, + "step": 4288 + }, + { + "epoch": 0.597645091618477, + "grad_norm": 0.43928590416908264, + "learning_rate": 1.5849450778593615e-05, + "loss": 0.08529424667358398, + "step": 4289 + }, + { + "epoch": 0.5977844353096914, + "grad_norm": 0.4520960748195648, + "learning_rate": 1.5840229341605837e-05, + "loss": 0.08745002746582031, + "step": 4290 + }, + { + "epoch": 0.5979237790009058, + "grad_norm": 0.6489978432655334, + "learning_rate": 1.5831008828824134e-05, + "loss": 0.10941123962402344, + "step": 4291 + }, + { + "epoch": 0.5980631226921201, + "grad_norm": 0.4049092233181, + "learning_rate": 1.582178924229708e-05, + "loss": 0.10071659088134766, + "step": 4292 + }, + { + "epoch": 0.5982024663833345, + "grad_norm": 0.44184044003486633, + "learning_rate": 1.5812570584073076e-05, + "loss": 0.08213090896606445, + "step": 4293 + }, + { + "epoch": 0.5983418100745489, + "grad_norm": 0.330539345741272, + "learning_rate": 1.580335285620028e-05, + "loss": 0.09272956848144531, + "step": 4294 + }, + { + "epoch": 0.5984811537657633, + "grad_norm": 0.4492928683757782, + "learning_rate": 1.5794136060726682e-05, + "loss": 0.1010589599609375, + "step": 4295 + }, + { + "epoch": 0.5986204974569777, + "grad_norm": 0.5034825801849365, + "learning_rate": 1.5784920199700015e-05, + "loss": 0.09301567077636719, + "step": 4296 + }, + { + "epoch": 0.598759841148192, + "grad_norm": 0.4567437469959259, + "learning_rate": 1.5775705275167854e-05, + "loss": 0.09641170501708984, + "step": 4297 + }, + { + "epoch": 0.5988991848394064, + "grad_norm": 0.40427276492118835, + "learning_rate": 1.576649128917754e-05, + "loss": 0.08669090270996094, + "step": 4298 + }, + { + "epoch": 0.5990385285306208, + "grad_norm": 0.37781208753585815, + "learning_rate": 1.5757278243776203e-05, + "loss": 0.08764362335205078, + "step": 4299 + }, + { + "epoch": 0.5991778722218352, + "grad_norm": 0.2829378545284271, + "learning_rate": 1.5748066141010785e-05, + "loss": 0.06861305236816406, + "step": 4300 + }, + { + "epoch": 0.5993172159130495, + "grad_norm": 0.333654522895813, + "learning_rate": 1.5738854982927993e-05, + "loss": 0.082061767578125, + "step": 4301 + }, + { + "epoch": 0.5994565596042639, + "grad_norm": 0.5933333039283752, + "learning_rate": 1.5729644771574343e-05, + "loss": 0.09534072875976562, + "step": 4302 + }, + { + "epoch": 0.5995959032954783, + "grad_norm": 0.6097525358200073, + "learning_rate": 1.572043550899612e-05, + "loss": 0.08976078033447266, + "step": 4303 + }, + { + "epoch": 0.5997352469866927, + "grad_norm": 0.3395460247993469, + "learning_rate": 1.5711227197239435e-05, + "loss": 0.08432388305664062, + "step": 4304 + }, + { + "epoch": 0.599874590677907, + "grad_norm": 0.3357216715812683, + "learning_rate": 1.5702019838350153e-05, + "loss": 0.08263969421386719, + "step": 4305 + }, + { + "epoch": 0.6000139343691214, + "grad_norm": 0.4308425486087799, + "learning_rate": 1.5692813434373934e-05, + "loss": 0.09267139434814453, + "step": 4306 + }, + { + "epoch": 0.6001532780603358, + "grad_norm": 0.38832974433898926, + "learning_rate": 1.5683607987356236e-05, + "loss": 0.08647823333740234, + "step": 4307 + }, + { + "epoch": 0.6002926217515502, + "grad_norm": 0.5447728633880615, + "learning_rate": 1.5674403499342292e-05, + "loss": 0.10590314865112305, + "step": 4308 + }, + { + "epoch": 0.6004319654427646, + "grad_norm": 0.4216476380825043, + "learning_rate": 1.566519997237714e-05, + "loss": 0.08045673370361328, + "step": 4309 + }, + { + "epoch": 0.6005713091339789, + "grad_norm": 0.45460644364356995, + "learning_rate": 1.5655997408505595e-05, + "loss": 0.07049560546875, + "step": 4310 + }, + { + "epoch": 0.6007106528251933, + "grad_norm": 0.6487840414047241, + "learning_rate": 1.5646795809772246e-05, + "loss": 0.09601211547851562, + "step": 4311 + }, + { + "epoch": 0.6008499965164077, + "grad_norm": 0.42527836561203003, + "learning_rate": 1.563759517822148e-05, + "loss": 0.08262062072753906, + "step": 4312 + }, + { + "epoch": 0.6009893402076221, + "grad_norm": 0.4264076054096222, + "learning_rate": 1.562839551589747e-05, + "loss": 0.08202552795410156, + "step": 4313 + }, + { + "epoch": 0.6011286838988364, + "grad_norm": 0.46605825424194336, + "learning_rate": 1.5619196824844174e-05, + "loss": 0.1017913818359375, + "step": 4314 + }, + { + "epoch": 0.6012680275900508, + "grad_norm": 0.3580735921859741, + "learning_rate": 1.5609999107105322e-05, + "loss": 0.08504867553710938, + "step": 4315 + }, + { + "epoch": 0.6014073712812652, + "grad_norm": 0.4196797013282776, + "learning_rate": 1.5600802364724456e-05, + "loss": 0.09925079345703125, + "step": 4316 + }, + { + "epoch": 0.6015467149724796, + "grad_norm": 0.312677800655365, + "learning_rate": 1.559160659974486e-05, + "loss": 0.06736373901367188, + "step": 4317 + }, + { + "epoch": 0.601686058663694, + "grad_norm": 0.33112943172454834, + "learning_rate": 1.5582411814209633e-05, + "loss": 0.07274532318115234, + "step": 4318 + }, + { + "epoch": 0.6018254023549083, + "grad_norm": 0.41685375571250916, + "learning_rate": 1.5573218010161642e-05, + "loss": 0.09782218933105469, + "step": 4319 + }, + { + "epoch": 0.6019647460461227, + "grad_norm": 0.5263218879699707, + "learning_rate": 1.556402518964355e-05, + "loss": 0.08154678344726562, + "step": 4320 + }, + { + "epoch": 0.6021040897373371, + "grad_norm": 0.38316217064857483, + "learning_rate": 1.5554833354697787e-05, + "loss": 0.08935546875, + "step": 4321 + }, + { + "epoch": 0.6022434334285516, + "grad_norm": 0.4328362047672272, + "learning_rate": 1.5545642507366566e-05, + "loss": 0.09283828735351562, + "step": 4322 + }, + { + "epoch": 0.602382777119766, + "grad_norm": 0.3548051118850708, + "learning_rate": 1.5536452649691884e-05, + "loss": 0.08063888549804688, + "step": 4323 + }, + { + "epoch": 0.6025221208109803, + "grad_norm": 0.4427885413169861, + "learning_rate": 1.5527263783715515e-05, + "loss": 0.08179855346679688, + "step": 4324 + }, + { + "epoch": 0.6026614645021947, + "grad_norm": 0.7794479727745056, + "learning_rate": 1.551807591147902e-05, + "loss": 0.102569580078125, + "step": 4325 + }, + { + "epoch": 0.6028008081934091, + "grad_norm": 0.5385928750038147, + "learning_rate": 1.5508889035023738e-05, + "loss": 0.09138298034667969, + "step": 4326 + }, + { + "epoch": 0.6029401518846235, + "grad_norm": 0.474816232919693, + "learning_rate": 1.549970315639078e-05, + "loss": 0.09482955932617188, + "step": 4327 + }, + { + "epoch": 0.6030794955758378, + "grad_norm": 0.6967283487319946, + "learning_rate": 1.5490518277621028e-05, + "loss": 0.09765625, + "step": 4328 + }, + { + "epoch": 0.6032188392670522, + "grad_norm": 0.40661489963531494, + "learning_rate": 1.5481334400755167e-05, + "loss": 0.0853424072265625, + "step": 4329 + }, + { + "epoch": 0.6033581829582666, + "grad_norm": 0.5682750940322876, + "learning_rate": 1.547215152783364e-05, + "loss": 0.10231208801269531, + "step": 4330 + }, + { + "epoch": 0.603497526649481, + "grad_norm": 0.5921511054039001, + "learning_rate": 1.5462969660896677e-05, + "loss": 0.1125640869140625, + "step": 4331 + }, + { + "epoch": 0.6036368703406954, + "grad_norm": 0.6482626795768738, + "learning_rate": 1.545378880198426e-05, + "loss": 0.11110496520996094, + "step": 4332 + }, + { + "epoch": 0.6037762140319097, + "grad_norm": 0.5294292569160461, + "learning_rate": 1.544460895313619e-05, + "loss": 0.08251190185546875, + "step": 4333 + }, + { + "epoch": 0.6039155577231241, + "grad_norm": 0.4900476336479187, + "learning_rate": 1.5435430116392003e-05, + "loss": 0.09024810791015625, + "step": 4334 + }, + { + "epoch": 0.6040549014143385, + "grad_norm": 0.4689769744873047, + "learning_rate": 1.542625229379103e-05, + "loss": 0.09596824645996094, + "step": 4335 + }, + { + "epoch": 0.6041942451055529, + "grad_norm": 0.44141486287117004, + "learning_rate": 1.5417075487372384e-05, + "loss": 0.10376167297363281, + "step": 4336 + }, + { + "epoch": 0.6043335887967672, + "grad_norm": 0.693385124206543, + "learning_rate": 1.5407899699174936e-05, + "loss": 0.10559654235839844, + "step": 4337 + }, + { + "epoch": 0.6044729324879816, + "grad_norm": 0.5303184390068054, + "learning_rate": 1.5398724931237334e-05, + "loss": 0.09731101989746094, + "step": 4338 + }, + { + "epoch": 0.604612276179196, + "grad_norm": 0.38057681918144226, + "learning_rate": 1.5389551185598e-05, + "loss": 0.09008407592773438, + "step": 4339 + }, + { + "epoch": 0.6047516198704104, + "grad_norm": 0.322321355342865, + "learning_rate": 1.5380378464295133e-05, + "loss": 0.07313346862792969, + "step": 4340 + }, + { + "epoch": 0.6048909635616248, + "grad_norm": 0.32284215092658997, + "learning_rate": 1.537120676936671e-05, + "loss": 0.06865310668945312, + "step": 4341 + }, + { + "epoch": 0.6050303072528391, + "grad_norm": 0.3937086760997772, + "learning_rate": 1.5362036102850465e-05, + "loss": 0.07859992980957031, + "step": 4342 + }, + { + "epoch": 0.6051696509440535, + "grad_norm": 0.4401264786720276, + "learning_rate": 1.535286646678391e-05, + "loss": 0.09062767028808594, + "step": 4343 + }, + { + "epoch": 0.6053089946352679, + "grad_norm": 0.3379107415676117, + "learning_rate": 1.5343697863204323e-05, + "loss": 0.08278465270996094, + "step": 4344 + }, + { + "epoch": 0.6054483383264823, + "grad_norm": 0.5662685632705688, + "learning_rate": 1.533453029414877e-05, + "loss": 0.1298656463623047, + "step": 4345 + }, + { + "epoch": 0.6055876820176966, + "grad_norm": 0.3649745583534241, + "learning_rate": 1.5325363761654075e-05, + "loss": 0.0875387191772461, + "step": 4346 + }, + { + "epoch": 0.605727025708911, + "grad_norm": 0.6496816873550415, + "learning_rate": 1.5316198267756834e-05, + "loss": 0.09914779663085938, + "step": 4347 + }, + { + "epoch": 0.6058663694001254, + "grad_norm": 0.7972959876060486, + "learning_rate": 1.5307033814493392e-05, + "loss": 0.12493133544921875, + "step": 4348 + }, + { + "epoch": 0.6060057130913398, + "grad_norm": 0.4698571562767029, + "learning_rate": 1.5297870403899898e-05, + "loss": 0.10098075866699219, + "step": 4349 + }, + { + "epoch": 0.6061450567825541, + "grad_norm": 0.3449389338493347, + "learning_rate": 1.528870803801225e-05, + "loss": 0.07189369201660156, + "step": 4350 + }, + { + "epoch": 0.6062844004737685, + "grad_norm": 0.5436161160469055, + "learning_rate": 1.5279546718866113e-05, + "loss": 0.08678817749023438, + "step": 4351 + }, + { + "epoch": 0.6064237441649829, + "grad_norm": 0.3170844316482544, + "learning_rate": 1.5270386448496926e-05, + "loss": 0.06845378875732422, + "step": 4352 + }, + { + "epoch": 0.6065630878561973, + "grad_norm": 0.5241167545318604, + "learning_rate": 1.5261227228939896e-05, + "loss": 0.11334609985351562, + "step": 4353 + }, + { + "epoch": 0.6067024315474117, + "grad_norm": 0.40496253967285156, + "learning_rate": 1.5252069062229985e-05, + "loss": 0.09139060974121094, + "step": 4354 + }, + { + "epoch": 0.606841775238626, + "grad_norm": 0.5199905037879944, + "learning_rate": 1.5242911950401929e-05, + "loss": 0.08851242065429688, + "step": 4355 + }, + { + "epoch": 0.6069811189298404, + "grad_norm": 0.5239295363426208, + "learning_rate": 1.5233755895490232e-05, + "loss": 0.08280181884765625, + "step": 4356 + }, + { + "epoch": 0.6071204626210548, + "grad_norm": 0.34253889322280884, + "learning_rate": 1.522460089952916e-05, + "loss": 0.0868682861328125, + "step": 4357 + }, + { + "epoch": 0.6072598063122692, + "grad_norm": 0.4954981207847595, + "learning_rate": 1.521544696455275e-05, + "loss": 0.08172035217285156, + "step": 4358 + }, + { + "epoch": 0.6073991500034835, + "grad_norm": 0.44847506284713745, + "learning_rate": 1.520629409259479e-05, + "loss": 0.09559249877929688, + "step": 4359 + }, + { + "epoch": 0.6075384936946979, + "grad_norm": 0.36361604928970337, + "learning_rate": 1.5197142285688831e-05, + "loss": 0.0835714340209961, + "step": 4360 + }, + { + "epoch": 0.6076778373859123, + "grad_norm": 0.6710972189903259, + "learning_rate": 1.518799154586821e-05, + "loss": 0.099578857421875, + "step": 4361 + }, + { + "epoch": 0.6078171810771267, + "grad_norm": 0.6074786186218262, + "learning_rate": 1.5178841875166008e-05, + "loss": 0.09278106689453125, + "step": 4362 + }, + { + "epoch": 0.6079565247683412, + "grad_norm": 0.4814873933792114, + "learning_rate": 1.5169693275615079e-05, + "loss": 0.0791778564453125, + "step": 4363 + }, + { + "epoch": 0.6080958684595555, + "grad_norm": 0.6024544835090637, + "learning_rate": 1.5160545749248014e-05, + "loss": 0.09933662414550781, + "step": 4364 + }, + { + "epoch": 0.6082352121507699, + "grad_norm": 0.5104150176048279, + "learning_rate": 1.5151399298097204e-05, + "loss": 0.08344554901123047, + "step": 4365 + }, + { + "epoch": 0.6083745558419843, + "grad_norm": 0.3751945495605469, + "learning_rate": 1.5142253924194774e-05, + "loss": 0.08081817626953125, + "step": 4366 + }, + { + "epoch": 0.6085138995331987, + "grad_norm": 0.5127018690109253, + "learning_rate": 1.5133109629572614e-05, + "loss": 0.09491491317749023, + "step": 4367 + }, + { + "epoch": 0.608653243224413, + "grad_norm": 0.3190455436706543, + "learning_rate": 1.5123966416262392e-05, + "loss": 0.07256436347961426, + "step": 4368 + }, + { + "epoch": 0.6087925869156274, + "grad_norm": 0.4870467782020569, + "learning_rate": 1.51148242862955e-05, + "loss": 0.08829116821289062, + "step": 4369 + }, + { + "epoch": 0.6089319306068418, + "grad_norm": 0.4539022147655487, + "learning_rate": 1.5105683241703123e-05, + "loss": 0.10619544982910156, + "step": 4370 + }, + { + "epoch": 0.6090712742980562, + "grad_norm": 0.6138098239898682, + "learning_rate": 1.5096543284516188e-05, + "loss": 0.09449195861816406, + "step": 4371 + }, + { + "epoch": 0.6092106179892706, + "grad_norm": 0.3306792378425598, + "learning_rate": 1.5087404416765392e-05, + "loss": 0.07597541809082031, + "step": 4372 + }, + { + "epoch": 0.6093499616804849, + "grad_norm": 0.45789843797683716, + "learning_rate": 1.5078266640481178e-05, + "loss": 0.08342361450195312, + "step": 4373 + }, + { + "epoch": 0.6094893053716993, + "grad_norm": 0.42192134261131287, + "learning_rate": 1.5069129957693755e-05, + "loss": 0.08091926574707031, + "step": 4374 + }, + { + "epoch": 0.6096286490629137, + "grad_norm": 0.27014026045799255, + "learning_rate": 1.5059994370433078e-05, + "loss": 0.0751791000366211, + "step": 4375 + }, + { + "epoch": 0.6097679927541281, + "grad_norm": 0.44448035955429077, + "learning_rate": 1.5050859880728865e-05, + "loss": 0.08721923828125, + "step": 4376 + }, + { + "epoch": 0.6099073364453425, + "grad_norm": 0.42383864521980286, + "learning_rate": 1.50417264906106e-05, + "loss": 0.09196662902832031, + "step": 4377 + }, + { + "epoch": 0.6100466801365568, + "grad_norm": 0.5203892588615417, + "learning_rate": 1.5032594202107509e-05, + "loss": 0.0965108871459961, + "step": 4378 + }, + { + "epoch": 0.6101860238277712, + "grad_norm": 0.5571521520614624, + "learning_rate": 1.5023463017248582e-05, + "loss": 0.08015823364257812, + "step": 4379 + }, + { + "epoch": 0.6103253675189856, + "grad_norm": 0.3954719007015228, + "learning_rate": 1.501433293806255e-05, + "loss": 0.07181072235107422, + "step": 4380 + }, + { + "epoch": 0.6104647112102, + "grad_norm": 0.755531907081604, + "learning_rate": 1.5005203966577919e-05, + "loss": 0.13747692108154297, + "step": 4381 + }, + { + "epoch": 0.6106040549014143, + "grad_norm": 0.39293602108955383, + "learning_rate": 1.4996076104822929e-05, + "loss": 0.07456207275390625, + "step": 4382 + }, + { + "epoch": 0.6107433985926287, + "grad_norm": 0.4849872291088104, + "learning_rate": 1.498694935482559e-05, + "loss": 0.0839996337890625, + "step": 4383 + }, + { + "epoch": 0.6108827422838431, + "grad_norm": 0.3508874177932739, + "learning_rate": 1.4977823718613657e-05, + "loss": 0.0843820571899414, + "step": 4384 + }, + { + "epoch": 0.6110220859750575, + "grad_norm": 0.4649898409843445, + "learning_rate": 1.4968699198214634e-05, + "loss": 0.09135055541992188, + "step": 4385 + }, + { + "epoch": 0.6111614296662718, + "grad_norm": 0.4128688871860504, + "learning_rate": 1.495957579565578e-05, + "loss": 0.08603286743164062, + "step": 4386 + }, + { + "epoch": 0.6113007733574862, + "grad_norm": 0.4544171094894409, + "learning_rate": 1.495045351296411e-05, + "loss": 0.08951663970947266, + "step": 4387 + }, + { + "epoch": 0.6114401170487006, + "grad_norm": 0.476219117641449, + "learning_rate": 1.4941332352166385e-05, + "loss": 0.08294677734375, + "step": 4388 + }, + { + "epoch": 0.611579460739915, + "grad_norm": 0.5389524698257446, + "learning_rate": 1.4932212315289123e-05, + "loss": 0.10058212280273438, + "step": 4389 + }, + { + "epoch": 0.6117188044311294, + "grad_norm": 0.5569494962692261, + "learning_rate": 1.4923093404358588e-05, + "loss": 0.07723617553710938, + "step": 4390 + }, + { + "epoch": 0.6118581481223437, + "grad_norm": 0.5259640216827393, + "learning_rate": 1.4913975621400787e-05, + "loss": 0.09177589416503906, + "step": 4391 + }, + { + "epoch": 0.6119974918135581, + "grad_norm": 0.7622935771942139, + "learning_rate": 1.4904858968441485e-05, + "loss": 0.09806251525878906, + "step": 4392 + }, + { + "epoch": 0.6121368355047725, + "grad_norm": 0.4114643931388855, + "learning_rate": 1.4895743447506196e-05, + "loss": 0.09537029266357422, + "step": 4393 + }, + { + "epoch": 0.6122761791959869, + "grad_norm": 0.6361806988716125, + "learning_rate": 1.4886629060620181e-05, + "loss": 0.10441207885742188, + "step": 4394 + }, + { + "epoch": 0.6124155228872012, + "grad_norm": 0.3229290246963501, + "learning_rate": 1.4877515809808459e-05, + "loss": 0.06705188751220703, + "step": 4395 + }, + { + "epoch": 0.6125548665784156, + "grad_norm": 0.3499566912651062, + "learning_rate": 1.4868403697095764e-05, + "loss": 0.0762176513671875, + "step": 4396 + }, + { + "epoch": 0.61269421026963, + "grad_norm": 0.3780028223991394, + "learning_rate": 1.4859292724506613e-05, + "loss": 0.10133171081542969, + "step": 4397 + }, + { + "epoch": 0.6128335539608444, + "grad_norm": 0.631094753742218, + "learning_rate": 1.4850182894065258e-05, + "loss": 0.091156005859375, + "step": 4398 + }, + { + "epoch": 0.6129728976520588, + "grad_norm": 0.6815646886825562, + "learning_rate": 1.4841074207795684e-05, + "loss": 0.10101509094238281, + "step": 4399 + }, + { + "epoch": 0.6131122413432731, + "grad_norm": 0.5502398610115051, + "learning_rate": 1.483196666772165e-05, + "loss": 0.0879669189453125, + "step": 4400 + }, + { + "epoch": 0.6132515850344875, + "grad_norm": 0.5953850150108337, + "learning_rate": 1.482286027586663e-05, + "loss": 0.08375465869903564, + "step": 4401 + }, + { + "epoch": 0.6133909287257019, + "grad_norm": 0.4005519151687622, + "learning_rate": 1.4813755034253862e-05, + "loss": 0.07425689697265625, + "step": 4402 + }, + { + "epoch": 0.6135302724169164, + "grad_norm": 0.43426942825317383, + "learning_rate": 1.4804650944906316e-05, + "loss": 0.08553123474121094, + "step": 4403 + }, + { + "epoch": 0.6136696161081308, + "grad_norm": 0.43814972043037415, + "learning_rate": 1.4795548009846723e-05, + "loss": 0.08447456359863281, + "step": 4404 + }, + { + "epoch": 0.6138089597993451, + "grad_norm": 0.6006798148155212, + "learning_rate": 1.4786446231097546e-05, + "loss": 0.106170654296875, + "step": 4405 + }, + { + "epoch": 0.6139483034905595, + "grad_norm": 0.4009344279766083, + "learning_rate": 1.4777345610680987e-05, + "loss": 0.09085845947265625, + "step": 4406 + }, + { + "epoch": 0.6140876471817739, + "grad_norm": 0.299641489982605, + "learning_rate": 1.4768246150618995e-05, + "loss": 0.08453083038330078, + "step": 4407 + }, + { + "epoch": 0.6142269908729883, + "grad_norm": 0.6586974859237671, + "learning_rate": 1.4759147852933263e-05, + "loss": 0.12646484375, + "step": 4408 + }, + { + "epoch": 0.6143663345642026, + "grad_norm": 0.3263009786605835, + "learning_rate": 1.4750050719645227e-05, + "loss": 0.08183860778808594, + "step": 4409 + }, + { + "epoch": 0.614505678255417, + "grad_norm": 0.4397795498371124, + "learning_rate": 1.4740954752776064e-05, + "loss": 0.09541893005371094, + "step": 4410 + }, + { + "epoch": 0.6146450219466314, + "grad_norm": 0.5777227878570557, + "learning_rate": 1.47318599543467e-05, + "loss": 0.09882068634033203, + "step": 4411 + }, + { + "epoch": 0.6147843656378458, + "grad_norm": 0.7973178029060364, + "learning_rate": 1.4722766326377769e-05, + "loss": 0.11433029174804688, + "step": 4412 + }, + { + "epoch": 0.6149237093290602, + "grad_norm": 0.30807486176490784, + "learning_rate": 1.4713673870889682e-05, + "loss": 0.07827496528625488, + "step": 4413 + }, + { + "epoch": 0.6150630530202745, + "grad_norm": 0.34693291783332825, + "learning_rate": 1.4704582589902571e-05, + "loss": 0.07625198364257812, + "step": 4414 + }, + { + "epoch": 0.6152023967114889, + "grad_norm": 0.5173124074935913, + "learning_rate": 1.4695492485436308e-05, + "loss": 0.0933837890625, + "step": 4415 + }, + { + "epoch": 0.6153417404027033, + "grad_norm": 0.428061306476593, + "learning_rate": 1.4686403559510522e-05, + "loss": 0.09287357330322266, + "step": 4416 + }, + { + "epoch": 0.6154810840939177, + "grad_norm": 0.4411545395851135, + "learning_rate": 1.4677315814144549e-05, + "loss": 0.09669303894042969, + "step": 4417 + }, + { + "epoch": 0.615620427785132, + "grad_norm": 0.4492502510547638, + "learning_rate": 1.4668229251357482e-05, + "loss": 0.09242057800292969, + "step": 4418 + }, + { + "epoch": 0.6157597714763464, + "grad_norm": 0.533232569694519, + "learning_rate": 1.4659143873168145e-05, + "loss": 0.08636283874511719, + "step": 4419 + }, + { + "epoch": 0.6158991151675608, + "grad_norm": 0.7099279165267944, + "learning_rate": 1.4650059681595109e-05, + "loss": 0.11150550842285156, + "step": 4420 + }, + { + "epoch": 0.6160384588587752, + "grad_norm": 0.37659889459609985, + "learning_rate": 1.4640976678656674e-05, + "loss": 0.08949470520019531, + "step": 4421 + }, + { + "epoch": 0.6161778025499896, + "grad_norm": 0.5112534165382385, + "learning_rate": 1.463189486637087e-05, + "loss": 0.10235786437988281, + "step": 4422 + }, + { + "epoch": 0.6163171462412039, + "grad_norm": 0.548365592956543, + "learning_rate": 1.4622814246755468e-05, + "loss": 0.08872795104980469, + "step": 4423 + }, + { + "epoch": 0.6164564899324183, + "grad_norm": 0.6633404493331909, + "learning_rate": 1.4613734821827976e-05, + "loss": 0.10908699035644531, + "step": 4424 + }, + { + "epoch": 0.6165958336236327, + "grad_norm": 0.38173216581344604, + "learning_rate": 1.4604656593605637e-05, + "loss": 0.08480453491210938, + "step": 4425 + }, + { + "epoch": 0.6167351773148471, + "grad_norm": 0.3343430757522583, + "learning_rate": 1.4595579564105432e-05, + "loss": 0.07572650909423828, + "step": 4426 + }, + { + "epoch": 0.6168745210060614, + "grad_norm": 0.6159416437149048, + "learning_rate": 1.458650373534406e-05, + "loss": 0.08988809585571289, + "step": 4427 + }, + { + "epoch": 0.6170138646972758, + "grad_norm": 0.45072686672210693, + "learning_rate": 1.457742910933796e-05, + "loss": 0.07847213745117188, + "step": 4428 + }, + { + "epoch": 0.6171532083884902, + "grad_norm": 0.760272741317749, + "learning_rate": 1.4568355688103318e-05, + "loss": 0.11523723602294922, + "step": 4429 + }, + { + "epoch": 0.6172925520797046, + "grad_norm": 0.28444910049438477, + "learning_rate": 1.4559283473656031e-05, + "loss": 0.08106231689453125, + "step": 4430 + }, + { + "epoch": 0.617431895770919, + "grad_norm": 0.5238324999809265, + "learning_rate": 1.4550212468011742e-05, + "loss": 0.10854339599609375, + "step": 4431 + }, + { + "epoch": 0.6175712394621333, + "grad_norm": 0.5808846950531006, + "learning_rate": 1.454114267318583e-05, + "loss": 0.10589408874511719, + "step": 4432 + }, + { + "epoch": 0.6177105831533477, + "grad_norm": 0.37198713421821594, + "learning_rate": 1.4532074091193385e-05, + "loss": 0.0775146484375, + "step": 4433 + }, + { + "epoch": 0.6178499268445621, + "grad_norm": 0.5099762082099915, + "learning_rate": 1.4523006724049238e-05, + "loss": 0.09353065490722656, + "step": 4434 + }, + { + "epoch": 0.6179892705357765, + "grad_norm": 0.30511561036109924, + "learning_rate": 1.4513940573767955e-05, + "loss": 0.06302547454833984, + "step": 4435 + }, + { + "epoch": 0.6181286142269908, + "grad_norm": 0.3193608522415161, + "learning_rate": 1.450487564236383e-05, + "loss": 0.0826120376586914, + "step": 4436 + }, + { + "epoch": 0.6182679579182052, + "grad_norm": 0.5943095684051514, + "learning_rate": 1.4495811931850886e-05, + "loss": 0.11620330810546875, + "step": 4437 + }, + { + "epoch": 0.6184073016094196, + "grad_norm": 0.43876248598098755, + "learning_rate": 1.4486749444242862e-05, + "loss": 0.1007843017578125, + "step": 4438 + }, + { + "epoch": 0.618546645300634, + "grad_norm": 0.5506213903427124, + "learning_rate": 1.447768818155324e-05, + "loss": 0.10116386413574219, + "step": 4439 + }, + { + "epoch": 0.6186859889918483, + "grad_norm": 0.41697922348976135, + "learning_rate": 1.446862814579523e-05, + "loss": 0.09810066223144531, + "step": 4440 + }, + { + "epoch": 0.6188253326830627, + "grad_norm": 0.3057571351528168, + "learning_rate": 1.4459569338981765e-05, + "loss": 0.07350349426269531, + "step": 4441 + }, + { + "epoch": 0.6189646763742771, + "grad_norm": 0.661978006362915, + "learning_rate": 1.4450511763125506e-05, + "loss": 0.09393024444580078, + "step": 4442 + }, + { + "epoch": 0.6191040200654916, + "grad_norm": 0.49882903695106506, + "learning_rate": 1.444145542023883e-05, + "loss": 0.09566783905029297, + "step": 4443 + }, + { + "epoch": 0.619243363756706, + "grad_norm": 0.4393889605998993, + "learning_rate": 1.4432400312333854e-05, + "loss": 0.0820169448852539, + "step": 4444 + }, + { + "epoch": 0.6193827074479203, + "grad_norm": 0.6881753206253052, + "learning_rate": 1.4423346441422422e-05, + "loss": 0.096466064453125, + "step": 4445 + }, + { + "epoch": 0.6195220511391347, + "grad_norm": 0.41369688510894775, + "learning_rate": 1.4414293809516094e-05, + "loss": 0.09396743774414062, + "step": 4446 + }, + { + "epoch": 0.6196613948303491, + "grad_norm": 0.33910924196243286, + "learning_rate": 1.4405242418626153e-05, + "loss": 0.07105255126953125, + "step": 4447 + }, + { + "epoch": 0.6198007385215635, + "grad_norm": 0.49747779965400696, + "learning_rate": 1.4396192270763622e-05, + "loss": 0.07398223876953125, + "step": 4448 + }, + { + "epoch": 0.6199400822127779, + "grad_norm": 0.4024459421634674, + "learning_rate": 1.4387143367939231e-05, + "loss": 0.09295082092285156, + "step": 4449 + }, + { + "epoch": 0.6200794259039922, + "grad_norm": 0.48299771547317505, + "learning_rate": 1.4378095712163439e-05, + "loss": 0.07696032524108887, + "step": 4450 + }, + { + "epoch": 0.6202187695952066, + "grad_norm": 0.5658461451530457, + "learning_rate": 1.4369049305446423e-05, + "loss": 0.09518814086914062, + "step": 4451 + }, + { + "epoch": 0.620358113286421, + "grad_norm": 0.641631543636322, + "learning_rate": 1.4360004149798101e-05, + "loss": 0.07993888854980469, + "step": 4452 + }, + { + "epoch": 0.6204974569776354, + "grad_norm": 0.6183514595031738, + "learning_rate": 1.4350960247228096e-05, + "loss": 0.08828258514404297, + "step": 4453 + }, + { + "epoch": 0.6206368006688497, + "grad_norm": 0.3500426113605499, + "learning_rate": 1.4341917599745751e-05, + "loss": 0.07753944396972656, + "step": 4454 + }, + { + "epoch": 0.6207761443600641, + "grad_norm": 0.40619340538978577, + "learning_rate": 1.4332876209360136e-05, + "loss": 0.07937431335449219, + "step": 4455 + }, + { + "epoch": 0.6209154880512785, + "grad_norm": 0.43321502208709717, + "learning_rate": 1.4323836078080046e-05, + "loss": 0.09180736541748047, + "step": 4456 + }, + { + "epoch": 0.6210548317424929, + "grad_norm": 0.6153237223625183, + "learning_rate": 1.4314797207913995e-05, + "loss": 0.09538459777832031, + "step": 4457 + }, + { + "epoch": 0.6211941754337073, + "grad_norm": 0.49036869406700134, + "learning_rate": 1.4305759600870208e-05, + "loss": 0.086822509765625, + "step": 4458 + }, + { + "epoch": 0.6213335191249216, + "grad_norm": 0.7468209266662598, + "learning_rate": 1.4296723258956635e-05, + "loss": 0.11522293090820312, + "step": 4459 + }, + { + "epoch": 0.621472862816136, + "grad_norm": 0.3647741973400116, + "learning_rate": 1.428768818418094e-05, + "loss": 0.08116865158081055, + "step": 4460 + }, + { + "epoch": 0.6216122065073504, + "grad_norm": 0.46906211972236633, + "learning_rate": 1.4278654378550522e-05, + "loss": 0.09273171424865723, + "step": 4461 + }, + { + "epoch": 0.6217515501985648, + "grad_norm": 0.3539462685585022, + "learning_rate": 1.4269621844072481e-05, + "loss": 0.08896446228027344, + "step": 4462 + }, + { + "epoch": 0.6218908938897791, + "grad_norm": 0.39186936616897583, + "learning_rate": 1.4260590582753641e-05, + "loss": 0.07697677612304688, + "step": 4463 + }, + { + "epoch": 0.6220302375809935, + "grad_norm": 0.7098251581192017, + "learning_rate": 1.4251560596600536e-05, + "loss": 0.11263275146484375, + "step": 4464 + }, + { + "epoch": 0.6221695812722079, + "grad_norm": 0.32677844166755676, + "learning_rate": 1.4242531887619428e-05, + "loss": 0.06747150421142578, + "step": 4465 + }, + { + "epoch": 0.6223089249634223, + "grad_norm": 0.4834625720977783, + "learning_rate": 1.4233504457816291e-05, + "loss": 0.09395027160644531, + "step": 4466 + }, + { + "epoch": 0.6224482686546366, + "grad_norm": 0.4588233530521393, + "learning_rate": 1.4224478309196808e-05, + "loss": 0.08855247497558594, + "step": 4467 + }, + { + "epoch": 0.622587612345851, + "grad_norm": 0.533184289932251, + "learning_rate": 1.4215453443766391e-05, + "loss": 0.0948781967163086, + "step": 4468 + }, + { + "epoch": 0.6227269560370654, + "grad_norm": 0.5852271914482117, + "learning_rate": 1.420642986353016e-05, + "loss": 0.08489608764648438, + "step": 4469 + }, + { + "epoch": 0.6228662997282798, + "grad_norm": 0.48225435614585876, + "learning_rate": 1.4197407570492941e-05, + "loss": 0.0953378677368164, + "step": 4470 + }, + { + "epoch": 0.6230056434194942, + "grad_norm": 0.45322680473327637, + "learning_rate": 1.4188386566659276e-05, + "loss": 0.09158515930175781, + "step": 4471 + }, + { + "epoch": 0.6231449871107085, + "grad_norm": 0.43203744292259216, + "learning_rate": 1.4179366854033441e-05, + "loss": 0.09255027770996094, + "step": 4472 + }, + { + "epoch": 0.6232843308019229, + "grad_norm": 0.3702640235424042, + "learning_rate": 1.4170348434619405e-05, + "loss": 0.08378791809082031, + "step": 4473 + }, + { + "epoch": 0.6234236744931373, + "grad_norm": 0.2860408127307892, + "learning_rate": 1.4161331310420856e-05, + "loss": 0.07097530364990234, + "step": 4474 + }, + { + "epoch": 0.6235630181843517, + "grad_norm": 0.7155112624168396, + "learning_rate": 1.4152315483441188e-05, + "loss": 0.09708595275878906, + "step": 4475 + }, + { + "epoch": 0.623702361875566, + "grad_norm": 0.41667822003364563, + "learning_rate": 1.414330095568351e-05, + "loss": 0.09103202819824219, + "step": 4476 + }, + { + "epoch": 0.6238417055667804, + "grad_norm": 0.4447556138038635, + "learning_rate": 1.4134287729150653e-05, + "loss": 0.09880447387695312, + "step": 4477 + }, + { + "epoch": 0.6239810492579948, + "grad_norm": 0.2927219867706299, + "learning_rate": 1.4125275805845147e-05, + "loss": 0.07500076293945312, + "step": 4478 + }, + { + "epoch": 0.6241203929492092, + "grad_norm": 0.3688456118106842, + "learning_rate": 1.4116265187769239e-05, + "loss": 0.09557437896728516, + "step": 4479 + }, + { + "epoch": 0.6242597366404236, + "grad_norm": 0.6001667976379395, + "learning_rate": 1.4107255876924865e-05, + "loss": 0.10188961029052734, + "step": 4480 + }, + { + "epoch": 0.6243990803316379, + "grad_norm": 0.34049341082572937, + "learning_rate": 1.409824787531371e-05, + "loss": 0.08034133911132812, + "step": 4481 + }, + { + "epoch": 0.6245384240228523, + "grad_norm": 0.3560030162334442, + "learning_rate": 1.408924118493714e-05, + "loss": 0.08527755737304688, + "step": 4482 + }, + { + "epoch": 0.6246777677140668, + "grad_norm": 0.39699485898017883, + "learning_rate": 1.4080235807796225e-05, + "loss": 0.09283256530761719, + "step": 4483 + }, + { + "epoch": 0.6248171114052812, + "grad_norm": 0.4312022626399994, + "learning_rate": 1.4071231745891768e-05, + "loss": 0.09514713287353516, + "step": 4484 + }, + { + "epoch": 0.6249564550964956, + "grad_norm": 0.6397456526756287, + "learning_rate": 1.4062229001224268e-05, + "loss": 0.09261131286621094, + "step": 4485 + }, + { + "epoch": 0.6250957987877099, + "grad_norm": 0.32254770398139954, + "learning_rate": 1.4053227575793917e-05, + "loss": 0.08660507202148438, + "step": 4486 + }, + { + "epoch": 0.6252351424789243, + "grad_norm": 0.810996949672699, + "learning_rate": 1.4044227471600627e-05, + "loss": 0.1227569580078125, + "step": 4487 + }, + { + "epoch": 0.6253744861701387, + "grad_norm": 0.4717305898666382, + "learning_rate": 1.4035228690644023e-05, + "loss": 0.08169364929199219, + "step": 4488 + }, + { + "epoch": 0.6255138298613531, + "grad_norm": 0.7052586078643799, + "learning_rate": 1.4026231234923429e-05, + "loss": 0.08963680267333984, + "step": 4489 + }, + { + "epoch": 0.6256531735525674, + "grad_norm": 0.30612772703170776, + "learning_rate": 1.4017235106437871e-05, + "loss": 0.07557821273803711, + "step": 4490 + }, + { + "epoch": 0.6257925172437818, + "grad_norm": 0.5853651165962219, + "learning_rate": 1.4008240307186084e-05, + "loss": 0.09716224670410156, + "step": 4491 + }, + { + "epoch": 0.6259318609349962, + "grad_norm": 0.4164164960384369, + "learning_rate": 1.3999246839166499e-05, + "loss": 0.10063362121582031, + "step": 4492 + }, + { + "epoch": 0.6260712046262106, + "grad_norm": 0.40865442156791687, + "learning_rate": 1.399025470437727e-05, + "loss": 0.07592201232910156, + "step": 4493 + }, + { + "epoch": 0.626210548317425, + "grad_norm": 0.3577278256416321, + "learning_rate": 1.398126390481624e-05, + "loss": 0.07727861404418945, + "step": 4494 + }, + { + "epoch": 0.6263498920086393, + "grad_norm": 0.5487609505653381, + "learning_rate": 1.3972274442480971e-05, + "loss": 0.09692096710205078, + "step": 4495 + }, + { + "epoch": 0.6264892356998537, + "grad_norm": 0.48852795362472534, + "learning_rate": 1.3963286319368695e-05, + "loss": 0.08880424499511719, + "step": 4496 + }, + { + "epoch": 0.6266285793910681, + "grad_norm": 0.529085636138916, + "learning_rate": 1.395429953747638e-05, + "loss": 0.09057426452636719, + "step": 4497 + }, + { + "epoch": 0.6267679230822825, + "grad_norm": 0.3385777771472931, + "learning_rate": 1.3945314098800684e-05, + "loss": 0.07641220092773438, + "step": 4498 + }, + { + "epoch": 0.6269072667734968, + "grad_norm": 0.868129551410675, + "learning_rate": 1.3936330005337959e-05, + "loss": 0.11498832702636719, + "step": 4499 + }, + { + "epoch": 0.6270466104647112, + "grad_norm": 0.4676065742969513, + "learning_rate": 1.392734725908428e-05, + "loss": 0.08199501037597656, + "step": 4500 + }, + { + "epoch": 0.6271859541559256, + "grad_norm": 0.4930526316165924, + "learning_rate": 1.3918365862035395e-05, + "loss": 0.08622550964355469, + "step": 4501 + }, + { + "epoch": 0.62732529784714, + "grad_norm": 0.4433969557285309, + "learning_rate": 1.3909385816186767e-05, + "loss": 0.07199263572692871, + "step": 4502 + }, + { + "epoch": 0.6274646415383544, + "grad_norm": 0.35890382528305054, + "learning_rate": 1.390040712353356e-05, + "loss": 0.0752878189086914, + "step": 4503 + }, + { + "epoch": 0.6276039852295687, + "grad_norm": 0.39713412523269653, + "learning_rate": 1.3891429786070634e-05, + "loss": 0.08225440979003906, + "step": 4504 + }, + { + "epoch": 0.6277433289207831, + "grad_norm": 0.5960804224014282, + "learning_rate": 1.3882453805792549e-05, + "loss": 0.1018218994140625, + "step": 4505 + }, + { + "epoch": 0.6278826726119975, + "grad_norm": 0.3689640164375305, + "learning_rate": 1.3873479184693568e-05, + "loss": 0.08486557006835938, + "step": 4506 + }, + { + "epoch": 0.6280220163032119, + "grad_norm": 0.530841588973999, + "learning_rate": 1.3864505924767637e-05, + "loss": 0.08708620071411133, + "step": 4507 + }, + { + "epoch": 0.6281613599944262, + "grad_norm": 0.35067078471183777, + "learning_rate": 1.3855534028008411e-05, + "loss": 0.07676506042480469, + "step": 4508 + }, + { + "epoch": 0.6283007036856406, + "grad_norm": 0.36142614483833313, + "learning_rate": 1.3846563496409245e-05, + "loss": 0.06765270233154297, + "step": 4509 + }, + { + "epoch": 0.628440047376855, + "grad_norm": 0.5791271924972534, + "learning_rate": 1.383759433196318e-05, + "loss": 0.08510589599609375, + "step": 4510 + }, + { + "epoch": 0.6285793910680694, + "grad_norm": 0.4670427739620209, + "learning_rate": 1.3828626536662978e-05, + "loss": 0.09586906433105469, + "step": 4511 + }, + { + "epoch": 0.6287187347592837, + "grad_norm": 0.6364824175834656, + "learning_rate": 1.3819660112501054e-05, + "loss": 0.07987594604492188, + "step": 4512 + }, + { + "epoch": 0.6288580784504981, + "grad_norm": 0.4139570891857147, + "learning_rate": 1.3810695061469556e-05, + "loss": 0.07884407043457031, + "step": 4513 + }, + { + "epoch": 0.6289974221417125, + "grad_norm": 0.48506057262420654, + "learning_rate": 1.3801731385560312e-05, + "loss": 0.07767295837402344, + "step": 4514 + }, + { + "epoch": 0.6291367658329269, + "grad_norm": 0.599557638168335, + "learning_rate": 1.3792769086764839e-05, + "loss": 0.0836329460144043, + "step": 4515 + }, + { + "epoch": 0.6292761095241413, + "grad_norm": 0.5897219777107239, + "learning_rate": 1.3783808167074373e-05, + "loss": 0.10245704650878906, + "step": 4516 + }, + { + "epoch": 0.6294154532153556, + "grad_norm": 0.3548260033130646, + "learning_rate": 1.3774848628479807e-05, + "loss": 0.07126235961914062, + "step": 4517 + }, + { + "epoch": 0.62955479690657, + "grad_norm": 0.46439287066459656, + "learning_rate": 1.3765890472971755e-05, + "loss": 0.09689140319824219, + "step": 4518 + }, + { + "epoch": 0.6296941405977844, + "grad_norm": 0.6596532464027405, + "learning_rate": 1.3756933702540506e-05, + "loss": 0.11289596557617188, + "step": 4519 + }, + { + "epoch": 0.6298334842889988, + "grad_norm": 0.5637410283088684, + "learning_rate": 1.3747978319176064e-05, + "loss": 0.10574150085449219, + "step": 4520 + }, + { + "epoch": 0.6299728279802131, + "grad_norm": 0.517463743686676, + "learning_rate": 1.3739024324868107e-05, + "loss": 0.07601737976074219, + "step": 4521 + }, + { + "epoch": 0.6301121716714275, + "grad_norm": 0.4546743631362915, + "learning_rate": 1.3730071721605999e-05, + "loss": 0.07903456687927246, + "step": 4522 + }, + { + "epoch": 0.6302515153626419, + "grad_norm": 0.5079646110534668, + "learning_rate": 1.3721120511378811e-05, + "loss": 0.10005378723144531, + "step": 4523 + }, + { + "epoch": 0.6303908590538564, + "grad_norm": 0.5320662260055542, + "learning_rate": 1.3712170696175289e-05, + "loss": 0.0963287353515625, + "step": 4524 + }, + { + "epoch": 0.6305302027450708, + "grad_norm": 0.4058755040168762, + "learning_rate": 1.3703222277983892e-05, + "loss": 0.0789031982421875, + "step": 4525 + }, + { + "epoch": 0.6306695464362851, + "grad_norm": 0.670536994934082, + "learning_rate": 1.3694275258792742e-05, + "loss": 0.11563873291015625, + "step": 4526 + }, + { + "epoch": 0.6308088901274995, + "grad_norm": 0.629120409488678, + "learning_rate": 1.368532964058968e-05, + "loss": 0.10261344909667969, + "step": 4527 + }, + { + "epoch": 0.6309482338187139, + "grad_norm": 0.6925860643386841, + "learning_rate": 1.3676385425362193e-05, + "loss": 0.10440826416015625, + "step": 4528 + }, + { + "epoch": 0.6310875775099283, + "grad_norm": 0.5033110976219177, + "learning_rate": 1.3667442615097497e-05, + "loss": 0.10672187805175781, + "step": 4529 + }, + { + "epoch": 0.6312269212011427, + "grad_norm": 0.6298391222953796, + "learning_rate": 1.3658501211782478e-05, + "loss": 0.0975637435913086, + "step": 4530 + }, + { + "epoch": 0.631366264892357, + "grad_norm": 0.6088295578956604, + "learning_rate": 1.3649561217403707e-05, + "loss": 0.09957122802734375, + "step": 4531 + }, + { + "epoch": 0.6315056085835714, + "grad_norm": 0.4881676137447357, + "learning_rate": 1.3640622633947459e-05, + "loss": 0.07848834991455078, + "step": 4532 + }, + { + "epoch": 0.6316449522747858, + "grad_norm": 0.5341084003448486, + "learning_rate": 1.3631685463399668e-05, + "loss": 0.09707450866699219, + "step": 4533 + }, + { + "epoch": 0.6317842959660002, + "grad_norm": 0.49783626198768616, + "learning_rate": 1.3622749707745979e-05, + "loss": 0.08331871032714844, + "step": 4534 + }, + { + "epoch": 0.6319236396572145, + "grad_norm": 0.6988818049430847, + "learning_rate": 1.3613815368971705e-05, + "loss": 0.10155296325683594, + "step": 4535 + }, + { + "epoch": 0.6320629833484289, + "grad_norm": 0.5254523158073425, + "learning_rate": 1.360488244906186e-05, + "loss": 0.09968757629394531, + "step": 4536 + }, + { + "epoch": 0.6322023270396433, + "grad_norm": 0.5565057396888733, + "learning_rate": 1.3595950950001139e-05, + "loss": 0.08344364166259766, + "step": 4537 + }, + { + "epoch": 0.6323416707308577, + "grad_norm": 0.41148972511291504, + "learning_rate": 1.3587020873773901e-05, + "loss": 0.07659530639648438, + "step": 4538 + }, + { + "epoch": 0.632481014422072, + "grad_norm": 0.469332754611969, + "learning_rate": 1.3578092222364214e-05, + "loss": 0.07682609558105469, + "step": 4539 + }, + { + "epoch": 0.6326203581132864, + "grad_norm": 0.8571029901504517, + "learning_rate": 1.3569164997755821e-05, + "loss": 0.11107063293457031, + "step": 4540 + }, + { + "epoch": 0.6327597018045008, + "grad_norm": 0.46046575903892517, + "learning_rate": 1.3560239201932151e-05, + "loss": 0.08532428741455078, + "step": 4541 + }, + { + "epoch": 0.6328990454957152, + "grad_norm": 0.42538532614707947, + "learning_rate": 1.35513148368763e-05, + "loss": 0.08919143676757812, + "step": 4542 + }, + { + "epoch": 0.6330383891869296, + "grad_norm": 0.4671628773212433, + "learning_rate": 1.3542391904571082e-05, + "loss": 0.08650684356689453, + "step": 4543 + }, + { + "epoch": 0.6331777328781439, + "grad_norm": 0.31135469675064087, + "learning_rate": 1.3533470406998941e-05, + "loss": 0.07791900634765625, + "step": 4544 + }, + { + "epoch": 0.6333170765693583, + "grad_norm": 0.46909481287002563, + "learning_rate": 1.3524550346142044e-05, + "loss": 0.08908462524414062, + "step": 4545 + }, + { + "epoch": 0.6334564202605727, + "grad_norm": 0.49026426672935486, + "learning_rate": 1.3515631723982223e-05, + "loss": 0.08891677856445312, + "step": 4546 + }, + { + "epoch": 0.6335957639517871, + "grad_norm": 0.4357423484325409, + "learning_rate": 1.3506714542500986e-05, + "loss": 0.08912467956542969, + "step": 4547 + }, + { + "epoch": 0.6337351076430014, + "grad_norm": 0.474579781293869, + "learning_rate": 1.3497798803679547e-05, + "loss": 0.0873870849609375, + "step": 4548 + }, + { + "epoch": 0.6338744513342158, + "grad_norm": 0.394206166267395, + "learning_rate": 1.348888450949876e-05, + "loss": 0.08397626876831055, + "step": 4549 + }, + { + "epoch": 0.6340137950254302, + "grad_norm": 0.42324498295783997, + "learning_rate": 1.3479971661939183e-05, + "loss": 0.08812332153320312, + "step": 4550 + }, + { + "epoch": 0.6341531387166446, + "grad_norm": 0.5643967986106873, + "learning_rate": 1.3471060262981044e-05, + "loss": 0.09161853790283203, + "step": 4551 + }, + { + "epoch": 0.634292482407859, + "grad_norm": 0.7874306440353394, + "learning_rate": 1.346215031460426e-05, + "loss": 0.08915901184082031, + "step": 4552 + }, + { + "epoch": 0.6344318260990733, + "grad_norm": 0.5301236510276794, + "learning_rate": 1.3453241818788421e-05, + "loss": 0.08858871459960938, + "step": 4553 + }, + { + "epoch": 0.6345711697902877, + "grad_norm": 0.5552811622619629, + "learning_rate": 1.3444334777512778e-05, + "loss": 0.09210395812988281, + "step": 4554 + }, + { + "epoch": 0.6347105134815021, + "grad_norm": 0.3862810730934143, + "learning_rate": 1.3435429192756275e-05, + "loss": 0.08188056945800781, + "step": 4555 + }, + { + "epoch": 0.6348498571727165, + "grad_norm": 0.6092393398284912, + "learning_rate": 1.342652506649754e-05, + "loss": 0.10169601440429688, + "step": 4556 + }, + { + "epoch": 0.6349892008639308, + "grad_norm": 0.5023985505104065, + "learning_rate": 1.3417622400714859e-05, + "loss": 0.08365154266357422, + "step": 4557 + }, + { + "epoch": 0.6351285445551452, + "grad_norm": 0.3709619343280792, + "learning_rate": 1.3408721197386205e-05, + "loss": 0.09221649169921875, + "step": 4558 + }, + { + "epoch": 0.6352678882463596, + "grad_norm": 0.7899066209793091, + "learning_rate": 1.3399821458489215e-05, + "loss": 0.08936119079589844, + "step": 4559 + }, + { + "epoch": 0.635407231937574, + "grad_norm": 0.540030300617218, + "learning_rate": 1.339092318600121e-05, + "loss": 0.08859634399414062, + "step": 4560 + }, + { + "epoch": 0.6355465756287884, + "grad_norm": 1.2505874633789062, + "learning_rate": 1.3382026381899191e-05, + "loss": 0.13393402099609375, + "step": 4561 + }, + { + "epoch": 0.6356859193200027, + "grad_norm": 0.5224848389625549, + "learning_rate": 1.3373131048159817e-05, + "loss": 0.09838485717773438, + "step": 4562 + }, + { + "epoch": 0.6358252630112171, + "grad_norm": 0.4612959325313568, + "learning_rate": 1.3364237186759426e-05, + "loss": 0.08090782165527344, + "step": 4563 + }, + { + "epoch": 0.6359646067024316, + "grad_norm": 0.5161504745483398, + "learning_rate": 1.3355344799674042e-05, + "loss": 0.09120368957519531, + "step": 4564 + }, + { + "epoch": 0.636103950393646, + "grad_norm": 0.48041802644729614, + "learning_rate": 1.3346453888879341e-05, + "loss": 0.0976104736328125, + "step": 4565 + }, + { + "epoch": 0.6362432940848604, + "grad_norm": 0.7622586488723755, + "learning_rate": 1.3337564456350682e-05, + "loss": 0.11400985717773438, + "step": 4566 + }, + { + "epoch": 0.6363826377760747, + "grad_norm": 0.36136558651924133, + "learning_rate": 1.3328676504063092e-05, + "loss": 0.07190132141113281, + "step": 4567 + }, + { + "epoch": 0.6365219814672891, + "grad_norm": 0.6801868081092834, + "learning_rate": 1.3319790033991278e-05, + "loss": 0.09403800964355469, + "step": 4568 + }, + { + "epoch": 0.6366613251585035, + "grad_norm": 0.6349158883094788, + "learning_rate": 1.331090504810961e-05, + "loss": 0.11871147155761719, + "step": 4569 + }, + { + "epoch": 0.6368006688497179, + "grad_norm": 0.5272253155708313, + "learning_rate": 1.3302021548392122e-05, + "loss": 0.10081863403320312, + "step": 4570 + }, + { + "epoch": 0.6369400125409322, + "grad_norm": 0.3466297388076782, + "learning_rate": 1.3293139536812522e-05, + "loss": 0.09223651885986328, + "step": 4571 + }, + { + "epoch": 0.6370793562321466, + "grad_norm": 0.5383222103118896, + "learning_rate": 1.3284259015344205e-05, + "loss": 0.0865936279296875, + "step": 4572 + }, + { + "epoch": 0.637218699923361, + "grad_norm": 0.7467571496963501, + "learning_rate": 1.327537998596021e-05, + "loss": 0.10842132568359375, + "step": 4573 + }, + { + "epoch": 0.6373580436145754, + "grad_norm": 0.3473077118396759, + "learning_rate": 1.326650245063326e-05, + "loss": 0.06428742408752441, + "step": 4574 + }, + { + "epoch": 0.6374973873057898, + "grad_norm": 0.33400723338127136, + "learning_rate": 1.3257626411335733e-05, + "loss": 0.07910728454589844, + "step": 4575 + }, + { + "epoch": 0.6376367309970041, + "grad_norm": 0.43160945177078247, + "learning_rate": 1.3248751870039682e-05, + "loss": 0.10201263427734375, + "step": 4576 + }, + { + "epoch": 0.6377760746882185, + "grad_norm": 0.3479001522064209, + "learning_rate": 1.3239878828716837e-05, + "loss": 0.08756673336029053, + "step": 4577 + }, + { + "epoch": 0.6379154183794329, + "grad_norm": 0.5224100351333618, + "learning_rate": 1.3231007289338579e-05, + "loss": 0.08924245834350586, + "step": 4578 + }, + { + "epoch": 0.6380547620706473, + "grad_norm": 0.36925795674324036, + "learning_rate": 1.322213725387596e-05, + "loss": 0.07948446273803711, + "step": 4579 + }, + { + "epoch": 0.6381941057618616, + "grad_norm": 0.5740024447441101, + "learning_rate": 1.321326872429971e-05, + "loss": 0.08915233612060547, + "step": 4580 + }, + { + "epoch": 0.638333449453076, + "grad_norm": 0.38985100388526917, + "learning_rate": 1.3204401702580199e-05, + "loss": 0.08762550354003906, + "step": 4581 + }, + { + "epoch": 0.6384727931442904, + "grad_norm": 0.5441884994506836, + "learning_rate": 1.3195536190687485e-05, + "loss": 0.0982666015625, + "step": 4582 + }, + { + "epoch": 0.6386121368355048, + "grad_norm": 0.3988303244113922, + "learning_rate": 1.3186672190591279e-05, + "loss": 0.07756233215332031, + "step": 4583 + }, + { + "epoch": 0.6387514805267192, + "grad_norm": 0.48537105321884155, + "learning_rate": 1.3177809704260964e-05, + "loss": 0.09985542297363281, + "step": 4584 + }, + { + "epoch": 0.6388908242179335, + "grad_norm": 0.422480046749115, + "learning_rate": 1.3168948733665583e-05, + "loss": 0.07730293273925781, + "step": 4585 + }, + { + "epoch": 0.6390301679091479, + "grad_norm": 0.6528667211532593, + "learning_rate": 1.3160089280773834e-05, + "loss": 0.11107540130615234, + "step": 4586 + }, + { + "epoch": 0.6391695116003623, + "grad_norm": 0.4626547694206238, + "learning_rate": 1.3151231347554085e-05, + "loss": 0.08795833587646484, + "step": 4587 + }, + { + "epoch": 0.6393088552915767, + "grad_norm": 0.44604310393333435, + "learning_rate": 1.3142374935974373e-05, + "loss": 0.07763671875, + "step": 4588 + }, + { + "epoch": 0.639448198982791, + "grad_norm": 0.37876030802726746, + "learning_rate": 1.313352004800239e-05, + "loss": 0.06640625, + "step": 4589 + }, + { + "epoch": 0.6395875426740054, + "grad_norm": 0.7446131110191345, + "learning_rate": 1.312466668560549e-05, + "loss": 0.10737895965576172, + "step": 4590 + }, + { + "epoch": 0.6397268863652198, + "grad_norm": 0.3513387143611908, + "learning_rate": 1.3115814850750686e-05, + "loss": 0.07133674621582031, + "step": 4591 + }, + { + "epoch": 0.6398662300564342, + "grad_norm": 0.5027082562446594, + "learning_rate": 1.3106964545404645e-05, + "loss": 0.08562088012695312, + "step": 4592 + }, + { + "epoch": 0.6400055737476485, + "grad_norm": 0.48914945125579834, + "learning_rate": 1.3098115771533718e-05, + "loss": 0.10676383972167969, + "step": 4593 + }, + { + "epoch": 0.6401449174388629, + "grad_norm": 0.4955807626247406, + "learning_rate": 1.3089268531103887e-05, + "loss": 0.08692550659179688, + "step": 4594 + }, + { + "epoch": 0.6402842611300773, + "grad_norm": 0.5905790328979492, + "learning_rate": 1.3080422826080828e-05, + "loss": 0.0927278995513916, + "step": 4595 + }, + { + "epoch": 0.6404236048212917, + "grad_norm": 0.6545858383178711, + "learning_rate": 1.3071578658429828e-05, + "loss": 0.0995321273803711, + "step": 4596 + }, + { + "epoch": 0.6405629485125061, + "grad_norm": 0.48971158266067505, + "learning_rate": 1.3062736030115877e-05, + "loss": 0.0888824462890625, + "step": 4597 + }, + { + "epoch": 0.6407022922037204, + "grad_norm": 0.40644195675849915, + "learning_rate": 1.3053894943103598e-05, + "loss": 0.08763313293457031, + "step": 4598 + }, + { + "epoch": 0.6408416358949348, + "grad_norm": 0.602123498916626, + "learning_rate": 1.3045055399357276e-05, + "loss": 0.09143209457397461, + "step": 4599 + }, + { + "epoch": 0.6409809795861492, + "grad_norm": 0.5871927738189697, + "learning_rate": 1.3036217400840865e-05, + "loss": 0.08821392059326172, + "step": 4600 + }, + { + "epoch": 0.6411203232773636, + "grad_norm": 0.5228808522224426, + "learning_rate": 1.3027380949517964e-05, + "loss": 0.07227039337158203, + "step": 4601 + }, + { + "epoch": 0.641259666968578, + "grad_norm": 0.3637200891971588, + "learning_rate": 1.3018546047351828e-05, + "loss": 0.08392000198364258, + "step": 4602 + }, + { + "epoch": 0.6413990106597923, + "grad_norm": 0.479594349861145, + "learning_rate": 1.3009712696305363e-05, + "loss": 0.08737945556640625, + "step": 4603 + }, + { + "epoch": 0.6415383543510068, + "grad_norm": 0.42066508531570435, + "learning_rate": 1.3000880898341155e-05, + "loss": 0.08960342407226562, + "step": 4604 + }, + { + "epoch": 0.6416776980422212, + "grad_norm": 0.40642020106315613, + "learning_rate": 1.2992050655421413e-05, + "loss": 0.09284305572509766, + "step": 4605 + }, + { + "epoch": 0.6418170417334356, + "grad_norm": 0.4473344087600708, + "learning_rate": 1.2983221969508028e-05, + "loss": 0.09665298461914062, + "step": 4606 + }, + { + "epoch": 0.64195638542465, + "grad_norm": 0.5384500026702881, + "learning_rate": 1.2974394842562523e-05, + "loss": 0.10090827941894531, + "step": 4607 + }, + { + "epoch": 0.6420957291158643, + "grad_norm": 0.47728878259658813, + "learning_rate": 1.2965569276546081e-05, + "loss": 0.08326339721679688, + "step": 4608 + }, + { + "epoch": 0.6422350728070787, + "grad_norm": 0.40275031328201294, + "learning_rate": 1.2956745273419551e-05, + "loss": 0.08894729614257812, + "step": 4609 + }, + { + "epoch": 0.6423744164982931, + "grad_norm": 0.44639405608177185, + "learning_rate": 1.2947922835143415e-05, + "loss": 0.08776283264160156, + "step": 4610 + }, + { + "epoch": 0.6425137601895075, + "grad_norm": 0.648344099521637, + "learning_rate": 1.2939101963677838e-05, + "loss": 0.09097766876220703, + "step": 4611 + }, + { + "epoch": 0.6426531038807218, + "grad_norm": 1.236985683441162, + "learning_rate": 1.2930282660982592e-05, + "loss": 0.12028121948242188, + "step": 4612 + }, + { + "epoch": 0.6427924475719362, + "grad_norm": 0.5436742901802063, + "learning_rate": 1.2921464929017134e-05, + "loss": 0.11348915100097656, + "step": 4613 + }, + { + "epoch": 0.6429317912631506, + "grad_norm": 0.5806987881660461, + "learning_rate": 1.2912648769740563e-05, + "loss": 0.09092330932617188, + "step": 4614 + }, + { + "epoch": 0.643071134954365, + "grad_norm": 0.45672929286956787, + "learning_rate": 1.2903834185111625e-05, + "loss": 0.09088706970214844, + "step": 4615 + }, + { + "epoch": 0.6432104786455793, + "grad_norm": 0.5508505702018738, + "learning_rate": 1.2895021177088733e-05, + "loss": 0.10752105712890625, + "step": 4616 + }, + { + "epoch": 0.6433498223367937, + "grad_norm": 0.5546190738677979, + "learning_rate": 1.2886209747629921e-05, + "loss": 0.10874176025390625, + "step": 4617 + }, + { + "epoch": 0.6434891660280081, + "grad_norm": 0.4518747925758362, + "learning_rate": 1.2877399898692892e-05, + "loss": 0.0937957763671875, + "step": 4618 + }, + { + "epoch": 0.6436285097192225, + "grad_norm": 0.4524734318256378, + "learning_rate": 1.286859163223499e-05, + "loss": 0.10285758972167969, + "step": 4619 + }, + { + "epoch": 0.6437678534104369, + "grad_norm": 0.6606327891349792, + "learning_rate": 1.2859784950213218e-05, + "loss": 0.10219573974609375, + "step": 4620 + }, + { + "epoch": 0.6439071971016512, + "grad_norm": 0.4358295500278473, + "learning_rate": 1.2850979854584216e-05, + "loss": 0.0882415771484375, + "step": 4621 + }, + { + "epoch": 0.6440465407928656, + "grad_norm": 0.4081271290779114, + "learning_rate": 1.2842176347304283e-05, + "loss": 0.08611869812011719, + "step": 4622 + }, + { + "epoch": 0.64418588448408, + "grad_norm": 0.3783053457736969, + "learning_rate": 1.2833374430329341e-05, + "loss": 0.08376216888427734, + "step": 4623 + }, + { + "epoch": 0.6443252281752944, + "grad_norm": 0.34234657883644104, + "learning_rate": 1.2824574105614983e-05, + "loss": 0.06608867645263672, + "step": 4624 + }, + { + "epoch": 0.6444645718665087, + "grad_norm": 0.2793162167072296, + "learning_rate": 1.2815775375116442e-05, + "loss": 0.07828140258789062, + "step": 4625 + }, + { + "epoch": 0.6446039155577231, + "grad_norm": 0.43398377299308777, + "learning_rate": 1.280697824078859e-05, + "loss": 0.09373700618743896, + "step": 4626 + }, + { + "epoch": 0.6447432592489375, + "grad_norm": 0.313435435295105, + "learning_rate": 1.2798182704585968e-05, + "loss": 0.07974544167518616, + "step": 4627 + }, + { + "epoch": 0.6448826029401519, + "grad_norm": 0.4344255328178406, + "learning_rate": 1.2789388768462715e-05, + "loss": 0.08259010314941406, + "step": 4628 + }, + { + "epoch": 0.6450219466313662, + "grad_norm": 0.30372217297554016, + "learning_rate": 1.2780596434372663e-05, + "loss": 0.08020210266113281, + "step": 4629 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.40446215867996216, + "learning_rate": 1.2771805704269258e-05, + "loss": 0.08685874938964844, + "step": 4630 + }, + { + "epoch": 0.645300634013795, + "grad_norm": 0.6505599617958069, + "learning_rate": 1.2763016580105601e-05, + "loss": 0.08129596710205078, + "step": 4631 + }, + { + "epoch": 0.6454399777050094, + "grad_norm": 0.6981503367424011, + "learning_rate": 1.2754229063834448e-05, + "loss": 0.1235504150390625, + "step": 4632 + }, + { + "epoch": 0.6455793213962238, + "grad_norm": 0.39096203446388245, + "learning_rate": 1.2745443157408164e-05, + "loss": 0.07837581634521484, + "step": 4633 + }, + { + "epoch": 0.6457186650874381, + "grad_norm": 0.47469308972358704, + "learning_rate": 1.2736658862778788e-05, + "loss": 0.09674358367919922, + "step": 4634 + }, + { + "epoch": 0.6458580087786525, + "grad_norm": 0.45918789505958557, + "learning_rate": 1.2727876181897982e-05, + "loss": 0.08660030364990234, + "step": 4635 + }, + { + "epoch": 0.6459973524698669, + "grad_norm": 0.5240320563316345, + "learning_rate": 1.2719095116717069e-05, + "loss": 0.10026359558105469, + "step": 4636 + }, + { + "epoch": 0.6461366961610813, + "grad_norm": 0.4577418565750122, + "learning_rate": 1.2710315669186994e-05, + "loss": 0.09513473510742188, + "step": 4637 + }, + { + "epoch": 0.6462760398522956, + "grad_norm": 0.338151216506958, + "learning_rate": 1.2701537841258358e-05, + "loss": 0.08573532104492188, + "step": 4638 + }, + { + "epoch": 0.64641538354351, + "grad_norm": 0.48299440741539, + "learning_rate": 1.2692761634881377e-05, + "loss": 0.10324478149414062, + "step": 4639 + }, + { + "epoch": 0.6465547272347244, + "grad_norm": 0.35390031337738037, + "learning_rate": 1.2683987052005938e-05, + "loss": 0.07771492004394531, + "step": 4640 + }, + { + "epoch": 0.6466940709259388, + "grad_norm": 0.4264015555381775, + "learning_rate": 1.2675214094581547e-05, + "loss": 0.08539009094238281, + "step": 4641 + }, + { + "epoch": 0.6468334146171532, + "grad_norm": 0.46686241030693054, + "learning_rate": 1.2666442764557352e-05, + "loss": 0.09500837326049805, + "step": 4642 + }, + { + "epoch": 0.6469727583083675, + "grad_norm": 0.6178699731826782, + "learning_rate": 1.2657673063882161e-05, + "loss": 0.09256327152252197, + "step": 4643 + }, + { + "epoch": 0.647112101999582, + "grad_norm": 0.8067387342453003, + "learning_rate": 1.2648904994504374e-05, + "loss": 0.1185464859008789, + "step": 4644 + }, + { + "epoch": 0.6472514456907964, + "grad_norm": 0.42314353585243225, + "learning_rate": 1.2640138558372073e-05, + "loss": 0.08864974975585938, + "step": 4645 + }, + { + "epoch": 0.6473907893820108, + "grad_norm": 0.39091137051582336, + "learning_rate": 1.2631373757432957e-05, + "loss": 0.10087203979492188, + "step": 4646 + }, + { + "epoch": 0.6475301330732252, + "grad_norm": 0.48380839824676514, + "learning_rate": 1.2622610593634356e-05, + "loss": 0.09992170333862305, + "step": 4647 + }, + { + "epoch": 0.6476694767644395, + "grad_norm": 0.4516989588737488, + "learning_rate": 1.2613849068923266e-05, + "loss": 0.09030914306640625, + "step": 4648 + }, + { + "epoch": 0.6478088204556539, + "grad_norm": 0.36360034346580505, + "learning_rate": 1.2605089185246277e-05, + "loss": 0.07397842407226562, + "step": 4649 + }, + { + "epoch": 0.6479481641468683, + "grad_norm": 0.5439326167106628, + "learning_rate": 1.2596330944549642e-05, + "loss": 0.09405326843261719, + "step": 4650 + }, + { + "epoch": 0.6480875078380827, + "grad_norm": 0.5021411776542664, + "learning_rate": 1.2587574348779238e-05, + "loss": 0.10547065734863281, + "step": 4651 + }, + { + "epoch": 0.648226851529297, + "grad_norm": 0.5724138021469116, + "learning_rate": 1.2578819399880591e-05, + "loss": 0.10145187377929688, + "step": 4652 + }, + { + "epoch": 0.6483661952205114, + "grad_norm": 0.4749514162540436, + "learning_rate": 1.2570066099798847e-05, + "loss": 0.09420967102050781, + "step": 4653 + }, + { + "epoch": 0.6485055389117258, + "grad_norm": 0.4693751335144043, + "learning_rate": 1.2561314450478785e-05, + "loss": 0.099212646484375, + "step": 4654 + }, + { + "epoch": 0.6486448826029402, + "grad_norm": 0.574442446231842, + "learning_rate": 1.255256445386482e-05, + "loss": 0.1045980453491211, + "step": 4655 + }, + { + "epoch": 0.6487842262941546, + "grad_norm": 0.43001362681388855, + "learning_rate": 1.2543816111901008e-05, + "loss": 0.08460617065429688, + "step": 4656 + }, + { + "epoch": 0.6489235699853689, + "grad_norm": 0.36931005120277405, + "learning_rate": 1.253506942653103e-05, + "loss": 0.07511568069458008, + "step": 4657 + }, + { + "epoch": 0.6490629136765833, + "grad_norm": 0.49116718769073486, + "learning_rate": 1.2526324399698193e-05, + "loss": 0.09001731872558594, + "step": 4658 + }, + { + "epoch": 0.6492022573677977, + "grad_norm": 0.4516569972038269, + "learning_rate": 1.2517581033345461e-05, + "loss": 0.08733177185058594, + "step": 4659 + }, + { + "epoch": 0.6493416010590121, + "grad_norm": 0.6007871031761169, + "learning_rate": 1.2508839329415384e-05, + "loss": 0.0968780517578125, + "step": 4660 + }, + { + "epoch": 0.6494809447502264, + "grad_norm": 0.3941996991634369, + "learning_rate": 1.2500099289850185e-05, + "loss": 0.09589004516601562, + "step": 4661 + }, + { + "epoch": 0.6496202884414408, + "grad_norm": 0.39108631014823914, + "learning_rate": 1.2491360916591697e-05, + "loss": 0.08215761184692383, + "step": 4662 + }, + { + "epoch": 0.6497596321326552, + "grad_norm": 0.37633052468299866, + "learning_rate": 1.2482624211581387e-05, + "loss": 0.08888435363769531, + "step": 4663 + }, + { + "epoch": 0.6498989758238696, + "grad_norm": 0.5749726295471191, + "learning_rate": 1.2473889176760361e-05, + "loss": 0.0899350643157959, + "step": 4664 + }, + { + "epoch": 0.650038319515084, + "grad_norm": 0.3255908787250519, + "learning_rate": 1.246515581406933e-05, + "loss": 0.0656747817993164, + "step": 4665 + }, + { + "epoch": 0.6501776632062983, + "grad_norm": 0.38335445523262024, + "learning_rate": 1.2456424125448655e-05, + "loss": 0.08130168914794922, + "step": 4666 + }, + { + "epoch": 0.6503170068975127, + "grad_norm": 0.9725938439369202, + "learning_rate": 1.2447694112838309e-05, + "loss": 0.09482955932617188, + "step": 4667 + }, + { + "epoch": 0.6504563505887271, + "grad_norm": 0.3844677805900574, + "learning_rate": 1.2438965778177918e-05, + "loss": 0.09054756164550781, + "step": 4668 + }, + { + "epoch": 0.6505956942799415, + "grad_norm": 0.4399562478065491, + "learning_rate": 1.243023912340671e-05, + "loss": 0.0886082649230957, + "step": 4669 + }, + { + "epoch": 0.6507350379711558, + "grad_norm": 0.5949267745018005, + "learning_rate": 1.2421514150463546e-05, + "loss": 0.1095123291015625, + "step": 4670 + }, + { + "epoch": 0.6508743816623702, + "grad_norm": 0.4178057014942169, + "learning_rate": 1.2412790861286914e-05, + "loss": 0.089599609375, + "step": 4671 + }, + { + "epoch": 0.6510137253535846, + "grad_norm": 0.6200020909309387, + "learning_rate": 1.2404069257814939e-05, + "loss": 0.10635948181152344, + "step": 4672 + }, + { + "epoch": 0.651153069044799, + "grad_norm": 0.7045363783836365, + "learning_rate": 1.2395349341985355e-05, + "loss": 0.10467529296875, + "step": 4673 + }, + { + "epoch": 0.6512924127360133, + "grad_norm": 0.746483325958252, + "learning_rate": 1.2386631115735525e-05, + "loss": 0.10455513000488281, + "step": 4674 + }, + { + "epoch": 0.6514317564272277, + "grad_norm": 0.41018441319465637, + "learning_rate": 1.2377914581002459e-05, + "loss": 0.08329010009765625, + "step": 4675 + }, + { + "epoch": 0.6515711001184421, + "grad_norm": 0.5130643248558044, + "learning_rate": 1.2369199739722744e-05, + "loss": 0.08539295196533203, + "step": 4676 + }, + { + "epoch": 0.6517104438096565, + "grad_norm": 0.5700236558914185, + "learning_rate": 1.2360486593832639e-05, + "loss": 0.09559249877929688, + "step": 4677 + }, + { + "epoch": 0.6518497875008709, + "grad_norm": 0.5587480068206787, + "learning_rate": 1.2351775145267996e-05, + "loss": 0.09914016723632812, + "step": 4678 + }, + { + "epoch": 0.6519891311920852, + "grad_norm": 0.3500096797943115, + "learning_rate": 1.2343065395964304e-05, + "loss": 0.08072495460510254, + "step": 4679 + }, + { + "epoch": 0.6521284748832996, + "grad_norm": 0.3164267838001251, + "learning_rate": 1.2334357347856678e-05, + "loss": 0.07980155944824219, + "step": 4680 + }, + { + "epoch": 0.652267818574514, + "grad_norm": 0.6220977306365967, + "learning_rate": 1.2325651002879835e-05, + "loss": 0.10767555236816406, + "step": 4681 + }, + { + "epoch": 0.6524071622657284, + "grad_norm": 0.3947206735610962, + "learning_rate": 1.2316946362968129e-05, + "loss": 0.08630180358886719, + "step": 4682 + }, + { + "epoch": 0.6525465059569427, + "grad_norm": 0.43033215403556824, + "learning_rate": 1.230824343005553e-05, + "loss": 0.09185981750488281, + "step": 4683 + }, + { + "epoch": 0.6526858496481572, + "grad_norm": 0.30780503153800964, + "learning_rate": 1.2299542206075641e-05, + "loss": 0.07055473327636719, + "step": 4684 + }, + { + "epoch": 0.6528251933393716, + "grad_norm": 0.38337865471839905, + "learning_rate": 1.2290842692961673e-05, + "loss": 0.08349800109863281, + "step": 4685 + }, + { + "epoch": 0.652964537030586, + "grad_norm": 0.4130343496799469, + "learning_rate": 1.2282144892646453e-05, + "loss": 0.07947444915771484, + "step": 4686 + }, + { + "epoch": 0.6531038807218004, + "grad_norm": 0.4116237461566925, + "learning_rate": 1.227344880706243e-05, + "loss": 0.08200263977050781, + "step": 4687 + }, + { + "epoch": 0.6532432244130147, + "grad_norm": 0.42065051198005676, + "learning_rate": 1.2264754438141684e-05, + "loss": 0.07603931427001953, + "step": 4688 + }, + { + "epoch": 0.6533825681042291, + "grad_norm": 0.3685922622680664, + "learning_rate": 1.2256061787815908e-05, + "loss": 0.07397651672363281, + "step": 4689 + }, + { + "epoch": 0.6535219117954435, + "grad_norm": 0.41605833172798157, + "learning_rate": 1.2247370858016407e-05, + "loss": 0.08231449127197266, + "step": 4690 + }, + { + "epoch": 0.6536612554866579, + "grad_norm": 0.612356424331665, + "learning_rate": 1.22386816506741e-05, + "loss": 0.11708450317382812, + "step": 4691 + }, + { + "epoch": 0.6538005991778723, + "grad_norm": 0.4272867441177368, + "learning_rate": 1.2229994167719537e-05, + "loss": 0.06774330139160156, + "step": 4692 + }, + { + "epoch": 0.6539399428690866, + "grad_norm": 0.42801523208618164, + "learning_rate": 1.2221308411082877e-05, + "loss": 0.09359359741210938, + "step": 4693 + }, + { + "epoch": 0.654079286560301, + "grad_norm": 0.4659161865711212, + "learning_rate": 1.2212624382693896e-05, + "loss": 0.0911712646484375, + "step": 4694 + }, + { + "epoch": 0.6542186302515154, + "grad_norm": 0.5477996468544006, + "learning_rate": 1.220394208448199e-05, + "loss": 0.09504127502441406, + "step": 4695 + }, + { + "epoch": 0.6543579739427298, + "grad_norm": 0.28576716780662537, + "learning_rate": 1.2195261518376173e-05, + "loss": 0.07413673400878906, + "step": 4696 + }, + { + "epoch": 0.6544973176339441, + "grad_norm": 0.4582616686820984, + "learning_rate": 1.2186582686305056e-05, + "loss": 0.09170341491699219, + "step": 4697 + }, + { + "epoch": 0.6546366613251585, + "grad_norm": 0.3294510841369629, + "learning_rate": 1.2177905590196884e-05, + "loss": 0.07083892822265625, + "step": 4698 + }, + { + "epoch": 0.6547760050163729, + "grad_norm": 0.45074737071990967, + "learning_rate": 1.2169230231979503e-05, + "loss": 0.08593940734863281, + "step": 4699 + }, + { + "epoch": 0.6549153487075873, + "grad_norm": 0.6865416169166565, + "learning_rate": 1.216055661358039e-05, + "loss": 0.09991455078125, + "step": 4700 + }, + { + "epoch": 0.6550546923988017, + "grad_norm": 0.5591391324996948, + "learning_rate": 1.215188473692662e-05, + "loss": 0.10121536254882812, + "step": 4701 + }, + { + "epoch": 0.655194036090016, + "grad_norm": 0.3577703833580017, + "learning_rate": 1.2143214603944889e-05, + "loss": 0.07126617431640625, + "step": 4702 + }, + { + "epoch": 0.6553333797812304, + "grad_norm": 0.4598657488822937, + "learning_rate": 1.213454621656149e-05, + "loss": 0.08643531799316406, + "step": 4703 + }, + { + "epoch": 0.6554727234724448, + "grad_norm": 0.7309152483940125, + "learning_rate": 1.2125879576702354e-05, + "loss": 0.10023021697998047, + "step": 4704 + }, + { + "epoch": 0.6556120671636592, + "grad_norm": 0.2723618447780609, + "learning_rate": 1.211721468629301e-05, + "loss": 0.06342697143554688, + "step": 4705 + }, + { + "epoch": 0.6557514108548735, + "grad_norm": 0.40843167901039124, + "learning_rate": 1.2108551547258598e-05, + "loss": 0.07693672180175781, + "step": 4706 + }, + { + "epoch": 0.6558907545460879, + "grad_norm": 0.4685195982456207, + "learning_rate": 1.2099890161523864e-05, + "loss": 0.09447956085205078, + "step": 4707 + }, + { + "epoch": 0.6560300982373023, + "grad_norm": 0.568392276763916, + "learning_rate": 1.209123053101317e-05, + "loss": 0.08964920043945312, + "step": 4708 + }, + { + "epoch": 0.6561694419285167, + "grad_norm": 0.37359726428985596, + "learning_rate": 1.2082572657650494e-05, + "loss": 0.10072517395019531, + "step": 4709 + }, + { + "epoch": 0.656308785619731, + "grad_norm": 0.6548362374305725, + "learning_rate": 1.2073916543359415e-05, + "loss": 0.08612823486328125, + "step": 4710 + }, + { + "epoch": 0.6564481293109454, + "grad_norm": 0.46498650312423706, + "learning_rate": 1.2065262190063132e-05, + "loss": 0.07733821868896484, + "step": 4711 + }, + { + "epoch": 0.6565874730021598, + "grad_norm": 1.0608278512954712, + "learning_rate": 1.2056609599684426e-05, + "loss": 0.10894393920898438, + "step": 4712 + }, + { + "epoch": 0.6567268166933742, + "grad_norm": 0.37892091274261475, + "learning_rate": 1.2047958774145722e-05, + "loss": 0.08504486083984375, + "step": 4713 + }, + { + "epoch": 0.6568661603845886, + "grad_norm": 0.385387659072876, + "learning_rate": 1.2039309715369033e-05, + "loss": 0.07960891723632812, + "step": 4714 + }, + { + "epoch": 0.6570055040758029, + "grad_norm": 0.6441800594329834, + "learning_rate": 1.203066242527597e-05, + "loss": 0.10402393341064453, + "step": 4715 + }, + { + "epoch": 0.6571448477670173, + "grad_norm": 0.3423520028591156, + "learning_rate": 1.2022016905787779e-05, + "loss": 0.08773231506347656, + "step": 4716 + }, + { + "epoch": 0.6572841914582317, + "grad_norm": 0.5155045986175537, + "learning_rate": 1.2013373158825297e-05, + "loss": 0.08702850341796875, + "step": 4717 + }, + { + "epoch": 0.6574235351494461, + "grad_norm": 0.35209301114082336, + "learning_rate": 1.2004731186308956e-05, + "loss": 0.08453369140625, + "step": 4718 + }, + { + "epoch": 0.6575628788406604, + "grad_norm": 0.5628413558006287, + "learning_rate": 1.1996090990158804e-05, + "loss": 0.08868026733398438, + "step": 4719 + }, + { + "epoch": 0.6577022225318748, + "grad_norm": 0.30286741256713867, + "learning_rate": 1.198745257229451e-05, + "loss": 0.07702445983886719, + "step": 4720 + }, + { + "epoch": 0.6578415662230892, + "grad_norm": 0.36571767926216125, + "learning_rate": 1.197881593463532e-05, + "loss": 0.07804489135742188, + "step": 4721 + }, + { + "epoch": 0.6579809099143036, + "grad_norm": 0.43436551094055176, + "learning_rate": 1.197018107910011e-05, + "loss": 0.08164405822753906, + "step": 4722 + }, + { + "epoch": 0.658120253605518, + "grad_norm": 0.46832138299942017, + "learning_rate": 1.1961548007607335e-05, + "loss": 0.09968185424804688, + "step": 4723 + }, + { + "epoch": 0.6582595972967323, + "grad_norm": 0.4728173315525055, + "learning_rate": 1.1952916722075068e-05, + "loss": 0.08155536651611328, + "step": 4724 + }, + { + "epoch": 0.6583989409879468, + "grad_norm": 1.0498130321502686, + "learning_rate": 1.1944287224420991e-05, + "loss": 0.11681938171386719, + "step": 4725 + }, + { + "epoch": 0.6585382846791612, + "grad_norm": 0.39551427960395813, + "learning_rate": 1.1935659516562375e-05, + "loss": 0.08014583587646484, + "step": 4726 + }, + { + "epoch": 0.6586776283703756, + "grad_norm": 0.34950757026672363, + "learning_rate": 1.1927033600416113e-05, + "loss": 0.08426761627197266, + "step": 4727 + }, + { + "epoch": 0.65881697206159, + "grad_norm": 0.4925874173641205, + "learning_rate": 1.1918409477898668e-05, + "loss": 0.0899496078491211, + "step": 4728 + }, + { + "epoch": 0.6589563157528043, + "grad_norm": 0.516136646270752, + "learning_rate": 1.1909787150926128e-05, + "loss": 0.11353874206542969, + "step": 4729 + }, + { + "epoch": 0.6590956594440187, + "grad_norm": 0.3319043517112732, + "learning_rate": 1.1901166621414184e-05, + "loss": 0.07875823974609375, + "step": 4730 + }, + { + "epoch": 0.6592350031352331, + "grad_norm": 0.5964818000793457, + "learning_rate": 1.1892547891278115e-05, + "loss": 0.12690067291259766, + "step": 4731 + }, + { + "epoch": 0.6593743468264475, + "grad_norm": 0.7224604487419128, + "learning_rate": 1.1883930962432811e-05, + "loss": 0.1112823486328125, + "step": 4732 + }, + { + "epoch": 0.6595136905176618, + "grad_norm": 0.43247929215431213, + "learning_rate": 1.1875315836792755e-05, + "loss": 0.07916259765625, + "step": 4733 + }, + { + "epoch": 0.6596530342088762, + "grad_norm": 0.42042434215545654, + "learning_rate": 1.1866702516272031e-05, + "loss": 0.08523404598236084, + "step": 4734 + }, + { + "epoch": 0.6597923779000906, + "grad_norm": 0.4308447539806366, + "learning_rate": 1.1858091002784315e-05, + "loss": 0.09669685363769531, + "step": 4735 + }, + { + "epoch": 0.659931721591305, + "grad_norm": 0.4032500982284546, + "learning_rate": 1.18494812982429e-05, + "loss": 0.08689308166503906, + "step": 4736 + }, + { + "epoch": 0.6600710652825194, + "grad_norm": 0.4127522110939026, + "learning_rate": 1.1840873404560662e-05, + "loss": 0.09236526489257812, + "step": 4737 + }, + { + "epoch": 0.6602104089737337, + "grad_norm": 0.5064621567726135, + "learning_rate": 1.1832267323650081e-05, + "loss": 0.11086380481719971, + "step": 4738 + }, + { + "epoch": 0.6603497526649481, + "grad_norm": 0.5723005533218384, + "learning_rate": 1.1823663057423218e-05, + "loss": 0.0951995849609375, + "step": 4739 + }, + { + "epoch": 0.6604890963561625, + "grad_norm": 0.41496503353118896, + "learning_rate": 1.1815060607791761e-05, + "loss": 0.08989715576171875, + "step": 4740 + }, + { + "epoch": 0.6606284400473769, + "grad_norm": 0.38054153323173523, + "learning_rate": 1.1806459976666972e-05, + "loss": 0.08905220031738281, + "step": 4741 + }, + { + "epoch": 0.6607677837385912, + "grad_norm": 0.399604856967926, + "learning_rate": 1.1797861165959707e-05, + "loss": 0.07761955261230469, + "step": 4742 + }, + { + "epoch": 0.6609071274298056, + "grad_norm": 0.5525346398353577, + "learning_rate": 1.1789264177580448e-05, + "loss": 0.1053924560546875, + "step": 4743 + }, + { + "epoch": 0.66104647112102, + "grad_norm": 0.3222920596599579, + "learning_rate": 1.1780669013439224e-05, + "loss": 0.08133316040039062, + "step": 4744 + }, + { + "epoch": 0.6611858148122344, + "grad_norm": 0.5630582571029663, + "learning_rate": 1.1772075675445695e-05, + "loss": 0.09263992309570312, + "step": 4745 + }, + { + "epoch": 0.6613251585034488, + "grad_norm": 0.44839245080947876, + "learning_rate": 1.1763484165509108e-05, + "loss": 0.09129142761230469, + "step": 4746 + }, + { + "epoch": 0.6614645021946631, + "grad_norm": 0.4273834228515625, + "learning_rate": 1.1754894485538288e-05, + "loss": 0.07399463653564453, + "step": 4747 + }, + { + "epoch": 0.6616038458858775, + "grad_norm": 0.3285491466522217, + "learning_rate": 1.1746306637441684e-05, + "loss": 0.08945083618164062, + "step": 4748 + }, + { + "epoch": 0.6617431895770919, + "grad_norm": 0.522596001625061, + "learning_rate": 1.1737720623127307e-05, + "loss": 0.0934600830078125, + "step": 4749 + }, + { + "epoch": 0.6618825332683063, + "grad_norm": 0.36476871371269226, + "learning_rate": 1.1729136444502775e-05, + "loss": 0.09197998046875, + "step": 4750 + }, + { + "epoch": 0.6620218769595206, + "grad_norm": 0.48481330275535583, + "learning_rate": 1.1720554103475297e-05, + "loss": 0.09187030792236328, + "step": 4751 + }, + { + "epoch": 0.662161220650735, + "grad_norm": 0.5381113290786743, + "learning_rate": 1.1711973601951676e-05, + "loss": 0.09410762786865234, + "step": 4752 + }, + { + "epoch": 0.6623005643419494, + "grad_norm": 0.3489442765712738, + "learning_rate": 1.1703394941838302e-05, + "loss": 0.08454608917236328, + "step": 4753 + }, + { + "epoch": 0.6624399080331638, + "grad_norm": 0.758690357208252, + "learning_rate": 1.1694818125041163e-05, + "loss": 0.09891319274902344, + "step": 4754 + }, + { + "epoch": 0.6625792517243781, + "grad_norm": 0.4216858744621277, + "learning_rate": 1.1686243153465817e-05, + "loss": 0.08476448059082031, + "step": 4755 + }, + { + "epoch": 0.6627185954155925, + "grad_norm": 0.5540733933448792, + "learning_rate": 1.1677670029017437e-05, + "loss": 0.0945892333984375, + "step": 4756 + }, + { + "epoch": 0.6628579391068069, + "grad_norm": 0.4999327063560486, + "learning_rate": 1.1669098753600777e-05, + "loss": 0.10305595397949219, + "step": 4757 + }, + { + "epoch": 0.6629972827980213, + "grad_norm": 0.4513465166091919, + "learning_rate": 1.1660529329120173e-05, + "loss": 0.09360337257385254, + "step": 4758 + }, + { + "epoch": 0.6631366264892357, + "grad_norm": 0.45661938190460205, + "learning_rate": 1.1651961757479567e-05, + "loss": 0.09617805480957031, + "step": 4759 + }, + { + "epoch": 0.66327597018045, + "grad_norm": 0.4659121632575989, + "learning_rate": 1.1643396040582468e-05, + "loss": 0.08000373840332031, + "step": 4760 + }, + { + "epoch": 0.6634153138716644, + "grad_norm": 0.5865063071250916, + "learning_rate": 1.1634832180331976e-05, + "loss": 0.08164691925048828, + "step": 4761 + }, + { + "epoch": 0.6635546575628788, + "grad_norm": 0.5576199293136597, + "learning_rate": 1.1626270178630796e-05, + "loss": 0.10361671447753906, + "step": 4762 + }, + { + "epoch": 0.6636940012540932, + "grad_norm": 0.414394736289978, + "learning_rate": 1.1617710037381214e-05, + "loss": 0.09154796600341797, + "step": 4763 + }, + { + "epoch": 0.6638333449453075, + "grad_norm": 0.444062203168869, + "learning_rate": 1.1609151758485088e-05, + "loss": 0.09856891632080078, + "step": 4764 + }, + { + "epoch": 0.663972688636522, + "grad_norm": 0.3495365381240845, + "learning_rate": 1.160059534384387e-05, + "loss": 0.08142280578613281, + "step": 4765 + }, + { + "epoch": 0.6641120323277364, + "grad_norm": 0.5171904563903809, + "learning_rate": 1.1592040795358604e-05, + "loss": 0.11140251159667969, + "step": 4766 + }, + { + "epoch": 0.6642513760189508, + "grad_norm": 0.4348148703575134, + "learning_rate": 1.1583488114929924e-05, + "loss": 0.08557319641113281, + "step": 4767 + }, + { + "epoch": 0.6643907197101652, + "grad_norm": 0.32204869389533997, + "learning_rate": 1.1574937304458023e-05, + "loss": 0.07945311069488525, + "step": 4768 + }, + { + "epoch": 0.6645300634013795, + "grad_norm": 0.4891279637813568, + "learning_rate": 1.1566388365842717e-05, + "loss": 0.07808876037597656, + "step": 4769 + }, + { + "epoch": 0.6646694070925939, + "grad_norm": 0.34862902760505676, + "learning_rate": 1.1557841300983363e-05, + "loss": 0.0677947998046875, + "step": 4770 + }, + { + "epoch": 0.6648087507838083, + "grad_norm": 0.4044741690158844, + "learning_rate": 1.1549296111778942e-05, + "loss": 0.08869171142578125, + "step": 4771 + }, + { + "epoch": 0.6649480944750227, + "grad_norm": 0.4348275363445282, + "learning_rate": 1.1540752800127986e-05, + "loss": 0.09490203857421875, + "step": 4772 + }, + { + "epoch": 0.665087438166237, + "grad_norm": 0.5157663226127625, + "learning_rate": 1.1532211367928628e-05, + "loss": 0.08634567260742188, + "step": 4773 + }, + { + "epoch": 0.6652267818574514, + "grad_norm": 0.46724674105644226, + "learning_rate": 1.152367181707859e-05, + "loss": 0.07767963409423828, + "step": 4774 + }, + { + "epoch": 0.6653661255486658, + "grad_norm": 0.4626327157020569, + "learning_rate": 1.1515134149475156e-05, + "loss": 0.06425762176513672, + "step": 4775 + }, + { + "epoch": 0.6655054692398802, + "grad_norm": 0.48510491847991943, + "learning_rate": 1.1506598367015194e-05, + "loss": 0.09752845764160156, + "step": 4776 + }, + { + "epoch": 0.6656448129310946, + "grad_norm": 0.45258283615112305, + "learning_rate": 1.1498064471595167e-05, + "loss": 0.08234977722167969, + "step": 4777 + }, + { + "epoch": 0.6657841566223089, + "grad_norm": 0.3468647003173828, + "learning_rate": 1.1489532465111122e-05, + "loss": 0.07949447631835938, + "step": 4778 + }, + { + "epoch": 0.6659235003135233, + "grad_norm": 0.4563741981983185, + "learning_rate": 1.1481002349458655e-05, + "loss": 0.08969497680664062, + "step": 4779 + }, + { + "epoch": 0.6660628440047377, + "grad_norm": 0.5133095383644104, + "learning_rate": 1.1472474126532981e-05, + "loss": 0.08984375, + "step": 4780 + }, + { + "epoch": 0.6662021876959521, + "grad_norm": 0.3769344985485077, + "learning_rate": 1.1463947798228871e-05, + "loss": 0.07590103149414062, + "step": 4781 + }, + { + "epoch": 0.6663415313871665, + "grad_norm": 0.5638754963874817, + "learning_rate": 1.1455423366440673e-05, + "loss": 0.0915374755859375, + "step": 4782 + }, + { + "epoch": 0.6664808750783808, + "grad_norm": 0.38903337717056274, + "learning_rate": 1.1446900833062325e-05, + "loss": 0.09134101867675781, + "step": 4783 + }, + { + "epoch": 0.6666202187695952, + "grad_norm": 0.8779253959655762, + "learning_rate": 1.1438380199987341e-05, + "loss": 0.0915985107421875, + "step": 4784 + }, + { + "epoch": 0.6667595624608096, + "grad_norm": 0.4409971535205841, + "learning_rate": 1.1429861469108827e-05, + "loss": 0.08480167388916016, + "step": 4785 + }, + { + "epoch": 0.666898906152024, + "grad_norm": 0.4875450134277344, + "learning_rate": 1.1421344642319418e-05, + "loss": 0.09196853637695312, + "step": 4786 + }, + { + "epoch": 0.6670382498432383, + "grad_norm": 0.3875437080860138, + "learning_rate": 1.1412829721511378e-05, + "loss": 0.07814788818359375, + "step": 4787 + }, + { + "epoch": 0.6671775935344527, + "grad_norm": 0.47170504927635193, + "learning_rate": 1.140431670857653e-05, + "loss": 0.09966659545898438, + "step": 4788 + }, + { + "epoch": 0.6673169372256671, + "grad_norm": 0.30563125014305115, + "learning_rate": 1.1395805605406263e-05, + "loss": 0.06636428833007812, + "step": 4789 + }, + { + "epoch": 0.6674562809168815, + "grad_norm": 0.37705859541893005, + "learning_rate": 1.1387296413891551e-05, + "loss": 0.07981586456298828, + "step": 4790 + }, + { + "epoch": 0.6675956246080959, + "grad_norm": 0.33448612689971924, + "learning_rate": 1.1378789135922954e-05, + "loss": 0.07532310485839844, + "step": 4791 + }, + { + "epoch": 0.6677349682993102, + "grad_norm": 0.5244847536087036, + "learning_rate": 1.1370283773390582e-05, + "loss": 0.09309959411621094, + "step": 4792 + }, + { + "epoch": 0.6678743119905246, + "grad_norm": 0.42090773582458496, + "learning_rate": 1.136178032818413e-05, + "loss": 0.09040260314941406, + "step": 4793 + }, + { + "epoch": 0.668013655681739, + "grad_norm": 0.35900363326072693, + "learning_rate": 1.1353278802192875e-05, + "loss": 0.0791473388671875, + "step": 4794 + }, + { + "epoch": 0.6681529993729534, + "grad_norm": 0.3735945224761963, + "learning_rate": 1.1344779197305674e-05, + "loss": 0.0743718147277832, + "step": 4795 + }, + { + "epoch": 0.6682923430641677, + "grad_norm": 0.5991089344024658, + "learning_rate": 1.1336281515410927e-05, + "loss": 0.09581470489501953, + "step": 4796 + }, + { + "epoch": 0.6684316867553821, + "grad_norm": 0.4501776099205017, + "learning_rate": 1.1327785758396627e-05, + "loss": 0.0990438461303711, + "step": 4797 + }, + { + "epoch": 0.6685710304465965, + "grad_norm": 0.4198278486728668, + "learning_rate": 1.131929192815034e-05, + "loss": 0.08720970153808594, + "step": 4798 + }, + { + "epoch": 0.6687103741378109, + "grad_norm": 0.518855094909668, + "learning_rate": 1.1310800026559213e-05, + "loss": 0.103912353515625, + "step": 4799 + }, + { + "epoch": 0.6688497178290252, + "grad_norm": 0.5569597482681274, + "learning_rate": 1.130231005550993e-05, + "loss": 0.08606529235839844, + "step": 4800 + }, + { + "epoch": 0.6689890615202396, + "grad_norm": 0.6275116801261902, + "learning_rate": 1.1293822016888792e-05, + "loss": 0.10003554821014404, + "step": 4801 + }, + { + "epoch": 0.669128405211454, + "grad_norm": 0.4208291471004486, + "learning_rate": 1.1285335912581628e-05, + "loss": 0.07799816131591797, + "step": 4802 + }, + { + "epoch": 0.6692677489026684, + "grad_norm": 0.443580687046051, + "learning_rate": 1.1276851744473874e-05, + "loss": 0.08867835998535156, + "step": 4803 + }, + { + "epoch": 0.6694070925938828, + "grad_norm": 0.42185094952583313, + "learning_rate": 1.12683695144505e-05, + "loss": 0.08760452270507812, + "step": 4804 + }, + { + "epoch": 0.6695464362850972, + "grad_norm": 0.5209079384803772, + "learning_rate": 1.1259889224396072e-05, + "loss": 0.08470726013183594, + "step": 4805 + }, + { + "epoch": 0.6696857799763116, + "grad_norm": 0.5895528793334961, + "learning_rate": 1.1251410876194729e-05, + "loss": 0.11820793151855469, + "step": 4806 + }, + { + "epoch": 0.669825123667526, + "grad_norm": 0.4164199233055115, + "learning_rate": 1.1242934471730153e-05, + "loss": 0.09224700927734375, + "step": 4807 + }, + { + "epoch": 0.6699644673587404, + "grad_norm": 0.5391632914543152, + "learning_rate": 1.1234460012885603e-05, + "loss": 0.09307432174682617, + "step": 4808 + }, + { + "epoch": 0.6701038110499548, + "grad_norm": 0.45489412546157837, + "learning_rate": 1.122598750154392e-05, + "loss": 0.09956502914428711, + "step": 4809 + }, + { + "epoch": 0.6702431547411691, + "grad_norm": 0.47525903582572937, + "learning_rate": 1.1217516939587507e-05, + "loss": 0.0873565673828125, + "step": 4810 + }, + { + "epoch": 0.6703824984323835, + "grad_norm": 0.3884410560131073, + "learning_rate": 1.1209048328898313e-05, + "loss": 0.08139801025390625, + "step": 4811 + }, + { + "epoch": 0.6705218421235979, + "grad_norm": 0.5214821696281433, + "learning_rate": 1.1200581671357886e-05, + "loss": 0.1036367416381836, + "step": 4812 + }, + { + "epoch": 0.6706611858148123, + "grad_norm": 0.5189434885978699, + "learning_rate": 1.1192116968847313e-05, + "loss": 0.09898567199707031, + "step": 4813 + }, + { + "epoch": 0.6708005295060266, + "grad_norm": 0.5373475551605225, + "learning_rate": 1.1183654223247268e-05, + "loss": 0.10629081726074219, + "step": 4814 + }, + { + "epoch": 0.670939873197241, + "grad_norm": 0.43163520097732544, + "learning_rate": 1.1175193436437968e-05, + "loss": 0.08435344696044922, + "step": 4815 + }, + { + "epoch": 0.6710792168884554, + "grad_norm": 0.4748704135417938, + "learning_rate": 1.116673461029921e-05, + "loss": 0.08908271789550781, + "step": 4816 + }, + { + "epoch": 0.6712185605796698, + "grad_norm": 0.5028665661811829, + "learning_rate": 1.1158277746710373e-05, + "loss": 0.10021018981933594, + "step": 4817 + }, + { + "epoch": 0.6713579042708842, + "grad_norm": 0.734558641910553, + "learning_rate": 1.1149822847550345e-05, + "loss": 0.10863685607910156, + "step": 4818 + }, + { + "epoch": 0.6714972479620985, + "grad_norm": 0.8124876618385315, + "learning_rate": 1.1141369914697627e-05, + "loss": 0.11934852600097656, + "step": 4819 + }, + { + "epoch": 0.6716365916533129, + "grad_norm": 0.4086875915527344, + "learning_rate": 1.1132918950030274e-05, + "loss": 0.07647705078125, + "step": 4820 + }, + { + "epoch": 0.6717759353445273, + "grad_norm": 0.4812062680721283, + "learning_rate": 1.1124469955425885e-05, + "loss": 0.07578086853027344, + "step": 4821 + }, + { + "epoch": 0.6719152790357417, + "grad_norm": 0.5106896758079529, + "learning_rate": 1.1116022932761648e-05, + "loss": 0.094451904296875, + "step": 4822 + }, + { + "epoch": 0.672054622726956, + "grad_norm": 0.38408637046813965, + "learning_rate": 1.1107577883914282e-05, + "loss": 0.08738517761230469, + "step": 4823 + }, + { + "epoch": 0.6721939664181704, + "grad_norm": 0.4966738224029541, + "learning_rate": 1.10991348107601e-05, + "loss": 0.09626007080078125, + "step": 4824 + }, + { + "epoch": 0.6723333101093848, + "grad_norm": 0.48225414752960205, + "learning_rate": 1.1090693715174947e-05, + "loss": 0.07508707046508789, + "step": 4825 + }, + { + "epoch": 0.6724726538005992, + "grad_norm": 0.5283902883529663, + "learning_rate": 1.1082254599034248e-05, + "loss": 0.10731983184814453, + "step": 4826 + }, + { + "epoch": 0.6726119974918136, + "grad_norm": 0.5842603445053101, + "learning_rate": 1.1073817464212989e-05, + "loss": 0.09079551696777344, + "step": 4827 + }, + { + "epoch": 0.6727513411830279, + "grad_norm": 0.37882477045059204, + "learning_rate": 1.1065382312585698e-05, + "loss": 0.078277587890625, + "step": 4828 + }, + { + "epoch": 0.6728906848742423, + "grad_norm": 0.5684933066368103, + "learning_rate": 1.1056949146026472e-05, + "loss": 0.09433364868164062, + "step": 4829 + }, + { + "epoch": 0.6730300285654567, + "grad_norm": 0.3098498284816742, + "learning_rate": 1.1048517966408969e-05, + "loss": 0.07866859436035156, + "step": 4830 + }, + { + "epoch": 0.6731693722566711, + "grad_norm": 0.41058146953582764, + "learning_rate": 1.104008877560642e-05, + "loss": 0.08298206329345703, + "step": 4831 + }, + { + "epoch": 0.6733087159478854, + "grad_norm": 0.6024137735366821, + "learning_rate": 1.1031661575491577e-05, + "loss": 0.10195350646972656, + "step": 4832 + }, + { + "epoch": 0.6734480596390998, + "grad_norm": 0.5680757164955139, + "learning_rate": 1.1023236367936789e-05, + "loss": 0.09607315063476562, + "step": 4833 + }, + { + "epoch": 0.6735874033303142, + "grad_norm": 0.43134987354278564, + "learning_rate": 1.1014813154813928e-05, + "loss": 0.09138965606689453, + "step": 4834 + }, + { + "epoch": 0.6737267470215286, + "grad_norm": 0.583651602268219, + "learning_rate": 1.1006391937994459e-05, + "loss": 0.1250457763671875, + "step": 4835 + }, + { + "epoch": 0.673866090712743, + "grad_norm": 0.49883833527565, + "learning_rate": 1.0997972719349363e-05, + "loss": 0.09820175170898438, + "step": 4836 + }, + { + "epoch": 0.6740054344039573, + "grad_norm": 0.5072662234306335, + "learning_rate": 1.0989555500749211e-05, + "loss": 0.09569931030273438, + "step": 4837 + }, + { + "epoch": 0.6741447780951717, + "grad_norm": 0.3892297148704529, + "learning_rate": 1.0981140284064122e-05, + "loss": 0.07639122009277344, + "step": 4838 + }, + { + "epoch": 0.6742841217863861, + "grad_norm": 0.513451099395752, + "learning_rate": 1.097272707116376e-05, + "loss": 0.07732963562011719, + "step": 4839 + }, + { + "epoch": 0.6744234654776005, + "grad_norm": 0.37242284417152405, + "learning_rate": 1.0964315863917337e-05, + "loss": 0.08827400207519531, + "step": 4840 + }, + { + "epoch": 0.6745628091688148, + "grad_norm": 0.5179314613342285, + "learning_rate": 1.0955906664193641e-05, + "loss": 0.09829521179199219, + "step": 4841 + }, + { + "epoch": 0.6747021528600292, + "grad_norm": 0.7256091237068176, + "learning_rate": 1.0947499473861012e-05, + "loss": 0.10951805114746094, + "step": 4842 + }, + { + "epoch": 0.6748414965512436, + "grad_norm": 0.3512406647205353, + "learning_rate": 1.0939094294787327e-05, + "loss": 0.0781097412109375, + "step": 4843 + }, + { + "epoch": 0.674980840242458, + "grad_norm": 0.3789004981517792, + "learning_rate": 1.0930691128840019e-05, + "loss": 0.08141708374023438, + "step": 4844 + }, + { + "epoch": 0.6751201839336725, + "grad_norm": 0.7110893726348877, + "learning_rate": 1.0922289977886087e-05, + "loss": 0.09627342224121094, + "step": 4845 + }, + { + "epoch": 0.6752595276248868, + "grad_norm": 0.4009181559085846, + "learning_rate": 1.091389084379208e-05, + "loss": 0.08507919311523438, + "step": 4846 + }, + { + "epoch": 0.6753988713161012, + "grad_norm": 0.417097270488739, + "learning_rate": 1.0905493728424079e-05, + "loss": 0.08824348449707031, + "step": 4847 + }, + { + "epoch": 0.6755382150073156, + "grad_norm": 0.5669693946838379, + "learning_rate": 1.0897098633647745e-05, + "loss": 0.09903240203857422, + "step": 4848 + }, + { + "epoch": 0.67567755869853, + "grad_norm": 0.627981960773468, + "learning_rate": 1.0888705561328283e-05, + "loss": 0.08664989471435547, + "step": 4849 + }, + { + "epoch": 0.6758169023897443, + "grad_norm": 0.48987433314323425, + "learning_rate": 1.088031451333042e-05, + "loss": 0.096588134765625, + "step": 4850 + }, + { + "epoch": 0.6759562460809587, + "grad_norm": 0.3278180956840515, + "learning_rate": 1.0871925491518462e-05, + "loss": 0.08094978332519531, + "step": 4851 + }, + { + "epoch": 0.6760955897721731, + "grad_norm": 0.369577556848526, + "learning_rate": 1.0863538497756263e-05, + "loss": 0.08350753784179688, + "step": 4852 + }, + { + "epoch": 0.6762349334633875, + "grad_norm": 0.4383300542831421, + "learning_rate": 1.085515353390723e-05, + "loss": 0.07998466491699219, + "step": 4853 + }, + { + "epoch": 0.6763742771546019, + "grad_norm": 0.5135626196861267, + "learning_rate": 1.0846770601834303e-05, + "loss": 0.09029197692871094, + "step": 4854 + }, + { + "epoch": 0.6765136208458162, + "grad_norm": 0.4230712950229645, + "learning_rate": 1.0838389703399965e-05, + "loss": 0.08742523193359375, + "step": 4855 + }, + { + "epoch": 0.6766529645370306, + "grad_norm": 0.3738914430141449, + "learning_rate": 1.0830010840466282e-05, + "loss": 0.07634544372558594, + "step": 4856 + }, + { + "epoch": 0.676792308228245, + "grad_norm": 0.47375279664993286, + "learning_rate": 1.0821634014894827e-05, + "loss": 0.08305931091308594, + "step": 4857 + }, + { + "epoch": 0.6769316519194594, + "grad_norm": 0.4676540791988373, + "learning_rate": 1.0813259228546746e-05, + "loss": 0.09147453308105469, + "step": 4858 + }, + { + "epoch": 0.6770709956106737, + "grad_norm": 0.6252943873405457, + "learning_rate": 1.080488648328274e-05, + "loss": 0.09505653381347656, + "step": 4859 + }, + { + "epoch": 0.6772103393018881, + "grad_norm": 0.6211961507797241, + "learning_rate": 1.0796515780963026e-05, + "loss": 0.08797454833984375, + "step": 4860 + }, + { + "epoch": 0.6773496829931025, + "grad_norm": 0.35568127036094666, + "learning_rate": 1.078814712344738e-05, + "loss": 0.08891677856445312, + "step": 4861 + }, + { + "epoch": 0.6774890266843169, + "grad_norm": 0.29430627822875977, + "learning_rate": 1.0779780512595136e-05, + "loss": 0.07552528381347656, + "step": 4862 + }, + { + "epoch": 0.6776283703755313, + "grad_norm": 0.45537781715393066, + "learning_rate": 1.0771415950265163e-05, + "loss": 0.08466529846191406, + "step": 4863 + }, + { + "epoch": 0.6777677140667456, + "grad_norm": 0.46794429421424866, + "learning_rate": 1.0763053438315876e-05, + "loss": 0.09175872802734375, + "step": 4864 + }, + { + "epoch": 0.67790705775796, + "grad_norm": 0.7305806875228882, + "learning_rate": 1.0754692978605226e-05, + "loss": 0.12187004089355469, + "step": 4865 + }, + { + "epoch": 0.6780464014491744, + "grad_norm": 0.5237967371940613, + "learning_rate": 1.074633457299072e-05, + "loss": 0.08536338806152344, + "step": 4866 + }, + { + "epoch": 0.6781857451403888, + "grad_norm": 0.32630664110183716, + "learning_rate": 1.0737978223329413e-05, + "loss": 0.08584785461425781, + "step": 4867 + }, + { + "epoch": 0.6783250888316031, + "grad_norm": 0.5523802638053894, + "learning_rate": 1.0729623931477886e-05, + "loss": 0.10518836975097656, + "step": 4868 + }, + { + "epoch": 0.6784644325228175, + "grad_norm": 0.4231260120868683, + "learning_rate": 1.0721271699292272e-05, + "loss": 0.08514976501464844, + "step": 4869 + }, + { + "epoch": 0.6786037762140319, + "grad_norm": 0.5426061153411865, + "learning_rate": 1.0712921528628258e-05, + "loss": 0.09732437133789062, + "step": 4870 + }, + { + "epoch": 0.6787431199052463, + "grad_norm": 1.0948518514633179, + "learning_rate": 1.0704573421341053e-05, + "loss": 0.131561279296875, + "step": 4871 + }, + { + "epoch": 0.6788824635964607, + "grad_norm": 0.4287346303462982, + "learning_rate": 1.0696227379285409e-05, + "loss": 0.09264087677001953, + "step": 4872 + }, + { + "epoch": 0.679021807287675, + "grad_norm": 0.4813828468322754, + "learning_rate": 1.0687883404315631e-05, + "loss": 0.08468818664550781, + "step": 4873 + }, + { + "epoch": 0.6791611509788894, + "grad_norm": 0.6006562113761902, + "learning_rate": 1.0679541498285568e-05, + "loss": 0.0951995849609375, + "step": 4874 + }, + { + "epoch": 0.6793004946701038, + "grad_norm": 0.6672459840774536, + "learning_rate": 1.0671201663048595e-05, + "loss": 0.10435867309570312, + "step": 4875 + }, + { + "epoch": 0.6794398383613182, + "grad_norm": 0.5519182085990906, + "learning_rate": 1.0662863900457627e-05, + "loss": 0.09297657012939453, + "step": 4876 + }, + { + "epoch": 0.6795791820525325, + "grad_norm": 0.3479318916797638, + "learning_rate": 1.0654528212365127e-05, + "loss": 0.08003044128417969, + "step": 4877 + }, + { + "epoch": 0.6797185257437469, + "grad_norm": 0.41658154129981995, + "learning_rate": 1.0646194600623106e-05, + "loss": 0.07932853698730469, + "step": 4878 + }, + { + "epoch": 0.6798578694349613, + "grad_norm": 0.8580193519592285, + "learning_rate": 1.0637863067083087e-05, + "loss": 0.110809326171875, + "step": 4879 + }, + { + "epoch": 0.6799972131261757, + "grad_norm": 1.054715871810913, + "learning_rate": 1.0629533613596162e-05, + "loss": 0.09983444213867188, + "step": 4880 + }, + { + "epoch": 0.68013655681739, + "grad_norm": 0.5964725613594055, + "learning_rate": 1.0621206242012936e-05, + "loss": 0.090240478515625, + "step": 4881 + }, + { + "epoch": 0.6802759005086044, + "grad_norm": 0.350365549325943, + "learning_rate": 1.061288095418355e-05, + "loss": 0.0809011459350586, + "step": 4882 + }, + { + "epoch": 0.6804152441998188, + "grad_norm": 0.4745481014251709, + "learning_rate": 1.060455775195771e-05, + "loss": 0.08615493774414062, + "step": 4883 + }, + { + "epoch": 0.6805545878910332, + "grad_norm": 0.5190402865409851, + "learning_rate": 1.0596236637184631e-05, + "loss": 0.09444427490234375, + "step": 4884 + }, + { + "epoch": 0.6806939315822477, + "grad_norm": 0.6106507778167725, + "learning_rate": 1.058791761171309e-05, + "loss": 0.08579063415527344, + "step": 4885 + }, + { + "epoch": 0.680833275273462, + "grad_norm": 0.35820189118385315, + "learning_rate": 1.0579600677391375e-05, + "loss": 0.07704353332519531, + "step": 4886 + }, + { + "epoch": 0.6809726189646764, + "grad_norm": 0.6800050735473633, + "learning_rate": 1.0571285836067308e-05, + "loss": 0.11551570892333984, + "step": 4887 + }, + { + "epoch": 0.6811119626558908, + "grad_norm": 0.3523734211921692, + "learning_rate": 1.0562973089588278e-05, + "loss": 0.09112167358398438, + "step": 4888 + }, + { + "epoch": 0.6812513063471052, + "grad_norm": 0.43590158224105835, + "learning_rate": 1.0554662439801167e-05, + "loss": 0.09220218658447266, + "step": 4889 + }, + { + "epoch": 0.6813906500383196, + "grad_norm": 0.4077061414718628, + "learning_rate": 1.0546353888552418e-05, + "loss": 0.07097911834716797, + "step": 4890 + }, + { + "epoch": 0.6815299937295339, + "grad_norm": 0.3789897561073303, + "learning_rate": 1.0538047437688018e-05, + "loss": 0.09045886993408203, + "step": 4891 + }, + { + "epoch": 0.6816693374207483, + "grad_norm": 0.36919641494750977, + "learning_rate": 1.0529743089053452e-05, + "loss": 0.07605934143066406, + "step": 4892 + }, + { + "epoch": 0.6818086811119627, + "grad_norm": 0.5043835043907166, + "learning_rate": 1.0521440844493758e-05, + "loss": 0.10205459594726562, + "step": 4893 + }, + { + "epoch": 0.6819480248031771, + "grad_norm": 0.43772369623184204, + "learning_rate": 1.0513140705853506e-05, + "loss": 0.0878448486328125, + "step": 4894 + }, + { + "epoch": 0.6820873684943914, + "grad_norm": 0.4903828501701355, + "learning_rate": 1.0504842674976811e-05, + "loss": 0.08113110065460205, + "step": 4895 + }, + { + "epoch": 0.6822267121856058, + "grad_norm": 0.3065198063850403, + "learning_rate": 1.0496546753707295e-05, + "loss": 0.07199287414550781, + "step": 4896 + }, + { + "epoch": 0.6823660558768202, + "grad_norm": 0.44263017177581787, + "learning_rate": 1.0488252943888115e-05, + "loss": 0.09791088104248047, + "step": 4897 + }, + { + "epoch": 0.6825053995680346, + "grad_norm": 0.4418509304523468, + "learning_rate": 1.0479961247361974e-05, + "loss": 0.08722114562988281, + "step": 4898 + }, + { + "epoch": 0.682644743259249, + "grad_norm": 0.2716187834739685, + "learning_rate": 1.0471671665971104e-05, + "loss": 0.07792472839355469, + "step": 4899 + }, + { + "epoch": 0.6827840869504633, + "grad_norm": 0.42599907517433167, + "learning_rate": 1.0463384201557248e-05, + "loss": 0.0762186050415039, + "step": 4900 + }, + { + "epoch": 0.6829234306416777, + "grad_norm": 0.33162686228752136, + "learning_rate": 1.0455098855961705e-05, + "loss": 0.07582998275756836, + "step": 4901 + }, + { + "epoch": 0.6830627743328921, + "grad_norm": 0.4612365663051605, + "learning_rate": 1.0446815631025276e-05, + "loss": 0.08341121673583984, + "step": 4902 + }, + { + "epoch": 0.6832021180241065, + "grad_norm": 0.49601200222969055, + "learning_rate": 1.0438534528588319e-05, + "loss": 0.07816123962402344, + "step": 4903 + }, + { + "epoch": 0.6833414617153208, + "grad_norm": 0.5382335782051086, + "learning_rate": 1.0430255550490686e-05, + "loss": 0.08054733276367188, + "step": 4904 + }, + { + "epoch": 0.6834808054065352, + "grad_norm": 0.44220927357673645, + "learning_rate": 1.0421978698571791e-05, + "loss": 0.08390045166015625, + "step": 4905 + }, + { + "epoch": 0.6836201490977496, + "grad_norm": 0.5817963480949402, + "learning_rate": 1.0413703974670568e-05, + "loss": 0.10630416870117188, + "step": 4906 + }, + { + "epoch": 0.683759492788964, + "grad_norm": 0.3828657567501068, + "learning_rate": 1.0405431380625461e-05, + "loss": 0.08700752258300781, + "step": 4907 + }, + { + "epoch": 0.6838988364801784, + "grad_norm": 0.35533827543258667, + "learning_rate": 1.0397160918274447e-05, + "loss": 0.07447433471679688, + "step": 4908 + }, + { + "epoch": 0.6840381801713927, + "grad_norm": 0.5968199372291565, + "learning_rate": 1.038889258945504e-05, + "loss": 0.09697914123535156, + "step": 4909 + }, + { + "epoch": 0.6841775238626071, + "grad_norm": 0.6424338817596436, + "learning_rate": 1.0380626396004282e-05, + "loss": 0.08841133117675781, + "step": 4910 + }, + { + "epoch": 0.6843168675538215, + "grad_norm": 0.36700108647346497, + "learning_rate": 1.0372362339758717e-05, + "loss": 0.06962871551513672, + "step": 4911 + }, + { + "epoch": 0.6844562112450359, + "grad_norm": 0.5379570722579956, + "learning_rate": 1.0364100422554445e-05, + "loss": 0.08821773529052734, + "step": 4912 + }, + { + "epoch": 0.6845955549362502, + "grad_norm": 0.6226271390914917, + "learning_rate": 1.0355840646227063e-05, + "loss": 0.11317825317382812, + "step": 4913 + }, + { + "epoch": 0.6847348986274646, + "grad_norm": 0.37019434571266174, + "learning_rate": 1.0347583012611713e-05, + "loss": 0.07421684265136719, + "step": 4914 + }, + { + "epoch": 0.684874242318679, + "grad_norm": 0.8287169933319092, + "learning_rate": 1.0339327523543043e-05, + "loss": 0.12447357177734375, + "step": 4915 + }, + { + "epoch": 0.6850135860098934, + "grad_norm": 0.4877064824104309, + "learning_rate": 1.0331074180855243e-05, + "loss": 0.09601020812988281, + "step": 4916 + }, + { + "epoch": 0.6851529297011077, + "grad_norm": 0.6059339642524719, + "learning_rate": 1.0322822986382026e-05, + "loss": 0.0975198745727539, + "step": 4917 + }, + { + "epoch": 0.6852922733923221, + "grad_norm": 0.6569458842277527, + "learning_rate": 1.0314573941956593e-05, + "loss": 0.07892704010009766, + "step": 4918 + }, + { + "epoch": 0.6854316170835365, + "grad_norm": 0.48729026317596436, + "learning_rate": 1.0306327049411711e-05, + "loss": 0.09211349487304688, + "step": 4919 + }, + { + "epoch": 0.6855709607747509, + "grad_norm": 0.6288434863090515, + "learning_rate": 1.0298082310579653e-05, + "loss": 0.08209085464477539, + "step": 4920 + }, + { + "epoch": 0.6857103044659653, + "grad_norm": 0.4092393219470978, + "learning_rate": 1.0289839727292202e-05, + "loss": 0.0966024398803711, + "step": 4921 + }, + { + "epoch": 0.6858496481571796, + "grad_norm": 0.3225848972797394, + "learning_rate": 1.0281599301380676e-05, + "loss": 0.07741880416870117, + "step": 4922 + }, + { + "epoch": 0.685988991848394, + "grad_norm": 0.41943222284317017, + "learning_rate": 1.0273361034675915e-05, + "loss": 0.08322715759277344, + "step": 4923 + }, + { + "epoch": 0.6861283355396084, + "grad_norm": 0.5733731389045715, + "learning_rate": 1.0265124929008272e-05, + "loss": 0.09089279174804688, + "step": 4924 + }, + { + "epoch": 0.6862676792308228, + "grad_norm": 0.3759693205356598, + "learning_rate": 1.0256890986207612e-05, + "loss": 0.0852212905883789, + "step": 4925 + }, + { + "epoch": 0.6864070229220373, + "grad_norm": 0.5175313353538513, + "learning_rate": 1.0248659208103336e-05, + "loss": 0.09533882141113281, + "step": 4926 + }, + { + "epoch": 0.6865463666132516, + "grad_norm": 0.5275512337684631, + "learning_rate": 1.0240429596524361e-05, + "loss": 0.10393905639648438, + "step": 4927 + }, + { + "epoch": 0.686685710304466, + "grad_norm": 0.485390305519104, + "learning_rate": 1.0232202153299118e-05, + "loss": 0.08108901977539062, + "step": 4928 + }, + { + "epoch": 0.6868250539956804, + "grad_norm": 0.354256808757782, + "learning_rate": 1.0223976880255546e-05, + "loss": 0.09452438354492188, + "step": 4929 + }, + { + "epoch": 0.6869643976868948, + "grad_norm": 0.32568225264549255, + "learning_rate": 1.0215753779221119e-05, + "loss": 0.08383941650390625, + "step": 4930 + }, + { + "epoch": 0.6871037413781091, + "grad_norm": 0.5351448655128479, + "learning_rate": 1.0207532852022833e-05, + "loss": 0.09213829040527344, + "step": 4931 + }, + { + "epoch": 0.6872430850693235, + "grad_norm": 0.430264413356781, + "learning_rate": 1.0199314100487175e-05, + "loss": 0.08803939819335938, + "step": 4932 + }, + { + "epoch": 0.6873824287605379, + "grad_norm": 0.7111284732818604, + "learning_rate": 1.0191097526440177e-05, + "loss": 0.10280227661132812, + "step": 4933 + }, + { + "epoch": 0.6875217724517523, + "grad_norm": 0.3984617590904236, + "learning_rate": 1.0182883131707357e-05, + "loss": 0.08910560607910156, + "step": 4934 + }, + { + "epoch": 0.6876611161429667, + "grad_norm": 0.44057589769363403, + "learning_rate": 1.017467091811379e-05, + "loss": 0.09091377258300781, + "step": 4935 + }, + { + "epoch": 0.687800459834181, + "grad_norm": 0.4026084840297699, + "learning_rate": 1.0166460887484018e-05, + "loss": 0.08025407791137695, + "step": 4936 + }, + { + "epoch": 0.6879398035253954, + "grad_norm": 0.5428236126899719, + "learning_rate": 1.0158253041642132e-05, + "loss": 0.09525871276855469, + "step": 4937 + }, + { + "epoch": 0.6880791472166098, + "grad_norm": 0.5206599831581116, + "learning_rate": 1.0150047382411738e-05, + "loss": 0.09323692321777344, + "step": 4938 + }, + { + "epoch": 0.6882184909078242, + "grad_norm": 0.5373588800430298, + "learning_rate": 1.0141843911615938e-05, + "loss": 0.1096343994140625, + "step": 4939 + }, + { + "epoch": 0.6883578345990385, + "grad_norm": 0.4324110448360443, + "learning_rate": 1.0133642631077348e-05, + "loss": 0.1007048487663269, + "step": 4940 + }, + { + "epoch": 0.6884971782902529, + "grad_norm": 0.8235194087028503, + "learning_rate": 1.0125443542618112e-05, + "loss": 0.09360027313232422, + "step": 4941 + }, + { + "epoch": 0.6886365219814673, + "grad_norm": 0.4011608064174652, + "learning_rate": 1.0117246648059888e-05, + "loss": 0.07843828201293945, + "step": 4942 + }, + { + "epoch": 0.6887758656726817, + "grad_norm": 0.3621940016746521, + "learning_rate": 1.0109051949223825e-05, + "loss": 0.07955074310302734, + "step": 4943 + }, + { + "epoch": 0.688915209363896, + "grad_norm": 0.3738308548927307, + "learning_rate": 1.0100859447930614e-05, + "loss": 0.08170890808105469, + "step": 4944 + }, + { + "epoch": 0.6890545530551104, + "grad_norm": 0.7535528540611267, + "learning_rate": 1.0092669146000422e-05, + "loss": 0.11267948150634766, + "step": 4945 + }, + { + "epoch": 0.6891938967463248, + "grad_norm": 0.411353200674057, + "learning_rate": 1.0084481045252965e-05, + "loss": 0.07584190368652344, + "step": 4946 + }, + { + "epoch": 0.6893332404375392, + "grad_norm": 0.6197094321250916, + "learning_rate": 1.0076295147507437e-05, + "loss": 0.10156822204589844, + "step": 4947 + }, + { + "epoch": 0.6894725841287536, + "grad_norm": 0.744252622127533, + "learning_rate": 1.0068111454582565e-05, + "loss": 0.09397506713867188, + "step": 4948 + }, + { + "epoch": 0.6896119278199679, + "grad_norm": 0.4573446810245514, + "learning_rate": 1.0059929968296597e-05, + "loss": 0.08273911476135254, + "step": 4949 + }, + { + "epoch": 0.6897512715111823, + "grad_norm": 0.37505486607551575, + "learning_rate": 1.005175069046724e-05, + "loss": 0.0882720947265625, + "step": 4950 + }, + { + "epoch": 0.6898906152023967, + "grad_norm": 0.5237414240837097, + "learning_rate": 1.004357362291175e-05, + "loss": 0.09243202209472656, + "step": 4951 + }, + { + "epoch": 0.6900299588936111, + "grad_norm": 0.3145963251590729, + "learning_rate": 1.0035398767446897e-05, + "loss": 0.0698404312133789, + "step": 4952 + }, + { + "epoch": 0.6901693025848255, + "grad_norm": 0.42751795053482056, + "learning_rate": 1.0027226125888951e-05, + "loss": 0.07943344116210938, + "step": 4953 + }, + { + "epoch": 0.6903086462760398, + "grad_norm": 0.305731862783432, + "learning_rate": 1.0019055700053677e-05, + "loss": 0.0751185417175293, + "step": 4954 + }, + { + "epoch": 0.6904479899672542, + "grad_norm": 0.5141036510467529, + "learning_rate": 1.0010887491756347e-05, + "loss": 0.09037208557128906, + "step": 4955 + }, + { + "epoch": 0.6905873336584686, + "grad_norm": 0.45883065462112427, + "learning_rate": 1.0002721502811772e-05, + "loss": 0.09326744079589844, + "step": 4956 + }, + { + "epoch": 0.690726677349683, + "grad_norm": 0.6168180704116821, + "learning_rate": 9.994557735034227e-06, + "loss": 0.10165977478027344, + "step": 4957 + }, + { + "epoch": 0.6908660210408973, + "grad_norm": 0.47111988067626953, + "learning_rate": 9.986396190237526e-06, + "loss": 0.07856941223144531, + "step": 4958 + }, + { + "epoch": 0.6910053647321117, + "grad_norm": 0.5934584140777588, + "learning_rate": 9.978236870234984e-06, + "loss": 0.11830520629882812, + "step": 4959 + }, + { + "epoch": 0.6911447084233261, + "grad_norm": 0.39835435152053833, + "learning_rate": 9.970079776839412e-06, + "loss": 0.08369827270507812, + "step": 4960 + }, + { + "epoch": 0.6912840521145405, + "grad_norm": 0.31611040234565735, + "learning_rate": 9.961924911863117e-06, + "loss": 0.07748603820800781, + "step": 4961 + }, + { + "epoch": 0.6914233958057548, + "grad_norm": 0.5248616933822632, + "learning_rate": 9.953772277117933e-06, + "loss": 0.10153007507324219, + "step": 4962 + }, + { + "epoch": 0.6915627394969692, + "grad_norm": 0.4352099895477295, + "learning_rate": 9.945621874415197e-06, + "loss": 0.0853424072265625, + "step": 4963 + }, + { + "epoch": 0.6917020831881836, + "grad_norm": 0.31712427735328674, + "learning_rate": 9.937473705565728e-06, + "loss": 0.07956790924072266, + "step": 4964 + }, + { + "epoch": 0.691841426879398, + "grad_norm": 0.4393404722213745, + "learning_rate": 9.929327772379877e-06, + "loss": 0.09547805786132812, + "step": 4965 + }, + { + "epoch": 0.6919807705706125, + "grad_norm": 0.6064100861549377, + "learning_rate": 9.921184076667472e-06, + "loss": 0.096435546875, + "step": 4966 + }, + { + "epoch": 0.6921201142618268, + "grad_norm": 0.4758002460002899, + "learning_rate": 9.913042620237868e-06, + "loss": 0.07592487335205078, + "step": 4967 + }, + { + "epoch": 0.6922594579530412, + "grad_norm": 0.42808112502098083, + "learning_rate": 9.904903404899898e-06, + "loss": 0.09575843811035156, + "step": 4968 + }, + { + "epoch": 0.6923988016442556, + "grad_norm": 0.26728254556655884, + "learning_rate": 9.896766432461914e-06, + "loss": 0.07071208953857422, + "step": 4969 + }, + { + "epoch": 0.69253814533547, + "grad_norm": 0.44129127264022827, + "learning_rate": 9.88863170473178e-06, + "loss": 0.09729766845703125, + "step": 4970 + }, + { + "epoch": 0.6926774890266844, + "grad_norm": 0.3642127811908722, + "learning_rate": 9.880499223516831e-06, + "loss": 0.07175731658935547, + "step": 4971 + }, + { + "epoch": 0.6928168327178987, + "grad_norm": 0.5492430329322815, + "learning_rate": 9.872368990623915e-06, + "loss": 0.08669090270996094, + "step": 4972 + }, + { + "epoch": 0.6929561764091131, + "grad_norm": 0.44477763772010803, + "learning_rate": 9.864241007859392e-06, + "loss": 0.0979604721069336, + "step": 4973 + }, + { + "epoch": 0.6930955201003275, + "grad_norm": 0.49836841225624084, + "learning_rate": 9.856115277029123e-06, + "loss": 0.10128211975097656, + "step": 4974 + }, + { + "epoch": 0.6932348637915419, + "grad_norm": 0.9149117469787598, + "learning_rate": 9.84799179993845e-06, + "loss": 0.10821914672851562, + "step": 4975 + }, + { + "epoch": 0.6933742074827562, + "grad_norm": 0.4232766330242157, + "learning_rate": 9.839870578392216e-06, + "loss": 0.081390380859375, + "step": 4976 + }, + { + "epoch": 0.6935135511739706, + "grad_norm": 0.6353781819343567, + "learning_rate": 9.83175161419478e-06, + "loss": 0.0884408950805664, + "step": 4977 + }, + { + "epoch": 0.693652894865185, + "grad_norm": 0.3699425458908081, + "learning_rate": 9.82363490915e-06, + "loss": 0.08887863159179688, + "step": 4978 + }, + { + "epoch": 0.6937922385563994, + "grad_norm": 0.3399544358253479, + "learning_rate": 9.815520465061201e-06, + "loss": 0.07466888427734375, + "step": 4979 + }, + { + "epoch": 0.6939315822476138, + "grad_norm": 0.37842652201652527, + "learning_rate": 9.807408283731244e-06, + "loss": 0.09053230285644531, + "step": 4980 + }, + { + "epoch": 0.6940709259388281, + "grad_norm": 0.44792795181274414, + "learning_rate": 9.799298366962478e-06, + "loss": 0.06544780731201172, + "step": 4981 + }, + { + "epoch": 0.6942102696300425, + "grad_norm": 0.42617154121398926, + "learning_rate": 9.791190716556713e-06, + "loss": 0.09526252746582031, + "step": 4982 + }, + { + "epoch": 0.6943496133212569, + "grad_norm": 0.5011293888092041, + "learning_rate": 9.7830853343153e-06, + "loss": 0.11033821105957031, + "step": 4983 + }, + { + "epoch": 0.6944889570124713, + "grad_norm": 0.46361684799194336, + "learning_rate": 9.774982222039072e-06, + "loss": 0.09849834442138672, + "step": 4984 + }, + { + "epoch": 0.6946283007036856, + "grad_norm": 0.42204809188842773, + "learning_rate": 9.766881381528357e-06, + "loss": 0.08873748779296875, + "step": 4985 + }, + { + "epoch": 0.6947676443949, + "grad_norm": 0.40922337770462036, + "learning_rate": 9.758782814582977e-06, + "loss": 0.08296394348144531, + "step": 4986 + }, + { + "epoch": 0.6949069880861144, + "grad_norm": 0.4351853132247925, + "learning_rate": 9.750686523002233e-06, + "loss": 0.08374500274658203, + "step": 4987 + }, + { + "epoch": 0.6950463317773288, + "grad_norm": 0.39439326524734497, + "learning_rate": 9.742592508584958e-06, + "loss": 0.08825492858886719, + "step": 4988 + }, + { + "epoch": 0.6951856754685432, + "grad_norm": 0.2650200128555298, + "learning_rate": 9.734500773129438e-06, + "loss": 0.0665731430053711, + "step": 4989 + }, + { + "epoch": 0.6953250191597575, + "grad_norm": 0.5532058477401733, + "learning_rate": 9.726411318433482e-06, + "loss": 0.08968734741210938, + "step": 4990 + }, + { + "epoch": 0.6954643628509719, + "grad_norm": 0.42671260237693787, + "learning_rate": 9.71832414629439e-06, + "loss": 0.08704566955566406, + "step": 4991 + }, + { + "epoch": 0.6956037065421863, + "grad_norm": 0.3177131712436676, + "learning_rate": 9.710239258508935e-06, + "loss": 0.07975292205810547, + "step": 4992 + }, + { + "epoch": 0.6957430502334007, + "grad_norm": 0.501865804195404, + "learning_rate": 9.702156656873391e-06, + "loss": 0.0872836709022522, + "step": 4993 + }, + { + "epoch": 0.695882393924615, + "grad_norm": 0.3947516977787018, + "learning_rate": 9.694076343183534e-06, + "loss": 0.09610176086425781, + "step": 4994 + }, + { + "epoch": 0.6960217376158294, + "grad_norm": 0.484899640083313, + "learning_rate": 9.685998319234634e-06, + "loss": 0.1005697250366211, + "step": 4995 + }, + { + "epoch": 0.6961610813070438, + "grad_norm": 0.7951743602752686, + "learning_rate": 9.677922586821434e-06, + "loss": 0.13402748107910156, + "step": 4996 + }, + { + "epoch": 0.6963004249982582, + "grad_norm": 0.4846981167793274, + "learning_rate": 9.669849147738171e-06, + "loss": 0.09032821655273438, + "step": 4997 + }, + { + "epoch": 0.6964397686894725, + "grad_norm": 0.2980306148529053, + "learning_rate": 9.661778003778583e-06, + "loss": 0.07182884216308594, + "step": 4998 + }, + { + "epoch": 0.6965791123806869, + "grad_norm": 0.6199526786804199, + "learning_rate": 9.653709156735908e-06, + "loss": 0.10417938232421875, + "step": 4999 + }, + { + "epoch": 0.6967184560719013, + "grad_norm": 0.9498071670532227, + "learning_rate": 9.64564260840284e-06, + "loss": 0.12453792989253998, + "step": 5000 + }, + { + "epoch": 0.6968577997631157, + "grad_norm": 0.47210821509361267, + "learning_rate": 9.63757836057159e-06, + "loss": 0.0981292724609375, + "step": 5001 + }, + { + "epoch": 0.6969971434543301, + "grad_norm": 0.3370165228843689, + "learning_rate": 9.629516415033859e-06, + "loss": 0.06581687927246094, + "step": 5002 + }, + { + "epoch": 0.6971364871455444, + "grad_norm": 0.41654297709465027, + "learning_rate": 9.621456773580817e-06, + "loss": 0.07712411880493164, + "step": 5003 + }, + { + "epoch": 0.6972758308367588, + "grad_norm": 0.5962006449699402, + "learning_rate": 9.613399438003128e-06, + "loss": 0.09293556213378906, + "step": 5004 + }, + { + "epoch": 0.6974151745279732, + "grad_norm": 0.40792980790138245, + "learning_rate": 9.605344410090954e-06, + "loss": 0.08778762817382812, + "step": 5005 + }, + { + "epoch": 0.6975545182191877, + "grad_norm": 0.6431137323379517, + "learning_rate": 9.597291691633942e-06, + "loss": 0.10729026794433594, + "step": 5006 + }, + { + "epoch": 0.6976938619104021, + "grad_norm": 0.4836854338645935, + "learning_rate": 9.589241284421221e-06, + "loss": 0.08517742156982422, + "step": 5007 + }, + { + "epoch": 0.6978332056016164, + "grad_norm": 0.3867589235305786, + "learning_rate": 9.581193190241398e-06, + "loss": 0.0928487777709961, + "step": 5008 + }, + { + "epoch": 0.6979725492928308, + "grad_norm": 0.36555495858192444, + "learning_rate": 9.57314741088258e-06, + "loss": 0.08622550964355469, + "step": 5009 + }, + { + "epoch": 0.6981118929840452, + "grad_norm": 0.476261705160141, + "learning_rate": 9.565103948132368e-06, + "loss": 0.0862884521484375, + "step": 5010 + }, + { + "epoch": 0.6982512366752596, + "grad_norm": 0.4168708026409149, + "learning_rate": 9.557062803777817e-06, + "loss": 0.08778762817382812, + "step": 5011 + }, + { + "epoch": 0.698390580366474, + "grad_norm": 0.32795649766921997, + "learning_rate": 9.549023979605503e-06, + "loss": 0.07866859436035156, + "step": 5012 + }, + { + "epoch": 0.6985299240576883, + "grad_norm": 0.5222555994987488, + "learning_rate": 9.540987477401454e-06, + "loss": 0.08030891418457031, + "step": 5013 + }, + { + "epoch": 0.6986692677489027, + "grad_norm": 0.4774562120437622, + "learning_rate": 9.53295329895121e-06, + "loss": 0.08321857452392578, + "step": 5014 + }, + { + "epoch": 0.6988086114401171, + "grad_norm": 0.388527512550354, + "learning_rate": 9.52492144603977e-06, + "loss": 0.07991981506347656, + "step": 5015 + }, + { + "epoch": 0.6989479551313315, + "grad_norm": 0.4385647773742676, + "learning_rate": 9.516891920451634e-06, + "loss": 0.08371925354003906, + "step": 5016 + }, + { + "epoch": 0.6990872988225458, + "grad_norm": 0.4272889792919159, + "learning_rate": 9.50886472397079e-06, + "loss": 0.08510017395019531, + "step": 5017 + }, + { + "epoch": 0.6992266425137602, + "grad_norm": 0.46888938546180725, + "learning_rate": 9.500839858380684e-06, + "loss": 0.09125709533691406, + "step": 5018 + }, + { + "epoch": 0.6993659862049746, + "grad_norm": 0.46798235177993774, + "learning_rate": 9.492817325464256e-06, + "loss": 0.09340286254882812, + "step": 5019 + }, + { + "epoch": 0.699505329896189, + "grad_norm": 0.5127155780792236, + "learning_rate": 9.484797127003942e-06, + "loss": 0.08962774276733398, + "step": 5020 + }, + { + "epoch": 0.6996446735874033, + "grad_norm": 0.4248080551624298, + "learning_rate": 9.476779264781633e-06, + "loss": 0.07092761993408203, + "step": 5021 + }, + { + "epoch": 0.6997840172786177, + "grad_norm": 0.3427429497241974, + "learning_rate": 9.468763740578721e-06, + "loss": 0.07021370530128479, + "step": 5022 + }, + { + "epoch": 0.6999233609698321, + "grad_norm": 0.6201679706573486, + "learning_rate": 9.460750556176085e-06, + "loss": 0.10910940170288086, + "step": 5023 + }, + { + "epoch": 0.7000627046610465, + "grad_norm": 0.7650402188301086, + "learning_rate": 9.452739713354055e-06, + "loss": 0.09401702880859375, + "step": 5024 + }, + { + "epoch": 0.7002020483522609, + "grad_norm": 0.5455679297447205, + "learning_rate": 9.444731213892458e-06, + "loss": 0.085174560546875, + "step": 5025 + }, + { + "epoch": 0.7003413920434752, + "grad_norm": 0.4520156979560852, + "learning_rate": 9.436725059570605e-06, + "loss": 0.09481430053710938, + "step": 5026 + }, + { + "epoch": 0.7004807357346896, + "grad_norm": 0.6139205694198608, + "learning_rate": 9.428721252167286e-06, + "loss": 0.10841679573059082, + "step": 5027 + }, + { + "epoch": 0.700620079425904, + "grad_norm": 0.9074677228927612, + "learning_rate": 9.420719793460758e-06, + "loss": 0.12820863723754883, + "step": 5028 + }, + { + "epoch": 0.7007594231171184, + "grad_norm": 0.6901739835739136, + "learning_rate": 9.412720685228755e-06, + "loss": 0.10890674591064453, + "step": 5029 + }, + { + "epoch": 0.7008987668083327, + "grad_norm": 0.5894668698310852, + "learning_rate": 9.404723929248507e-06, + "loss": 0.09566688537597656, + "step": 5030 + }, + { + "epoch": 0.7010381104995471, + "grad_norm": 0.532041609287262, + "learning_rate": 9.396729527296712e-06, + "loss": 0.08476638793945312, + "step": 5031 + }, + { + "epoch": 0.7011774541907615, + "grad_norm": 0.33270666003227234, + "learning_rate": 9.388737481149534e-06, + "loss": 0.07440185546875, + "step": 5032 + }, + { + "epoch": 0.7013167978819759, + "grad_norm": 0.39028748869895935, + "learning_rate": 9.380747792582635e-06, + "loss": 0.07753944396972656, + "step": 5033 + }, + { + "epoch": 0.7014561415731903, + "grad_norm": 0.44581934809684753, + "learning_rate": 9.372760463371127e-06, + "loss": 0.09003114700317383, + "step": 5034 + }, + { + "epoch": 0.7015954852644046, + "grad_norm": 0.6159247756004333, + "learning_rate": 9.364775495289628e-06, + "loss": 0.08996200561523438, + "step": 5035 + }, + { + "epoch": 0.701734828955619, + "grad_norm": 0.33452674746513367, + "learning_rate": 9.3567928901122e-06, + "loss": 0.07396125793457031, + "step": 5036 + }, + { + "epoch": 0.7018741726468334, + "grad_norm": 0.3589850962162018, + "learning_rate": 9.348812649612404e-06, + "loss": 0.09365081787109375, + "step": 5037 + }, + { + "epoch": 0.7020135163380478, + "grad_norm": 0.5276636481285095, + "learning_rate": 9.340834775563275e-06, + "loss": 0.08651161193847656, + "step": 5038 + }, + { + "epoch": 0.7021528600292621, + "grad_norm": 0.409375935792923, + "learning_rate": 9.332859269737303e-06, + "loss": 0.08272933959960938, + "step": 5039 + }, + { + "epoch": 0.7022922037204765, + "grad_norm": 0.3644188940525055, + "learning_rate": 9.32488613390646e-06, + "loss": 0.08662796020507812, + "step": 5040 + }, + { + "epoch": 0.7024315474116909, + "grad_norm": 0.5965469479560852, + "learning_rate": 9.316915369842201e-06, + "loss": 0.08487129211425781, + "step": 5041 + }, + { + "epoch": 0.7025708911029053, + "grad_norm": 0.383035272359848, + "learning_rate": 9.308946979315456e-06, + "loss": 0.09245491027832031, + "step": 5042 + }, + { + "epoch": 0.7027102347941196, + "grad_norm": 0.45325547456741333, + "learning_rate": 9.300980964096604e-06, + "loss": 0.09212589263916016, + "step": 5043 + }, + { + "epoch": 0.702849578485334, + "grad_norm": 0.3766212463378906, + "learning_rate": 9.293017325955524e-06, + "loss": 0.07500076293945312, + "step": 5044 + }, + { + "epoch": 0.7029889221765484, + "grad_norm": 0.575051486492157, + "learning_rate": 9.285056066661547e-06, + "loss": 0.10393142700195312, + "step": 5045 + }, + { + "epoch": 0.7031282658677629, + "grad_norm": 0.3270750641822815, + "learning_rate": 9.277097187983489e-06, + "loss": 0.07072257995605469, + "step": 5046 + }, + { + "epoch": 0.7032676095589773, + "grad_norm": 0.4950430691242218, + "learning_rate": 9.269140691689622e-06, + "loss": 0.08747673034667969, + "step": 5047 + }, + { + "epoch": 0.7034069532501916, + "grad_norm": 0.38087350130081177, + "learning_rate": 9.261186579547703e-06, + "loss": 0.09443283081054688, + "step": 5048 + }, + { + "epoch": 0.703546296941406, + "grad_norm": 0.5877243876457214, + "learning_rate": 9.253234853324968e-06, + "loss": 0.10091590881347656, + "step": 5049 + }, + { + "epoch": 0.7036856406326204, + "grad_norm": 0.483977735042572, + "learning_rate": 9.245285514788082e-06, + "loss": 0.09866619110107422, + "step": 5050 + }, + { + "epoch": 0.7038249843238348, + "grad_norm": 0.4206375777721405, + "learning_rate": 9.237338565703222e-06, + "loss": 0.10135650634765625, + "step": 5051 + }, + { + "epoch": 0.7039643280150492, + "grad_norm": 0.765223503112793, + "learning_rate": 9.229394007836017e-06, + "loss": 0.10506820678710938, + "step": 5052 + }, + { + "epoch": 0.7041036717062635, + "grad_norm": 0.48186278343200684, + "learning_rate": 9.221451842951572e-06, + "loss": 0.08937358856201172, + "step": 5053 + }, + { + "epoch": 0.7042430153974779, + "grad_norm": 0.4771873354911804, + "learning_rate": 9.21351207281445e-06, + "loss": 0.09938240051269531, + "step": 5054 + }, + { + "epoch": 0.7043823590886923, + "grad_norm": 0.5808351039886475, + "learning_rate": 9.205574699188677e-06, + "loss": 0.09616661071777344, + "step": 5055 + }, + { + "epoch": 0.7045217027799067, + "grad_norm": 0.536364734172821, + "learning_rate": 9.197639723837775e-06, + "loss": 0.07604789733886719, + "step": 5056 + }, + { + "epoch": 0.704661046471121, + "grad_norm": 0.3179204761981964, + "learning_rate": 9.189707148524697e-06, + "loss": 0.06556510925292969, + "step": 5057 + }, + { + "epoch": 0.7048003901623354, + "grad_norm": 0.46217232942581177, + "learning_rate": 9.181776975011882e-06, + "loss": 0.09809112548828125, + "step": 5058 + }, + { + "epoch": 0.7049397338535498, + "grad_norm": 0.3891226649284363, + "learning_rate": 9.173849205061251e-06, + "loss": 0.07259368896484375, + "step": 5059 + }, + { + "epoch": 0.7050790775447642, + "grad_norm": 0.2736787497997284, + "learning_rate": 9.165923840434162e-06, + "loss": 0.07279586791992188, + "step": 5060 + }, + { + "epoch": 0.7052184212359786, + "grad_norm": 0.6428344249725342, + "learning_rate": 9.15800088289144e-06, + "loss": 0.11197280883789062, + "step": 5061 + }, + { + "epoch": 0.7053577649271929, + "grad_norm": 0.4593438506126404, + "learning_rate": 9.150080334193394e-06, + "loss": 0.08545684814453125, + "step": 5062 + }, + { + "epoch": 0.7054971086184073, + "grad_norm": 0.41464754939079285, + "learning_rate": 9.142162196099799e-06, + "loss": 0.08229255676269531, + "step": 5063 + }, + { + "epoch": 0.7056364523096217, + "grad_norm": 0.49528393149375916, + "learning_rate": 9.134246470369868e-06, + "loss": 0.08466529846191406, + "step": 5064 + }, + { + "epoch": 0.7057757960008361, + "grad_norm": 0.5297635793685913, + "learning_rate": 9.126333158762309e-06, + "loss": 0.09029388427734375, + "step": 5065 + }, + { + "epoch": 0.7059151396920504, + "grad_norm": 0.3564441502094269, + "learning_rate": 9.118422263035264e-06, + "loss": 0.07944297790527344, + "step": 5066 + }, + { + "epoch": 0.7060544833832648, + "grad_norm": 0.7454593181610107, + "learning_rate": 9.110513784946368e-06, + "loss": 0.08115077018737793, + "step": 5067 + }, + { + "epoch": 0.7061938270744792, + "grad_norm": 0.5070212483406067, + "learning_rate": 9.102607726252692e-06, + "loss": 0.07251167297363281, + "step": 5068 + }, + { + "epoch": 0.7063331707656936, + "grad_norm": 0.6646909713745117, + "learning_rate": 9.094704088710788e-06, + "loss": 0.0895233154296875, + "step": 5069 + }, + { + "epoch": 0.706472514456908, + "grad_norm": 0.3520609736442566, + "learning_rate": 9.08680287407667e-06, + "loss": 0.07454299926757812, + "step": 5070 + }, + { + "epoch": 0.7066118581481223, + "grad_norm": 0.42911022901535034, + "learning_rate": 9.078904084105802e-06, + "loss": 0.09709548950195312, + "step": 5071 + }, + { + "epoch": 0.7067512018393367, + "grad_norm": 0.43758425116539, + "learning_rate": 9.071007720553104e-06, + "loss": 0.09111785888671875, + "step": 5072 + }, + { + "epoch": 0.7068905455305511, + "grad_norm": 0.3653225600719452, + "learning_rate": 9.06311378517298e-06, + "loss": 0.07593727111816406, + "step": 5073 + }, + { + "epoch": 0.7070298892217655, + "grad_norm": 0.44395583868026733, + "learning_rate": 9.055222279719284e-06, + "loss": 0.07247328758239746, + "step": 5074 + }, + { + "epoch": 0.7071692329129798, + "grad_norm": 0.52788245677948, + "learning_rate": 9.047333205945318e-06, + "loss": 0.08943939208984375, + "step": 5075 + }, + { + "epoch": 0.7073085766041942, + "grad_norm": 0.5728692412376404, + "learning_rate": 9.039446565603868e-06, + "loss": 0.09558391571044922, + "step": 5076 + }, + { + "epoch": 0.7074479202954086, + "grad_norm": 0.5029249787330627, + "learning_rate": 9.03156236044715e-06, + "loss": 0.0928487777709961, + "step": 5077 + }, + { + "epoch": 0.707587263986623, + "grad_norm": 0.3800397515296936, + "learning_rate": 9.023680592226868e-06, + "loss": 0.0763106644153595, + "step": 5078 + }, + { + "epoch": 0.7077266076778373, + "grad_norm": 0.4359884262084961, + "learning_rate": 9.015801262694157e-06, + "loss": 0.09386062622070312, + "step": 5079 + }, + { + "epoch": 0.7078659513690517, + "grad_norm": 0.5880823731422424, + "learning_rate": 9.007924373599634e-06, + "loss": 0.08549308776855469, + "step": 5080 + }, + { + "epoch": 0.7080052950602661, + "grad_norm": 0.4931927025318146, + "learning_rate": 9.000049926693375e-06, + "loss": 0.07737350463867188, + "step": 5081 + }, + { + "epoch": 0.7081446387514805, + "grad_norm": 0.5485877990722656, + "learning_rate": 8.992177923724876e-06, + "loss": 0.1060791015625, + "step": 5082 + }, + { + "epoch": 0.7082839824426949, + "grad_norm": 0.7244998216629028, + "learning_rate": 8.98430836644313e-06, + "loss": 0.08716392517089844, + "step": 5083 + }, + { + "epoch": 0.7084233261339092, + "grad_norm": 0.9154658913612366, + "learning_rate": 8.97644125659657e-06, + "loss": 0.09538078308105469, + "step": 5084 + }, + { + "epoch": 0.7085626698251236, + "grad_norm": 0.5552160143852234, + "learning_rate": 8.968576595933098e-06, + "loss": 0.08850860595703125, + "step": 5085 + }, + { + "epoch": 0.7087020135163381, + "grad_norm": 0.45515260100364685, + "learning_rate": 8.960714386200056e-06, + "loss": 0.07755661010742188, + "step": 5086 + }, + { + "epoch": 0.7088413572075525, + "grad_norm": 0.46047306060791016, + "learning_rate": 8.95285462914424e-06, + "loss": 0.10073661804199219, + "step": 5087 + }, + { + "epoch": 0.7089807008987669, + "grad_norm": 0.4763648509979248, + "learning_rate": 8.94499732651192e-06, + "loss": 0.09157943725585938, + "step": 5088 + }, + { + "epoch": 0.7091200445899812, + "grad_norm": 0.5105217695236206, + "learning_rate": 8.937142480048797e-06, + "loss": 0.08606147766113281, + "step": 5089 + }, + { + "epoch": 0.7092593882811956, + "grad_norm": 0.38656941056251526, + "learning_rate": 8.929290091500045e-06, + "loss": 0.0873565673828125, + "step": 5090 + }, + { + "epoch": 0.70939873197241, + "grad_norm": 0.405166894197464, + "learning_rate": 8.921440162610295e-06, + "loss": 0.0823373794555664, + "step": 5091 + }, + { + "epoch": 0.7095380756636244, + "grad_norm": 0.544793426990509, + "learning_rate": 8.913592695123613e-06, + "loss": 0.08838891983032227, + "step": 5092 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.6402265429496765, + "learning_rate": 8.905747690783517e-06, + "loss": 0.09179306030273438, + "step": 5093 + }, + { + "epoch": 0.7098167630460531, + "grad_norm": 0.4001820385456085, + "learning_rate": 8.897905151333002e-06, + "loss": 0.06304383277893066, + "step": 5094 + }, + { + "epoch": 0.7099561067372675, + "grad_norm": 0.37823423743247986, + "learning_rate": 8.890065078514503e-06, + "loss": 0.07881355285644531, + "step": 5095 + }, + { + "epoch": 0.7100954504284819, + "grad_norm": 0.3658105432987213, + "learning_rate": 8.882227474069892e-06, + "loss": 0.07260513305664062, + "step": 5096 + }, + { + "epoch": 0.7102347941196963, + "grad_norm": 0.5410409569740295, + "learning_rate": 8.874392339740518e-06, + "loss": 0.09079933166503906, + "step": 5097 + }, + { + "epoch": 0.7103741378109106, + "grad_norm": 0.6404775977134705, + "learning_rate": 8.866559677267162e-06, + "loss": 0.09413909912109375, + "step": 5098 + }, + { + "epoch": 0.710513481502125, + "grad_norm": 0.3952350914478302, + "learning_rate": 8.858729488390068e-06, + "loss": 0.0760335922241211, + "step": 5099 + }, + { + "epoch": 0.7106528251933394, + "grad_norm": 0.6001894474029541, + "learning_rate": 8.850901774848916e-06, + "loss": 0.08688735961914062, + "step": 5100 + }, + { + "epoch": 0.7107921688845538, + "grad_norm": 0.37381792068481445, + "learning_rate": 8.843076538382853e-06, + "loss": 0.0790548324584961, + "step": 5101 + }, + { + "epoch": 0.7109315125757681, + "grad_norm": 0.4668661653995514, + "learning_rate": 8.835253780730472e-06, + "loss": 0.09211921691894531, + "step": 5102 + }, + { + "epoch": 0.7110708562669825, + "grad_norm": 0.425717294216156, + "learning_rate": 8.827433503629805e-06, + "loss": 0.08949470520019531, + "step": 5103 + }, + { + "epoch": 0.7112101999581969, + "grad_norm": 0.5845116376876831, + "learning_rate": 8.819615708818335e-06, + "loss": 0.0958242416381836, + "step": 5104 + }, + { + "epoch": 0.7113495436494113, + "grad_norm": 0.3730688989162445, + "learning_rate": 8.811800398032999e-06, + "loss": 0.08557307720184326, + "step": 5105 + }, + { + "epoch": 0.7114888873406257, + "grad_norm": 0.28642502427101135, + "learning_rate": 8.803987573010191e-06, + "loss": 0.06844711303710938, + "step": 5106 + }, + { + "epoch": 0.71162823103184, + "grad_norm": 0.4769069254398346, + "learning_rate": 8.796177235485736e-06, + "loss": 0.08262896537780762, + "step": 5107 + }, + { + "epoch": 0.7117675747230544, + "grad_norm": 0.392580509185791, + "learning_rate": 8.788369387194904e-06, + "loss": 0.08881568908691406, + "step": 5108 + }, + { + "epoch": 0.7119069184142688, + "grad_norm": 0.5337678790092468, + "learning_rate": 8.78056402987243e-06, + "loss": 0.09454917907714844, + "step": 5109 + }, + { + "epoch": 0.7120462621054832, + "grad_norm": 0.3828212320804596, + "learning_rate": 8.772761165252488e-06, + "loss": 0.0731048583984375, + "step": 5110 + }, + { + "epoch": 0.7121856057966975, + "grad_norm": 0.45485517382621765, + "learning_rate": 8.76496079506869e-06, + "loss": 0.0859832763671875, + "step": 5111 + }, + { + "epoch": 0.7123249494879119, + "grad_norm": 0.6468502283096313, + "learning_rate": 8.757162921054099e-06, + "loss": 0.09781265258789062, + "step": 5112 + }, + { + "epoch": 0.7124642931791263, + "grad_norm": 0.6272056698799133, + "learning_rate": 8.749367544941238e-06, + "loss": 0.08749043941497803, + "step": 5113 + }, + { + "epoch": 0.7126036368703407, + "grad_norm": 0.49464842677116394, + "learning_rate": 8.741574668462053e-06, + "loss": 0.07564449310302734, + "step": 5114 + }, + { + "epoch": 0.712742980561555, + "grad_norm": 0.9293956160545349, + "learning_rate": 8.733784293347934e-06, + "loss": 0.1070699691772461, + "step": 5115 + }, + { + "epoch": 0.7128823242527694, + "grad_norm": 0.45822668075561523, + "learning_rate": 8.725996421329733e-06, + "loss": 0.07889175415039062, + "step": 5116 + }, + { + "epoch": 0.7130216679439838, + "grad_norm": 0.40463462471961975, + "learning_rate": 8.718211054137744e-06, + "loss": 0.07540225982666016, + "step": 5117 + }, + { + "epoch": 0.7131610116351982, + "grad_norm": 0.4614403247833252, + "learning_rate": 8.710428193501692e-06, + "loss": 0.09105300903320312, + "step": 5118 + }, + { + "epoch": 0.7133003553264126, + "grad_norm": 0.5098798274993896, + "learning_rate": 8.702647841150743e-06, + "loss": 0.0820932388305664, + "step": 5119 + }, + { + "epoch": 0.7134396990176269, + "grad_norm": 0.4748972952365875, + "learning_rate": 8.694869998813527e-06, + "loss": 0.09016036987304688, + "step": 5120 + }, + { + "epoch": 0.7135790427088413, + "grad_norm": 0.3339550793170929, + "learning_rate": 8.68709466821809e-06, + "loss": 0.07823562622070312, + "step": 5121 + }, + { + "epoch": 0.7137183864000557, + "grad_norm": 0.6159592866897583, + "learning_rate": 8.67932185109194e-06, + "loss": 0.09028816223144531, + "step": 5122 + }, + { + "epoch": 0.7138577300912701, + "grad_norm": 1.0369088649749756, + "learning_rate": 8.671551549162025e-06, + "loss": 0.10052299499511719, + "step": 5123 + }, + { + "epoch": 0.7139970737824844, + "grad_norm": 0.4241698086261749, + "learning_rate": 8.663783764154726e-06, + "loss": 0.0866851806640625, + "step": 5124 + }, + { + "epoch": 0.7141364174736988, + "grad_norm": 0.6971359252929688, + "learning_rate": 8.656018497795855e-06, + "loss": 0.09955787658691406, + "step": 5125 + }, + { + "epoch": 0.7142757611649132, + "grad_norm": 0.7341123223304749, + "learning_rate": 8.648255751810686e-06, + "loss": 0.10316085815429688, + "step": 5126 + }, + { + "epoch": 0.7144151048561277, + "grad_norm": 0.3731667101383209, + "learning_rate": 8.640495527923931e-06, + "loss": 0.07238960266113281, + "step": 5127 + }, + { + "epoch": 0.7145544485473421, + "grad_norm": 0.5063349604606628, + "learning_rate": 8.632737827859729e-06, + "loss": 0.08659076690673828, + "step": 5128 + }, + { + "epoch": 0.7146937922385564, + "grad_norm": 0.4976820945739746, + "learning_rate": 8.624982653341656e-06, + "loss": 0.09751319885253906, + "step": 5129 + }, + { + "epoch": 0.7148331359297708, + "grad_norm": 0.4228106737136841, + "learning_rate": 8.61723000609274e-06, + "loss": 0.08514213562011719, + "step": 5130 + }, + { + "epoch": 0.7149724796209852, + "grad_norm": 0.8059839606285095, + "learning_rate": 8.609479887835453e-06, + "loss": 0.10127830505371094, + "step": 5131 + }, + { + "epoch": 0.7151118233121996, + "grad_norm": 0.5300250053405762, + "learning_rate": 8.601732300291674e-06, + "loss": 0.09047317504882812, + "step": 5132 + }, + { + "epoch": 0.715251167003414, + "grad_norm": 0.7335849404335022, + "learning_rate": 8.593987245182754e-06, + "loss": 0.11800861358642578, + "step": 5133 + }, + { + "epoch": 0.7153905106946283, + "grad_norm": 0.43254444003105164, + "learning_rate": 8.586244724229471e-06, + "loss": 0.0892934799194336, + "step": 5134 + }, + { + "epoch": 0.7155298543858427, + "grad_norm": 0.4125460386276245, + "learning_rate": 8.57850473915203e-06, + "loss": 0.087005615234375, + "step": 5135 + }, + { + "epoch": 0.7156691980770571, + "grad_norm": 0.3539395034313202, + "learning_rate": 8.57076729167007e-06, + "loss": 0.08222389221191406, + "step": 5136 + }, + { + "epoch": 0.7158085417682715, + "grad_norm": 0.4764385521411896, + "learning_rate": 8.563032383502685e-06, + "loss": 0.11162471771240234, + "step": 5137 + }, + { + "epoch": 0.7159478854594858, + "grad_norm": 0.8727583885192871, + "learning_rate": 8.555300016368403e-06, + "loss": 0.10814285278320312, + "step": 5138 + }, + { + "epoch": 0.7160872291507002, + "grad_norm": 0.3728167712688446, + "learning_rate": 8.547570191985168e-06, + "loss": 0.08281898498535156, + "step": 5139 + }, + { + "epoch": 0.7162265728419146, + "grad_norm": 0.5188648700714111, + "learning_rate": 8.539842912070367e-06, + "loss": 0.08571815490722656, + "step": 5140 + }, + { + "epoch": 0.716365916533129, + "grad_norm": 0.3792186975479126, + "learning_rate": 8.532118178340829e-06, + "loss": 0.08323478698730469, + "step": 5141 + }, + { + "epoch": 0.7165052602243434, + "grad_norm": 0.45863211154937744, + "learning_rate": 8.524395992512827e-06, + "loss": 0.08736419677734375, + "step": 5142 + }, + { + "epoch": 0.7166446039155577, + "grad_norm": 0.4258401393890381, + "learning_rate": 8.516676356302031e-06, + "loss": 0.08084583282470703, + "step": 5143 + }, + { + "epoch": 0.7167839476067721, + "grad_norm": 0.8365738391876221, + "learning_rate": 8.508959271423589e-06, + "loss": 0.09392356872558594, + "step": 5144 + }, + { + "epoch": 0.7169232912979865, + "grad_norm": 0.5846844911575317, + "learning_rate": 8.501244739592045e-06, + "loss": 0.09545135498046875, + "step": 5145 + }, + { + "epoch": 0.7170626349892009, + "grad_norm": 0.5495191216468811, + "learning_rate": 8.493532762521406e-06, + "loss": 0.09902667999267578, + "step": 5146 + }, + { + "epoch": 0.7172019786804152, + "grad_norm": 0.35728830099105835, + "learning_rate": 8.485823341925084e-06, + "loss": 0.07121992111206055, + "step": 5147 + }, + { + "epoch": 0.7173413223716296, + "grad_norm": 0.40019741654396057, + "learning_rate": 8.47811647951594e-06, + "loss": 0.07990074157714844, + "step": 5148 + }, + { + "epoch": 0.717480666062844, + "grad_norm": 0.5911238193511963, + "learning_rate": 8.470412177006281e-06, + "loss": 0.08901214599609375, + "step": 5149 + }, + { + "epoch": 0.7176200097540584, + "grad_norm": 0.4313996732234955, + "learning_rate": 8.462710436107796e-06, + "loss": 0.08643341064453125, + "step": 5150 + }, + { + "epoch": 0.7177593534452728, + "grad_norm": 0.4302591383457184, + "learning_rate": 8.455011258531653e-06, + "loss": 0.08691120147705078, + "step": 5151 + }, + { + "epoch": 0.7178986971364871, + "grad_norm": 0.42827126383781433, + "learning_rate": 8.44731464598843e-06, + "loss": 0.07576560974121094, + "step": 5152 + }, + { + "epoch": 0.7180380408277015, + "grad_norm": 0.5268940329551697, + "learning_rate": 8.439620600188147e-06, + "loss": 0.11186790466308594, + "step": 5153 + }, + { + "epoch": 0.7181773845189159, + "grad_norm": 0.5756856799125671, + "learning_rate": 8.431929122840234e-06, + "loss": 0.10568046569824219, + "step": 5154 + }, + { + "epoch": 0.7183167282101303, + "grad_norm": 0.3535471260547638, + "learning_rate": 8.424240215653571e-06, + "loss": 0.08204269409179688, + "step": 5155 + }, + { + "epoch": 0.7184560719013446, + "grad_norm": 0.3967357873916626, + "learning_rate": 8.416553880336456e-06, + "loss": 0.08353281021118164, + "step": 5156 + }, + { + "epoch": 0.718595415592559, + "grad_norm": 0.42426976561546326, + "learning_rate": 8.408870118596606e-06, + "loss": 0.08526039123535156, + "step": 5157 + }, + { + "epoch": 0.7187347592837734, + "grad_norm": 0.6807617545127869, + "learning_rate": 8.401188932141184e-06, + "loss": 0.10141658782958984, + "step": 5158 + }, + { + "epoch": 0.7188741029749878, + "grad_norm": 0.33121445775032043, + "learning_rate": 8.393510322676784e-06, + "loss": 0.07322502136230469, + "step": 5159 + }, + { + "epoch": 0.7190134466662021, + "grad_norm": 0.4040040373802185, + "learning_rate": 8.385834291909409e-06, + "loss": 0.09046173095703125, + "step": 5160 + }, + { + "epoch": 0.7191527903574165, + "grad_norm": 0.3986842632293701, + "learning_rate": 8.378160841544493e-06, + "loss": 0.08370780944824219, + "step": 5161 + }, + { + "epoch": 0.7192921340486309, + "grad_norm": 0.9034318327903748, + "learning_rate": 8.370489973286907e-06, + "loss": 0.1002054214477539, + "step": 5162 + }, + { + "epoch": 0.7194314777398453, + "grad_norm": 0.5921790599822998, + "learning_rate": 8.362821688840947e-06, + "loss": 0.10031890869140625, + "step": 5163 + }, + { + "epoch": 0.7195708214310597, + "grad_norm": 1.0242414474487305, + "learning_rate": 8.355155989910322e-06, + "loss": 0.10923165082931519, + "step": 5164 + }, + { + "epoch": 0.719710165122274, + "grad_norm": 0.39333462715148926, + "learning_rate": 8.347492878198185e-06, + "loss": 0.08797073364257812, + "step": 5165 + }, + { + "epoch": 0.7198495088134884, + "grad_norm": 0.3989627957344055, + "learning_rate": 8.339832355407093e-06, + "loss": 0.07514715194702148, + "step": 5166 + }, + { + "epoch": 0.7199888525047029, + "grad_norm": 0.4480408728122711, + "learning_rate": 8.332174423239052e-06, + "loss": 0.08056831359863281, + "step": 5167 + }, + { + "epoch": 0.7201281961959173, + "grad_norm": 0.5003963112831116, + "learning_rate": 8.324519083395467e-06, + "loss": 0.08579635620117188, + "step": 5168 + }, + { + "epoch": 0.7202675398871317, + "grad_norm": 0.5166139006614685, + "learning_rate": 8.316866337577185e-06, + "loss": 0.080413818359375, + "step": 5169 + }, + { + "epoch": 0.720406883578346, + "grad_norm": 0.8948585987091064, + "learning_rate": 8.309216187484482e-06, + "loss": 0.10316658020019531, + "step": 5170 + }, + { + "epoch": 0.7205462272695604, + "grad_norm": 0.6629568338394165, + "learning_rate": 8.301568634817034e-06, + "loss": 0.09470939636230469, + "step": 5171 + }, + { + "epoch": 0.7206855709607748, + "grad_norm": 0.5002240538597107, + "learning_rate": 8.29392368127395e-06, + "loss": 0.06810760498046875, + "step": 5172 + }, + { + "epoch": 0.7208249146519892, + "grad_norm": 0.8545056581497192, + "learning_rate": 8.286281328553769e-06, + "loss": 0.11447715759277344, + "step": 5173 + }, + { + "epoch": 0.7209642583432035, + "grad_norm": 0.3955659866333008, + "learning_rate": 8.278641578354453e-06, + "loss": 0.08530426025390625, + "step": 5174 + }, + { + "epoch": 0.7211036020344179, + "grad_norm": 0.6484873294830322, + "learning_rate": 8.271004432373372e-06, + "loss": 0.08505058288574219, + "step": 5175 + }, + { + "epoch": 0.7212429457256323, + "grad_norm": 0.6358329057693481, + "learning_rate": 8.263369892307334e-06, + "loss": 0.08373105525970459, + "step": 5176 + }, + { + "epoch": 0.7213822894168467, + "grad_norm": 0.6404874920845032, + "learning_rate": 8.255737959852548e-06, + "loss": 0.10569190979003906, + "step": 5177 + }, + { + "epoch": 0.7215216331080611, + "grad_norm": 0.7770054936408997, + "learning_rate": 8.248108636704666e-06, + "loss": 0.10272407531738281, + "step": 5178 + }, + { + "epoch": 0.7216609767992754, + "grad_norm": 0.6536928415298462, + "learning_rate": 8.240481924558739e-06, + "loss": 0.10441207885742188, + "step": 5179 + }, + { + "epoch": 0.7218003204904898, + "grad_norm": 0.667496919631958, + "learning_rate": 8.232857825109256e-06, + "loss": 0.09195709228515625, + "step": 5180 + }, + { + "epoch": 0.7219396641817042, + "grad_norm": 0.39885979890823364, + "learning_rate": 8.225236340050127e-06, + "loss": 0.08759689331054688, + "step": 5181 + }, + { + "epoch": 0.7220790078729186, + "grad_norm": 0.4013884961605072, + "learning_rate": 8.217617471074648e-06, + "loss": 0.07915496826171875, + "step": 5182 + }, + { + "epoch": 0.7222183515641329, + "grad_norm": 0.3844227194786072, + "learning_rate": 8.210001219875569e-06, + "loss": 0.08176612854003906, + "step": 5183 + }, + { + "epoch": 0.7223576952553473, + "grad_norm": 0.44441312551498413, + "learning_rate": 8.202387588145051e-06, + "loss": 0.08437156677246094, + "step": 5184 + }, + { + "epoch": 0.7224970389465617, + "grad_norm": 0.4364369213581085, + "learning_rate": 8.194776577574673e-06, + "loss": 0.08353281021118164, + "step": 5185 + }, + { + "epoch": 0.7226363826377761, + "grad_norm": 0.4178652763366699, + "learning_rate": 8.187168189855421e-06, + "loss": 0.09909820556640625, + "step": 5186 + }, + { + "epoch": 0.7227757263289905, + "grad_norm": 0.3977736532688141, + "learning_rate": 8.179562426677699e-06, + "loss": 0.08158111572265625, + "step": 5187 + }, + { + "epoch": 0.7229150700202048, + "grad_norm": 0.34981605410575867, + "learning_rate": 8.171959289731348e-06, + "loss": 0.07401657104492188, + "step": 5188 + }, + { + "epoch": 0.7230544137114192, + "grad_norm": 0.5822229385375977, + "learning_rate": 8.164358780705596e-06, + "loss": 0.09788954257965088, + "step": 5189 + }, + { + "epoch": 0.7231937574026336, + "grad_norm": 0.5450789928436279, + "learning_rate": 8.156760901289111e-06, + "loss": 0.08370780944824219, + "step": 5190 + }, + { + "epoch": 0.723333101093848, + "grad_norm": 0.6158302426338196, + "learning_rate": 8.149165653169976e-06, + "loss": 0.08121013641357422, + "step": 5191 + }, + { + "epoch": 0.7234724447850623, + "grad_norm": 0.605323851108551, + "learning_rate": 8.141573038035675e-06, + "loss": 0.09774017333984375, + "step": 5192 + }, + { + "epoch": 0.7236117884762767, + "grad_norm": 0.9145119786262512, + "learning_rate": 8.133983057573103e-06, + "loss": 0.13277053833007812, + "step": 5193 + }, + { + "epoch": 0.7237511321674911, + "grad_norm": 0.5585616827011108, + "learning_rate": 8.12639571346859e-06, + "loss": 0.08920478820800781, + "step": 5194 + }, + { + "epoch": 0.7238904758587055, + "grad_norm": 0.38698846101760864, + "learning_rate": 8.118811007407878e-06, + "loss": 0.06668663024902344, + "step": 5195 + }, + { + "epoch": 0.7240298195499199, + "grad_norm": 0.5076373219490051, + "learning_rate": 8.111228941076101e-06, + "loss": 0.0821990966796875, + "step": 5196 + }, + { + "epoch": 0.7241691632411342, + "grad_norm": 0.3955576419830322, + "learning_rate": 8.103649516157835e-06, + "loss": 0.08299827575683594, + "step": 5197 + }, + { + "epoch": 0.7243085069323486, + "grad_norm": 0.39606791734695435, + "learning_rate": 8.096072734337042e-06, + "loss": 0.07647323608398438, + "step": 5198 + }, + { + "epoch": 0.724447850623563, + "grad_norm": 0.4175029993057251, + "learning_rate": 8.088498597297121e-06, + "loss": 0.09475326538085938, + "step": 5199 + }, + { + "epoch": 0.7245871943147774, + "grad_norm": 0.3849242925643921, + "learning_rate": 8.080927106720862e-06, + "loss": 0.07956695556640625, + "step": 5200 + }, + { + "epoch": 0.7247265380059917, + "grad_norm": 0.48727357387542725, + "learning_rate": 8.073358264290483e-06, + "loss": 0.07887077331542969, + "step": 5201 + }, + { + "epoch": 0.7248658816972061, + "grad_norm": 0.42423245310783386, + "learning_rate": 8.065792071687615e-06, + "loss": 0.07625961303710938, + "step": 5202 + }, + { + "epoch": 0.7250052253884205, + "grad_norm": 0.45898333191871643, + "learning_rate": 8.058228530593283e-06, + "loss": 0.08777999877929688, + "step": 5203 + }, + { + "epoch": 0.7251445690796349, + "grad_norm": 0.5169390439987183, + "learning_rate": 8.050667642687933e-06, + "loss": 0.09668922424316406, + "step": 5204 + }, + { + "epoch": 0.7252839127708492, + "grad_norm": 0.5897969603538513, + "learning_rate": 8.043109409651424e-06, + "loss": 0.09189796447753906, + "step": 5205 + }, + { + "epoch": 0.7254232564620636, + "grad_norm": 0.47747132182121277, + "learning_rate": 8.03555383316303e-06, + "loss": 0.08708572387695312, + "step": 5206 + }, + { + "epoch": 0.7255626001532781, + "grad_norm": 0.5636628866195679, + "learning_rate": 8.028000914901422e-06, + "loss": 0.10120010375976562, + "step": 5207 + }, + { + "epoch": 0.7257019438444925, + "grad_norm": 0.45742765069007874, + "learning_rate": 8.020450656544679e-06, + "loss": 0.09696197509765625, + "step": 5208 + }, + { + "epoch": 0.7258412875357069, + "grad_norm": 0.549927294254303, + "learning_rate": 8.012903059770301e-06, + "loss": 0.08673381805419922, + "step": 5209 + }, + { + "epoch": 0.7259806312269212, + "grad_norm": 0.6156222820281982, + "learning_rate": 8.005358126255199e-06, + "loss": 0.08244752883911133, + "step": 5210 + }, + { + "epoch": 0.7261199749181356, + "grad_norm": 0.39128848910331726, + "learning_rate": 7.997815857675673e-06, + "loss": 0.08571052551269531, + "step": 5211 + }, + { + "epoch": 0.72625931860935, + "grad_norm": 0.6438534259796143, + "learning_rate": 7.990276255707449e-06, + "loss": 0.11410331726074219, + "step": 5212 + }, + { + "epoch": 0.7263986623005644, + "grad_norm": 0.5822881460189819, + "learning_rate": 7.982739322025663e-06, + "loss": 0.07949638366699219, + "step": 5213 + }, + { + "epoch": 0.7265380059917788, + "grad_norm": 0.4482503831386566, + "learning_rate": 7.97520505830484e-06, + "loss": 0.08034706115722656, + "step": 5214 + }, + { + "epoch": 0.7266773496829931, + "grad_norm": 0.6491795182228088, + "learning_rate": 7.967673466218914e-06, + "loss": 0.11669158935546875, + "step": 5215 + }, + { + "epoch": 0.7268166933742075, + "grad_norm": 0.37761032581329346, + "learning_rate": 7.960144547441242e-06, + "loss": 0.08394432067871094, + "step": 5216 + }, + { + "epoch": 0.7269560370654219, + "grad_norm": 0.38031429052352905, + "learning_rate": 7.952618303644584e-06, + "loss": 0.0922393798828125, + "step": 5217 + }, + { + "epoch": 0.7270953807566363, + "grad_norm": 0.3551389276981354, + "learning_rate": 7.945094736501094e-06, + "loss": 0.06945919990539551, + "step": 5218 + }, + { + "epoch": 0.7272347244478506, + "grad_norm": 0.4762639105319977, + "learning_rate": 7.937573847682325e-06, + "loss": 0.08401250839233398, + "step": 5219 + }, + { + "epoch": 0.727374068139065, + "grad_norm": 0.6587998867034912, + "learning_rate": 7.930055638859267e-06, + "loss": 0.09933185577392578, + "step": 5220 + }, + { + "epoch": 0.7275134118302794, + "grad_norm": 0.47685685753822327, + "learning_rate": 7.922540111702275e-06, + "loss": 0.09202861785888672, + "step": 5221 + }, + { + "epoch": 0.7276527555214938, + "grad_norm": 0.4172642230987549, + "learning_rate": 7.915027267881139e-06, + "loss": 0.08376312255859375, + "step": 5222 + }, + { + "epoch": 0.7277920992127082, + "grad_norm": 0.8764542937278748, + "learning_rate": 7.907517109065046e-06, + "loss": 0.09502792358398438, + "step": 5223 + }, + { + "epoch": 0.7279314429039225, + "grad_norm": 0.3517884612083435, + "learning_rate": 7.900009636922576e-06, + "loss": 0.08377265930175781, + "step": 5224 + }, + { + "epoch": 0.7280707865951369, + "grad_norm": 0.4839680790901184, + "learning_rate": 7.89250485312171e-06, + "loss": 0.08839797973632812, + "step": 5225 + }, + { + "epoch": 0.7282101302863513, + "grad_norm": 0.4758159816265106, + "learning_rate": 7.885002759329845e-06, + "loss": 0.09103202819824219, + "step": 5226 + }, + { + "epoch": 0.7283494739775657, + "grad_norm": 0.41642919182777405, + "learning_rate": 7.877503357213787e-06, + "loss": 0.08762550354003906, + "step": 5227 + }, + { + "epoch": 0.72848881766878, + "grad_norm": 0.5246632695198059, + "learning_rate": 7.870006648439712e-06, + "loss": 0.10814285278320312, + "step": 5228 + }, + { + "epoch": 0.7286281613599944, + "grad_norm": 0.4119716286659241, + "learning_rate": 7.862512634673237e-06, + "loss": 0.08575057983398438, + "step": 5229 + }, + { + "epoch": 0.7287675050512088, + "grad_norm": 0.31454992294311523, + "learning_rate": 7.855021317579341e-06, + "loss": 0.07938575744628906, + "step": 5230 + }, + { + "epoch": 0.7289068487424232, + "grad_norm": 0.5025148391723633, + "learning_rate": 7.847532698822442e-06, + "loss": 0.08319854736328125, + "step": 5231 + }, + { + "epoch": 0.7290461924336376, + "grad_norm": 0.43763041496276855, + "learning_rate": 7.840046780066325e-06, + "loss": 0.08753776550292969, + "step": 5232 + }, + { + "epoch": 0.7291855361248519, + "grad_norm": 0.42719095945358276, + "learning_rate": 7.832563562974196e-06, + "loss": 0.1043548583984375, + "step": 5233 + }, + { + "epoch": 0.7293248798160663, + "grad_norm": 0.384253591299057, + "learning_rate": 7.825083049208665e-06, + "loss": 0.08589553833007812, + "step": 5234 + }, + { + "epoch": 0.7294642235072807, + "grad_norm": 0.4512743055820465, + "learning_rate": 7.817605240431718e-06, + "loss": 0.09764289855957031, + "step": 5235 + }, + { + "epoch": 0.7296035671984951, + "grad_norm": 0.5818983912467957, + "learning_rate": 7.810130138304755e-06, + "loss": 0.10161495208740234, + "step": 5236 + }, + { + "epoch": 0.7297429108897094, + "grad_norm": 0.6889768242835999, + "learning_rate": 7.802657744488575e-06, + "loss": 0.08957099914550781, + "step": 5237 + }, + { + "epoch": 0.7298822545809238, + "grad_norm": 0.5630994439125061, + "learning_rate": 7.79518806064338e-06, + "loss": 0.09891271591186523, + "step": 5238 + }, + { + "epoch": 0.7300215982721382, + "grad_norm": 0.2852418124675751, + "learning_rate": 7.78772108842876e-06, + "loss": 0.06783103942871094, + "step": 5239 + }, + { + "epoch": 0.7301609419633526, + "grad_norm": 0.40932878851890564, + "learning_rate": 7.780256829503692e-06, + "loss": 0.08333206176757812, + "step": 5240 + }, + { + "epoch": 0.730300285654567, + "grad_norm": 0.49563559889793396, + "learning_rate": 7.772795285526578e-06, + "loss": 0.07872772216796875, + "step": 5241 + }, + { + "epoch": 0.7304396293457813, + "grad_norm": 0.47276851534843445, + "learning_rate": 7.765336458155205e-06, + "loss": 0.09760379791259766, + "step": 5242 + }, + { + "epoch": 0.7305789730369957, + "grad_norm": 0.35983914136886597, + "learning_rate": 7.757880349046742e-06, + "loss": 0.07945442199707031, + "step": 5243 + }, + { + "epoch": 0.7307183167282101, + "grad_norm": 0.47039732336997986, + "learning_rate": 7.750426959857782e-06, + "loss": 0.09909439086914062, + "step": 5244 + }, + { + "epoch": 0.7308576604194245, + "grad_norm": 0.3493168354034424, + "learning_rate": 7.74297629224428e-06, + "loss": 0.08067893981933594, + "step": 5245 + }, + { + "epoch": 0.7309970041106388, + "grad_norm": 0.5753120183944702, + "learning_rate": 7.735528347861623e-06, + "loss": 0.08496665954589844, + "step": 5246 + }, + { + "epoch": 0.7311363478018533, + "grad_norm": 0.40772366523742676, + "learning_rate": 7.728083128364555e-06, + "loss": 0.08683013916015625, + "step": 5247 + }, + { + "epoch": 0.7312756914930677, + "grad_norm": 0.8427327871322632, + "learning_rate": 7.720640635407244e-06, + "loss": 0.11534500122070312, + "step": 5248 + }, + { + "epoch": 0.7314150351842821, + "grad_norm": 0.43415459990501404, + "learning_rate": 7.713200870643246e-06, + "loss": 0.08569145202636719, + "step": 5249 + }, + { + "epoch": 0.7315543788754965, + "grad_norm": 0.4906989634037018, + "learning_rate": 7.705763835725507e-06, + "loss": 0.11303329467773438, + "step": 5250 + }, + { + "epoch": 0.7316937225667108, + "grad_norm": 0.5665615797042847, + "learning_rate": 7.69832953230635e-06, + "loss": 0.10128498077392578, + "step": 5251 + }, + { + "epoch": 0.7318330662579252, + "grad_norm": 0.5099902153015137, + "learning_rate": 7.69089796203752e-06, + "loss": 0.09873008728027344, + "step": 5252 + }, + { + "epoch": 0.7319724099491396, + "grad_norm": 0.4898900091648102, + "learning_rate": 7.683469126570152e-06, + "loss": 0.09905052185058594, + "step": 5253 + }, + { + "epoch": 0.732111753640354, + "grad_norm": 0.6865333318710327, + "learning_rate": 7.67604302755474e-06, + "loss": 0.11147689819335938, + "step": 5254 + }, + { + "epoch": 0.7322510973315683, + "grad_norm": 0.5050451159477234, + "learning_rate": 7.668619666641216e-06, + "loss": 0.1072683334350586, + "step": 5255 + }, + { + "epoch": 0.7323904410227827, + "grad_norm": 0.4831269085407257, + "learning_rate": 7.661199045478874e-06, + "loss": 0.07719993591308594, + "step": 5256 + }, + { + "epoch": 0.7325297847139971, + "grad_norm": 0.2882479727268219, + "learning_rate": 7.653781165716396e-06, + "loss": 0.06712710857391357, + "step": 5257 + }, + { + "epoch": 0.7326691284052115, + "grad_norm": 0.4119734466075897, + "learning_rate": 7.646366029001873e-06, + "loss": 0.07137870788574219, + "step": 5258 + }, + { + "epoch": 0.7328084720964259, + "grad_norm": 0.8076150417327881, + "learning_rate": 7.638953636982789e-06, + "loss": 0.10402870178222656, + "step": 5259 + }, + { + "epoch": 0.7329478157876402, + "grad_norm": 0.41021502017974854, + "learning_rate": 7.631543991305998e-06, + "loss": 0.08690261840820312, + "step": 5260 + }, + { + "epoch": 0.7330871594788546, + "grad_norm": 0.45197373628616333, + "learning_rate": 7.62413709361775e-06, + "loss": 0.08919715881347656, + "step": 5261 + }, + { + "epoch": 0.733226503170069, + "grad_norm": 0.4819374680519104, + "learning_rate": 7.616732945563692e-06, + "loss": 0.07941818237304688, + "step": 5262 + }, + { + "epoch": 0.7333658468612834, + "grad_norm": 0.3713396489620209, + "learning_rate": 7.609331548788865e-06, + "loss": 0.08527183532714844, + "step": 5263 + }, + { + "epoch": 0.7335051905524977, + "grad_norm": 0.6965938210487366, + "learning_rate": 7.601932904937679e-06, + "loss": 0.10346317291259766, + "step": 5264 + }, + { + "epoch": 0.7336445342437121, + "grad_norm": 0.4710712730884552, + "learning_rate": 7.594537015653949e-06, + "loss": 0.08458709716796875, + "step": 5265 + }, + { + "epoch": 0.7337838779349265, + "grad_norm": 0.4169505536556244, + "learning_rate": 7.5871438825808786e-06, + "loss": 0.08943367004394531, + "step": 5266 + }, + { + "epoch": 0.7339232216261409, + "grad_norm": 0.31423258781433105, + "learning_rate": 7.579753507361048e-06, + "loss": 0.07550430297851562, + "step": 5267 + }, + { + "epoch": 0.7340625653173553, + "grad_norm": 0.38406407833099365, + "learning_rate": 7.572365891636422e-06, + "loss": 0.09285640716552734, + "step": 5268 + }, + { + "epoch": 0.7342019090085696, + "grad_norm": 0.4820592403411865, + "learning_rate": 7.5649810370483666e-06, + "loss": 0.09186172485351562, + "step": 5269 + }, + { + "epoch": 0.734341252699784, + "grad_norm": 0.36332079768180847, + "learning_rate": 7.557598945237634e-06, + "loss": 0.08140945434570312, + "step": 5270 + }, + { + "epoch": 0.7344805963909984, + "grad_norm": 0.5391075015068054, + "learning_rate": 7.550219617844354e-06, + "loss": 0.08659172058105469, + "step": 5271 + }, + { + "epoch": 0.7346199400822128, + "grad_norm": 0.5827850103378296, + "learning_rate": 7.542843056508034e-06, + "loss": 0.10856056213378906, + "step": 5272 + }, + { + "epoch": 0.7347592837734271, + "grad_norm": 0.3578699827194214, + "learning_rate": 7.535469262867583e-06, + "loss": 0.07623672485351562, + "step": 5273 + }, + { + "epoch": 0.7348986274646415, + "grad_norm": 0.40833979845046997, + "learning_rate": 7.528098238561301e-06, + "loss": 0.0971221923828125, + "step": 5274 + }, + { + "epoch": 0.7350379711558559, + "grad_norm": 0.559473991394043, + "learning_rate": 7.520729985226842e-06, + "loss": 0.06947040557861328, + "step": 5275 + }, + { + "epoch": 0.7351773148470703, + "grad_norm": 0.41584286093711853, + "learning_rate": 7.513364504501283e-06, + "loss": 0.08779239654541016, + "step": 5276 + }, + { + "epoch": 0.7353166585382847, + "grad_norm": 0.44246241450309753, + "learning_rate": 7.506001798021049e-06, + "loss": 0.09627723693847656, + "step": 5277 + }, + { + "epoch": 0.735456002229499, + "grad_norm": 0.470561683177948, + "learning_rate": 7.498641867421981e-06, + "loss": 0.08346748352050781, + "step": 5278 + }, + { + "epoch": 0.7355953459207134, + "grad_norm": 0.35676249861717224, + "learning_rate": 7.4912847143392706e-06, + "loss": 0.09025096893310547, + "step": 5279 + }, + { + "epoch": 0.7357346896119278, + "grad_norm": 0.32568299770355225, + "learning_rate": 7.483930340407519e-06, + "loss": 0.08262443542480469, + "step": 5280 + }, + { + "epoch": 0.7358740333031422, + "grad_norm": 0.4447092115879059, + "learning_rate": 7.476578747260712e-06, + "loss": 0.08496320247650146, + "step": 5281 + }, + { + "epoch": 0.7360133769943565, + "grad_norm": 0.4531037509441376, + "learning_rate": 7.469229936532179e-06, + "loss": 0.09048843383789062, + "step": 5282 + }, + { + "epoch": 0.7361527206855709, + "grad_norm": 0.6756025552749634, + "learning_rate": 7.46188390985467e-06, + "loss": 0.11101865768432617, + "step": 5283 + }, + { + "epoch": 0.7362920643767853, + "grad_norm": 0.565449059009552, + "learning_rate": 7.454540668860309e-06, + "loss": 0.08837318420410156, + "step": 5284 + }, + { + "epoch": 0.7364314080679997, + "grad_norm": 0.4631728529930115, + "learning_rate": 7.4472002151805985e-06, + "loss": 0.08544921875, + "step": 5285 + }, + { + "epoch": 0.736570751759214, + "grad_norm": 0.39639756083488464, + "learning_rate": 7.4398625504464105e-06, + "loss": 0.08514595031738281, + "step": 5286 + }, + { + "epoch": 0.7367100954504285, + "grad_norm": 0.638043999671936, + "learning_rate": 7.432527676288015e-06, + "loss": 0.11663055419921875, + "step": 5287 + }, + { + "epoch": 0.7368494391416429, + "grad_norm": 0.5970667004585266, + "learning_rate": 7.425195594335053e-06, + "loss": 0.10042953491210938, + "step": 5288 + }, + { + "epoch": 0.7369887828328573, + "grad_norm": 0.2978490889072418, + "learning_rate": 7.417866306216532e-06, + "loss": 0.07342910766601562, + "step": 5289 + }, + { + "epoch": 0.7371281265240717, + "grad_norm": 0.3503442108631134, + "learning_rate": 7.4105398135608645e-06, + "loss": 0.07091140747070312, + "step": 5290 + }, + { + "epoch": 0.737267470215286, + "grad_norm": 0.9584911465644836, + "learning_rate": 7.403216117995835e-06, + "loss": 0.11144065856933594, + "step": 5291 + }, + { + "epoch": 0.7374068139065004, + "grad_norm": 0.37497708201408386, + "learning_rate": 7.395895221148594e-06, + "loss": 0.08430957794189453, + "step": 5292 + }, + { + "epoch": 0.7375461575977148, + "grad_norm": 0.5655354857444763, + "learning_rate": 7.388577124645671e-06, + "loss": 0.10549068450927734, + "step": 5293 + }, + { + "epoch": 0.7376855012889292, + "grad_norm": 0.4072572588920593, + "learning_rate": 7.381261830112989e-06, + "loss": 0.08145713806152344, + "step": 5294 + }, + { + "epoch": 0.7378248449801436, + "grad_norm": 0.6000311970710754, + "learning_rate": 7.373949339175843e-06, + "loss": 0.09606361389160156, + "step": 5295 + }, + { + "epoch": 0.7379641886713579, + "grad_norm": 0.5801194906234741, + "learning_rate": 7.366639653458889e-06, + "loss": 0.1055908203125, + "step": 5296 + }, + { + "epoch": 0.7381035323625723, + "grad_norm": 0.44591984152793884, + "learning_rate": 7.359332774586188e-06, + "loss": 0.08986091613769531, + "step": 5297 + }, + { + "epoch": 0.7382428760537867, + "grad_norm": 0.3283863961696625, + "learning_rate": 7.352028704181145e-06, + "loss": 0.082611083984375, + "step": 5298 + }, + { + "epoch": 0.7383822197450011, + "grad_norm": 0.5221490859985352, + "learning_rate": 7.344727443866573e-06, + "loss": 0.08512377738952637, + "step": 5299 + }, + { + "epoch": 0.7385215634362154, + "grad_norm": 0.6158285737037659, + "learning_rate": 7.3374289952646305e-06, + "loss": 0.09510231018066406, + "step": 5300 + }, + { + "epoch": 0.7386609071274298, + "grad_norm": 0.45601776242256165, + "learning_rate": 7.330133359996876e-06, + "loss": 0.10252952575683594, + "step": 5301 + }, + { + "epoch": 0.7388002508186442, + "grad_norm": 0.40676796436309814, + "learning_rate": 7.322840539684235e-06, + "loss": 0.10297012329101562, + "step": 5302 + }, + { + "epoch": 0.7389395945098586, + "grad_norm": 0.6170949339866638, + "learning_rate": 7.3155505359470046e-06, + "loss": 0.08812665939331055, + "step": 5303 + }, + { + "epoch": 0.739078938201073, + "grad_norm": 0.5237796902656555, + "learning_rate": 7.308263350404845e-06, + "loss": 0.09369850158691406, + "step": 5304 + }, + { + "epoch": 0.7392182818922873, + "grad_norm": 0.6146647334098816, + "learning_rate": 7.3009789846768116e-06, + "loss": 0.103546142578125, + "step": 5305 + }, + { + "epoch": 0.7393576255835017, + "grad_norm": 0.4040724039077759, + "learning_rate": 7.2936974403813336e-06, + "loss": 0.09817886352539062, + "step": 5306 + }, + { + "epoch": 0.7394969692747161, + "grad_norm": 0.4689130485057831, + "learning_rate": 7.286418719136186e-06, + "loss": 0.07189071178436279, + "step": 5307 + }, + { + "epoch": 0.7396363129659305, + "grad_norm": 0.419023722410202, + "learning_rate": 7.279142822558549e-06, + "loss": 0.07885932922363281, + "step": 5308 + }, + { + "epoch": 0.7397756566571448, + "grad_norm": 0.6023585200309753, + "learning_rate": 7.271869752264949e-06, + "loss": 0.09263229370117188, + "step": 5309 + }, + { + "epoch": 0.7399150003483592, + "grad_norm": 1.0242122411727905, + "learning_rate": 7.264599509871309e-06, + "loss": 0.11744499206542969, + "step": 5310 + }, + { + "epoch": 0.7400543440395736, + "grad_norm": 0.5348385572433472, + "learning_rate": 7.2573320969928974e-06, + "loss": 0.09058547019958496, + "step": 5311 + }, + { + "epoch": 0.740193687730788, + "grad_norm": 0.6076871752738953, + "learning_rate": 7.250067515244373e-06, + "loss": 0.08866119384765625, + "step": 5312 + }, + { + "epoch": 0.7403330314220024, + "grad_norm": 0.39442476630210876, + "learning_rate": 7.2428057662397665e-06, + "loss": 0.08121299743652344, + "step": 5313 + }, + { + "epoch": 0.7404723751132167, + "grad_norm": 0.5301928520202637, + "learning_rate": 7.235546851592468e-06, + "loss": 0.08365440368652344, + "step": 5314 + }, + { + "epoch": 0.7406117188044311, + "grad_norm": 0.597322940826416, + "learning_rate": 7.228290772915234e-06, + "loss": 0.101104736328125, + "step": 5315 + }, + { + "epoch": 0.7407510624956455, + "grad_norm": 0.24936163425445557, + "learning_rate": 7.22103753182021e-06, + "loss": 0.06414175033569336, + "step": 5316 + }, + { + "epoch": 0.7408904061868599, + "grad_norm": 0.4575473964214325, + "learning_rate": 7.213787129918901e-06, + "loss": 0.08383941650390625, + "step": 5317 + }, + { + "epoch": 0.7410297498780742, + "grad_norm": 0.38753676414489746, + "learning_rate": 7.206539568822179e-06, + "loss": 0.07906723022460938, + "step": 5318 + }, + { + "epoch": 0.7411690935692886, + "grad_norm": 0.4588843584060669, + "learning_rate": 7.199294850140279e-06, + "loss": 0.08118629455566406, + "step": 5319 + }, + { + "epoch": 0.741308437260503, + "grad_norm": 0.4906274676322937, + "learning_rate": 7.1920529754828235e-06, + "loss": 0.08405876159667969, + "step": 5320 + }, + { + "epoch": 0.7414477809517174, + "grad_norm": 0.6013595461845398, + "learning_rate": 7.184813946458782e-06, + "loss": 0.10378265380859375, + "step": 5321 + }, + { + "epoch": 0.7415871246429317, + "grad_norm": 0.37763848900794983, + "learning_rate": 7.177577764676504e-06, + "loss": 0.08127784729003906, + "step": 5322 + }, + { + "epoch": 0.7417264683341461, + "grad_norm": 1.0654324293136597, + "learning_rate": 7.170344431743707e-06, + "loss": 0.11577224731445312, + "step": 5323 + }, + { + "epoch": 0.7418658120253605, + "grad_norm": 0.49839162826538086, + "learning_rate": 7.163113949267484e-06, + "loss": 0.0998697280883789, + "step": 5324 + }, + { + "epoch": 0.7420051557165749, + "grad_norm": 0.5921831727027893, + "learning_rate": 7.155886318854257e-06, + "loss": 0.0980978012084961, + "step": 5325 + }, + { + "epoch": 0.7421444994077893, + "grad_norm": 0.50941401720047, + "learning_rate": 7.148661542109854e-06, + "loss": 0.08040332794189453, + "step": 5326 + }, + { + "epoch": 0.7422838430990036, + "grad_norm": 0.9845966100692749, + "learning_rate": 7.141439620639463e-06, + "loss": 0.10400390625, + "step": 5327 + }, + { + "epoch": 0.7424231867902181, + "grad_norm": 0.4053991138935089, + "learning_rate": 7.134220556047613e-06, + "loss": 0.08582305908203125, + "step": 5328 + }, + { + "epoch": 0.7425625304814325, + "grad_norm": 0.5667415261268616, + "learning_rate": 7.127004349938234e-06, + "loss": 0.09094047546386719, + "step": 5329 + }, + { + "epoch": 0.7427018741726469, + "grad_norm": 0.4396097958087921, + "learning_rate": 7.119791003914584e-06, + "loss": 0.08977127075195312, + "step": 5330 + }, + { + "epoch": 0.7428412178638613, + "grad_norm": 0.5698896050453186, + "learning_rate": 7.112580519579322e-06, + "loss": 0.09955787658691406, + "step": 5331 + }, + { + "epoch": 0.7429805615550756, + "grad_norm": 0.4492167830467224, + "learning_rate": 7.105372898534435e-06, + "loss": 0.09871864318847656, + "step": 5332 + }, + { + "epoch": 0.74311990524629, + "grad_norm": 0.4554264545440674, + "learning_rate": 7.098168142381301e-06, + "loss": 0.09199142456054688, + "step": 5333 + }, + { + "epoch": 0.7432592489375044, + "grad_norm": 0.3839818239212036, + "learning_rate": 7.090966252720659e-06, + "loss": 0.07531166076660156, + "step": 5334 + }, + { + "epoch": 0.7433985926287188, + "grad_norm": 0.6205188035964966, + "learning_rate": 7.083767231152598e-06, + "loss": 0.09674263000488281, + "step": 5335 + }, + { + "epoch": 0.7435379363199331, + "grad_norm": 0.634882926940918, + "learning_rate": 7.076571079276569e-06, + "loss": 0.10054397583007812, + "step": 5336 + }, + { + "epoch": 0.7436772800111475, + "grad_norm": 1.0610545873641968, + "learning_rate": 7.069377798691397e-06, + "loss": 0.09851217269897461, + "step": 5337 + }, + { + "epoch": 0.7438166237023619, + "grad_norm": 0.749803900718689, + "learning_rate": 7.0621873909952765e-06, + "loss": 0.09797477722167969, + "step": 5338 + }, + { + "epoch": 0.7439559673935763, + "grad_norm": 0.6677827835083008, + "learning_rate": 7.05499985778574e-06, + "loss": 0.10090255737304688, + "step": 5339 + }, + { + "epoch": 0.7440953110847907, + "grad_norm": 0.3929935097694397, + "learning_rate": 7.047815200659691e-06, + "loss": 0.08609962463378906, + "step": 5340 + }, + { + "epoch": 0.744234654776005, + "grad_norm": 0.49244508147239685, + "learning_rate": 7.040633421213401e-06, + "loss": 0.0965433120727539, + "step": 5341 + }, + { + "epoch": 0.7443739984672194, + "grad_norm": 0.41566482186317444, + "learning_rate": 7.033454521042502e-06, + "loss": 0.08229446411132812, + "step": 5342 + }, + { + "epoch": 0.7445133421584338, + "grad_norm": 0.4432116448879242, + "learning_rate": 7.026278501741972e-06, + "loss": 0.08407020568847656, + "step": 5343 + }, + { + "epoch": 0.7446526858496482, + "grad_norm": 0.572174608707428, + "learning_rate": 7.019105364906165e-06, + "loss": 0.09876632690429688, + "step": 5344 + }, + { + "epoch": 0.7447920295408625, + "grad_norm": 0.6263193488121033, + "learning_rate": 7.011935112128791e-06, + "loss": 0.10939598083496094, + "step": 5345 + }, + { + "epoch": 0.7449313732320769, + "grad_norm": 0.33421850204467773, + "learning_rate": 7.004767745002916e-06, + "loss": 0.07464981079101562, + "step": 5346 + }, + { + "epoch": 0.7450707169232913, + "grad_norm": 0.6633138656616211, + "learning_rate": 6.997603265120951e-06, + "loss": 0.09494972229003906, + "step": 5347 + }, + { + "epoch": 0.7452100606145057, + "grad_norm": 0.7169560194015503, + "learning_rate": 6.990441674074695e-06, + "loss": 0.10303878784179688, + "step": 5348 + }, + { + "epoch": 0.74534940430572, + "grad_norm": 0.9497549533843994, + "learning_rate": 6.98328297345529e-06, + "loss": 0.09436416625976562, + "step": 5349 + }, + { + "epoch": 0.7454887479969344, + "grad_norm": 0.4263569414615631, + "learning_rate": 6.9761271648532306e-06, + "loss": 0.09514760971069336, + "step": 5350 + }, + { + "epoch": 0.7456280916881488, + "grad_norm": 0.4876782298088074, + "learning_rate": 6.968974249858371e-06, + "loss": 0.0860757827758789, + "step": 5351 + }, + { + "epoch": 0.7457674353793632, + "grad_norm": 0.3703700304031372, + "learning_rate": 6.9618242300599284e-06, + "loss": 0.08926963806152344, + "step": 5352 + }, + { + "epoch": 0.7459067790705776, + "grad_norm": 0.4060896635055542, + "learning_rate": 6.9546771070464815e-06, + "loss": 0.07756996154785156, + "step": 5353 + }, + { + "epoch": 0.7460461227617919, + "grad_norm": 0.5362085700035095, + "learning_rate": 6.947532882405945e-06, + "loss": 0.09951591491699219, + "step": 5354 + }, + { + "epoch": 0.7461854664530063, + "grad_norm": 0.41025054454803467, + "learning_rate": 6.940391557725616e-06, + "loss": 0.086090087890625, + "step": 5355 + }, + { + "epoch": 0.7463248101442207, + "grad_norm": 0.4145530164241791, + "learning_rate": 6.933253134592128e-06, + "loss": 0.08275794982910156, + "step": 5356 + }, + { + "epoch": 0.7464641538354351, + "grad_norm": 0.40313273668289185, + "learning_rate": 6.9261176145914655e-06, + "loss": 0.07847023010253906, + "step": 5357 + }, + { + "epoch": 0.7466034975266495, + "grad_norm": 0.4994713068008423, + "learning_rate": 6.9189849993089905e-06, + "loss": 0.08940696716308594, + "step": 5358 + }, + { + "epoch": 0.7467428412178638, + "grad_norm": 0.4583868682384491, + "learning_rate": 6.911855290329408e-06, + "loss": 0.09763717651367188, + "step": 5359 + }, + { + "epoch": 0.7468821849090782, + "grad_norm": 0.6026089787483215, + "learning_rate": 6.904728489236767e-06, + "loss": 0.09132194519042969, + "step": 5360 + }, + { + "epoch": 0.7470215286002926, + "grad_norm": 0.4505079686641693, + "learning_rate": 6.897604597614491e-06, + "loss": 0.10080337524414062, + "step": 5361 + }, + { + "epoch": 0.747160872291507, + "grad_norm": 0.46419063210487366, + "learning_rate": 6.890483617045336e-06, + "loss": 0.09720039367675781, + "step": 5362 + }, + { + "epoch": 0.7473002159827213, + "grad_norm": 0.4745637774467468, + "learning_rate": 6.883365549111432e-06, + "loss": 0.09343147277832031, + "step": 5363 + }, + { + "epoch": 0.7474395596739357, + "grad_norm": 0.3138201832771301, + "learning_rate": 6.876250395394237e-06, + "loss": 0.07505416870117188, + "step": 5364 + }, + { + "epoch": 0.7475789033651501, + "grad_norm": 0.6726796627044678, + "learning_rate": 6.869138157474586e-06, + "loss": 0.11291885375976562, + "step": 5365 + }, + { + "epoch": 0.7477182470563645, + "grad_norm": 0.438900351524353, + "learning_rate": 6.862028836932659e-06, + "loss": 0.08656501770019531, + "step": 5366 + }, + { + "epoch": 0.7478575907475788, + "grad_norm": 0.4056769013404846, + "learning_rate": 6.854922435347979e-06, + "loss": 0.093597412109375, + "step": 5367 + }, + { + "epoch": 0.7479969344387933, + "grad_norm": 0.5056219696998596, + "learning_rate": 6.847818954299421e-06, + "loss": 0.11152076721191406, + "step": 5368 + }, + { + "epoch": 0.7481362781300077, + "grad_norm": 0.8636768460273743, + "learning_rate": 6.840718395365222e-06, + "loss": 0.10748291015625, + "step": 5369 + }, + { + "epoch": 0.7482756218212221, + "grad_norm": 0.3618764877319336, + "learning_rate": 6.833620760122972e-06, + "loss": 0.07472658157348633, + "step": 5370 + }, + { + "epoch": 0.7484149655124365, + "grad_norm": 0.45654046535491943, + "learning_rate": 6.826526050149594e-06, + "loss": 0.1010894775390625, + "step": 5371 + }, + { + "epoch": 0.7485543092036508, + "grad_norm": 0.4978749454021454, + "learning_rate": 6.819434267021366e-06, + "loss": 0.07986736297607422, + "step": 5372 + }, + { + "epoch": 0.7486936528948652, + "grad_norm": 0.884177565574646, + "learning_rate": 6.812345412313926e-06, + "loss": 0.10500526428222656, + "step": 5373 + }, + { + "epoch": 0.7488329965860796, + "grad_norm": 0.46364256739616394, + "learning_rate": 6.805259487602261e-06, + "loss": 0.07954025268554688, + "step": 5374 + }, + { + "epoch": 0.748972340277294, + "grad_norm": 0.39954742789268494, + "learning_rate": 6.798176494460693e-06, + "loss": 0.07970046997070312, + "step": 5375 + }, + { + "epoch": 0.7491116839685084, + "grad_norm": 0.5610759854316711, + "learning_rate": 6.791096434462909e-06, + "loss": 0.09056663513183594, + "step": 5376 + }, + { + "epoch": 0.7492510276597227, + "grad_norm": 0.2799004316329956, + "learning_rate": 6.7840193091819285e-06, + "loss": 0.06889724731445312, + "step": 5377 + }, + { + "epoch": 0.7493903713509371, + "grad_norm": 0.5792919397354126, + "learning_rate": 6.776945120190137e-06, + "loss": 0.0978240966796875, + "step": 5378 + }, + { + "epoch": 0.7495297150421515, + "grad_norm": 0.3681413531303406, + "learning_rate": 6.769873869059247e-06, + "loss": 0.07661056518554688, + "step": 5379 + }, + { + "epoch": 0.7496690587333659, + "grad_norm": 0.5948193073272705, + "learning_rate": 6.762805557360335e-06, + "loss": 0.09759044647216797, + "step": 5380 + }, + { + "epoch": 0.7498084024245802, + "grad_norm": 0.3456171751022339, + "learning_rate": 6.755740186663822e-06, + "loss": 0.07912254333496094, + "step": 5381 + }, + { + "epoch": 0.7499477461157946, + "grad_norm": 0.7422981262207031, + "learning_rate": 6.748677758539468e-06, + "loss": 0.08520793914794922, + "step": 5382 + }, + { + "epoch": 0.750087089807009, + "grad_norm": 0.551237165927887, + "learning_rate": 6.741618274556379e-06, + "loss": 0.10649299621582031, + "step": 5383 + }, + { + "epoch": 0.7502264334982234, + "grad_norm": 0.846246063709259, + "learning_rate": 6.734561736283014e-06, + "loss": 0.09929275512695312, + "step": 5384 + }, + { + "epoch": 0.7503657771894378, + "grad_norm": 0.38197061419487, + "learning_rate": 6.727508145287183e-06, + "loss": 0.08131074905395508, + "step": 5385 + }, + { + "epoch": 0.7505051208806521, + "grad_norm": 0.5145688652992249, + "learning_rate": 6.720457503136017e-06, + "loss": 0.08530139923095703, + "step": 5386 + }, + { + "epoch": 0.7506444645718665, + "grad_norm": 0.30903929471969604, + "learning_rate": 6.713409811396028e-06, + "loss": 0.06810569763183594, + "step": 5387 + }, + { + "epoch": 0.7507838082630809, + "grad_norm": 0.5115802884101868, + "learning_rate": 6.706365071633037e-06, + "loss": 0.09627723693847656, + "step": 5388 + }, + { + "epoch": 0.7509231519542953, + "grad_norm": 0.6169834733009338, + "learning_rate": 6.699323285412222e-06, + "loss": 0.10042738914489746, + "step": 5389 + }, + { + "epoch": 0.7510624956455096, + "grad_norm": 0.3932315409183502, + "learning_rate": 6.692284454298115e-06, + "loss": 0.08554458618164062, + "step": 5390 + }, + { + "epoch": 0.751201839336724, + "grad_norm": 0.39151257276535034, + "learning_rate": 6.685248579854589e-06, + "loss": 0.0811309814453125, + "step": 5391 + }, + { + "epoch": 0.7513411830279384, + "grad_norm": 1.05046808719635, + "learning_rate": 6.678215663644845e-06, + "loss": 0.13650131225585938, + "step": 5392 + }, + { + "epoch": 0.7514805267191528, + "grad_norm": 0.5210810303688049, + "learning_rate": 6.671185707231434e-06, + "loss": 0.0807332992553711, + "step": 5393 + }, + { + "epoch": 0.7516198704103672, + "grad_norm": 0.33675646781921387, + "learning_rate": 6.664158712176256e-06, + "loss": 0.08426475524902344, + "step": 5394 + }, + { + "epoch": 0.7517592141015815, + "grad_norm": 0.3840154707431793, + "learning_rate": 6.657134680040558e-06, + "loss": 0.08184432983398438, + "step": 5395 + }, + { + "epoch": 0.7518985577927959, + "grad_norm": 0.4494657516479492, + "learning_rate": 6.650113612384903e-06, + "loss": 0.08826828002929688, + "step": 5396 + }, + { + "epoch": 0.7520379014840103, + "grad_norm": 0.48738086223602295, + "learning_rate": 6.643095510769229e-06, + "loss": 0.0983428955078125, + "step": 5397 + }, + { + "epoch": 0.7521772451752247, + "grad_norm": 0.7045894265174866, + "learning_rate": 6.63608037675278e-06, + "loss": 0.10644912719726562, + "step": 5398 + }, + { + "epoch": 0.752316588866439, + "grad_norm": 0.48888760805130005, + "learning_rate": 6.629068211894176e-06, + "loss": 0.09390830993652344, + "step": 5399 + }, + { + "epoch": 0.7524559325576534, + "grad_norm": 0.4666992723941803, + "learning_rate": 6.622059017751346e-06, + "loss": 0.09128379821777344, + "step": 5400 + }, + { + "epoch": 0.7525952762488678, + "grad_norm": 0.5271023511886597, + "learning_rate": 6.615052795881576e-06, + "loss": 0.08782577514648438, + "step": 5401 + }, + { + "epoch": 0.7527346199400822, + "grad_norm": 0.5505194664001465, + "learning_rate": 6.6080495478415e-06, + "loss": 0.10228919982910156, + "step": 5402 + }, + { + "epoch": 0.7528739636312965, + "grad_norm": 0.3418956696987152, + "learning_rate": 6.60104927518707e-06, + "loss": 0.07205677032470703, + "step": 5403 + }, + { + "epoch": 0.7530133073225109, + "grad_norm": 0.48155125975608826, + "learning_rate": 6.594051979473582e-06, + "loss": 0.09124088287353516, + "step": 5404 + }, + { + "epoch": 0.7531526510137253, + "grad_norm": 0.39395052194595337, + "learning_rate": 6.58705766225568e-06, + "loss": 0.0864410400390625, + "step": 5405 + }, + { + "epoch": 0.7532919947049397, + "grad_norm": 0.45500844717025757, + "learning_rate": 6.580066325087351e-06, + "loss": 0.09110832214355469, + "step": 5406 + }, + { + "epoch": 0.7534313383961541, + "grad_norm": 0.3909684419631958, + "learning_rate": 6.573077969521892e-06, + "loss": 0.08516883850097656, + "step": 5407 + }, + { + "epoch": 0.7535706820873685, + "grad_norm": 0.29171431064605713, + "learning_rate": 6.566092597111977e-06, + "loss": 0.07258892059326172, + "step": 5408 + }, + { + "epoch": 0.7537100257785829, + "grad_norm": 0.4389742612838745, + "learning_rate": 6.559110209409578e-06, + "loss": 0.0920095443725586, + "step": 5409 + }, + { + "epoch": 0.7538493694697973, + "grad_norm": 0.5246798992156982, + "learning_rate": 6.552130807966035e-06, + "loss": 0.10103416442871094, + "step": 5410 + }, + { + "epoch": 0.7539887131610117, + "grad_norm": 0.6327086091041565, + "learning_rate": 6.5451543943320005e-06, + "loss": 0.09800338745117188, + "step": 5411 + }, + { + "epoch": 0.7541280568522261, + "grad_norm": 0.3251563012599945, + "learning_rate": 6.538180970057482e-06, + "loss": 0.06887626647949219, + "step": 5412 + }, + { + "epoch": 0.7542674005434404, + "grad_norm": 0.5756494402885437, + "learning_rate": 6.531210536691819e-06, + "loss": 0.09775733947753906, + "step": 5413 + }, + { + "epoch": 0.7544067442346548, + "grad_norm": 0.6717303991317749, + "learning_rate": 6.524243095783675e-06, + "loss": 0.10047340393066406, + "step": 5414 + }, + { + "epoch": 0.7545460879258692, + "grad_norm": 0.7060152888298035, + "learning_rate": 6.517278648881054e-06, + "loss": 0.1072845458984375, + "step": 5415 + }, + { + "epoch": 0.7546854316170836, + "grad_norm": 0.501920223236084, + "learning_rate": 6.5103171975312995e-06, + "loss": 0.106292724609375, + "step": 5416 + }, + { + "epoch": 0.754824775308298, + "grad_norm": 0.44024017453193665, + "learning_rate": 6.503358743281098e-06, + "loss": 0.08368968963623047, + "step": 5417 + }, + { + "epoch": 0.7549641189995123, + "grad_norm": 0.5017408132553101, + "learning_rate": 6.496403287676443e-06, + "loss": 0.07895660400390625, + "step": 5418 + }, + { + "epoch": 0.7551034626907267, + "grad_norm": 0.3875166177749634, + "learning_rate": 6.489450832262692e-06, + "loss": 0.08266067504882812, + "step": 5419 + }, + { + "epoch": 0.7552428063819411, + "grad_norm": 0.4901347756385803, + "learning_rate": 6.482501378584511e-06, + "loss": 0.09301280975341797, + "step": 5420 + }, + { + "epoch": 0.7553821500731555, + "grad_norm": 0.43248075246810913, + "learning_rate": 6.475554928185912e-06, + "loss": 0.07671928405761719, + "step": 5421 + }, + { + "epoch": 0.7555214937643698, + "grad_norm": 0.7283135652542114, + "learning_rate": 6.468611482610238e-06, + "loss": 0.09989166259765625, + "step": 5422 + }, + { + "epoch": 0.7556608374555842, + "grad_norm": 0.5689669251441956, + "learning_rate": 6.461671043400166e-06, + "loss": 0.08815574645996094, + "step": 5423 + }, + { + "epoch": 0.7558001811467986, + "grad_norm": 0.5102803707122803, + "learning_rate": 6.454733612097717e-06, + "loss": 0.10833358764648438, + "step": 5424 + }, + { + "epoch": 0.755939524838013, + "grad_norm": 0.4267197847366333, + "learning_rate": 6.4477991902442e-06, + "loss": 0.08136367797851562, + "step": 5425 + }, + { + "epoch": 0.7560788685292273, + "grad_norm": 0.4803551733493805, + "learning_rate": 6.440867779380302e-06, + "loss": 0.09821510314941406, + "step": 5426 + }, + { + "epoch": 0.7562182122204417, + "grad_norm": 0.3570232689380646, + "learning_rate": 6.43393938104603e-06, + "loss": 0.07237482070922852, + "step": 5427 + }, + { + "epoch": 0.7563575559116561, + "grad_norm": 0.40168359875679016, + "learning_rate": 6.427013996780702e-06, + "loss": 0.09006881713867188, + "step": 5428 + }, + { + "epoch": 0.7564968996028705, + "grad_norm": 0.4151839315891266, + "learning_rate": 6.420091628122995e-06, + "loss": 0.08557891845703125, + "step": 5429 + }, + { + "epoch": 0.7566362432940849, + "grad_norm": 0.41898179054260254, + "learning_rate": 6.413172276610886e-06, + "loss": 0.07353401184082031, + "step": 5430 + }, + { + "epoch": 0.7567755869852992, + "grad_norm": 0.5728625059127808, + "learning_rate": 6.406255943781711e-06, + "loss": 0.09370040893554688, + "step": 5431 + }, + { + "epoch": 0.7569149306765136, + "grad_norm": 0.6728006601333618, + "learning_rate": 6.3993426311721095e-06, + "loss": 0.09961318969726562, + "step": 5432 + }, + { + "epoch": 0.757054274367728, + "grad_norm": 0.8022339940071106, + "learning_rate": 6.3924323403180685e-06, + "loss": 0.11972475051879883, + "step": 5433 + }, + { + "epoch": 0.7571936180589424, + "grad_norm": 0.46424534916877747, + "learning_rate": 6.385525072754899e-06, + "loss": 0.08129739761352539, + "step": 5434 + }, + { + "epoch": 0.7573329617501567, + "grad_norm": 0.4555242359638214, + "learning_rate": 6.378620830017237e-06, + "loss": 0.09407424926757812, + "step": 5435 + }, + { + "epoch": 0.7574723054413711, + "grad_norm": 0.528745710849762, + "learning_rate": 6.371719613639036e-06, + "loss": 0.09980535507202148, + "step": 5436 + }, + { + "epoch": 0.7576116491325855, + "grad_norm": 0.5325195789337158, + "learning_rate": 6.3648214251536e-06, + "loss": 0.08066558837890625, + "step": 5437 + }, + { + "epoch": 0.7577509928237999, + "grad_norm": 0.5775245428085327, + "learning_rate": 6.357926266093552e-06, + "loss": 0.0955352783203125, + "step": 5438 + }, + { + "epoch": 0.7578903365150143, + "grad_norm": 0.424261212348938, + "learning_rate": 6.351034137990828e-06, + "loss": 0.0825052261352539, + "step": 5439 + }, + { + "epoch": 0.7580296802062286, + "grad_norm": 0.5613502264022827, + "learning_rate": 6.344145042376715e-06, + "loss": 0.07566690444946289, + "step": 5440 + }, + { + "epoch": 0.758169023897443, + "grad_norm": 0.5466670393943787, + "learning_rate": 6.337258980781797e-06, + "loss": 0.09519195556640625, + "step": 5441 + }, + { + "epoch": 0.7583083675886574, + "grad_norm": 0.5307197570800781, + "learning_rate": 6.330375954736014e-06, + "loss": 0.11113834381103516, + "step": 5442 + }, + { + "epoch": 0.7584477112798718, + "grad_norm": 0.6728624105453491, + "learning_rate": 6.323495965768605e-06, + "loss": 0.10577011108398438, + "step": 5443 + }, + { + "epoch": 0.7585870549710861, + "grad_norm": 0.36222103238105774, + "learning_rate": 6.3166190154081545e-06, + "loss": 0.07632827758789062, + "step": 5444 + }, + { + "epoch": 0.7587263986623005, + "grad_norm": 0.5127343535423279, + "learning_rate": 6.309745105182566e-06, + "loss": 0.07722091674804688, + "step": 5445 + }, + { + "epoch": 0.7588657423535149, + "grad_norm": 0.426237016916275, + "learning_rate": 6.302874236619061e-06, + "loss": 0.08683300018310547, + "step": 5446 + }, + { + "epoch": 0.7590050860447293, + "grad_norm": 0.3579869568347931, + "learning_rate": 6.296006411244184e-06, + "loss": 0.07526588439941406, + "step": 5447 + }, + { + "epoch": 0.7591444297359438, + "grad_norm": 0.5123420357704163, + "learning_rate": 6.289141630583815e-06, + "loss": 0.08375358581542969, + "step": 5448 + }, + { + "epoch": 0.7592837734271581, + "grad_norm": 1.0531171560287476, + "learning_rate": 6.28227989616316e-06, + "loss": 0.10148811340332031, + "step": 5449 + }, + { + "epoch": 0.7594231171183725, + "grad_norm": 0.556954562664032, + "learning_rate": 6.275421209506729e-06, + "loss": 0.10597801208496094, + "step": 5450 + }, + { + "epoch": 0.7595624608095869, + "grad_norm": 0.5750244855880737, + "learning_rate": 6.268565572138361e-06, + "loss": 0.08603477478027344, + "step": 5451 + }, + { + "epoch": 0.7597018045008013, + "grad_norm": 0.4850010573863983, + "learning_rate": 6.2617129855812295e-06, + "loss": 0.09824943542480469, + "step": 5452 + }, + { + "epoch": 0.7598411481920156, + "grad_norm": 0.7605985403060913, + "learning_rate": 6.254863451357829e-06, + "loss": 0.0960693359375, + "step": 5453 + }, + { + "epoch": 0.75998049188323, + "grad_norm": 0.47630807757377625, + "learning_rate": 6.248016970989957e-06, + "loss": 0.08648109436035156, + "step": 5454 + }, + { + "epoch": 0.7601198355744444, + "grad_norm": 0.6079874038696289, + "learning_rate": 6.241173545998749e-06, + "loss": 0.09064865112304688, + "step": 5455 + }, + { + "epoch": 0.7602591792656588, + "grad_norm": 1.076406717300415, + "learning_rate": 6.2343331779046745e-06, + "loss": 0.11617279052734375, + "step": 5456 + }, + { + "epoch": 0.7603985229568732, + "grad_norm": 0.42721027135849, + "learning_rate": 6.227495868227477e-06, + "loss": 0.10055732727050781, + "step": 5457 + }, + { + "epoch": 0.7605378666480875, + "grad_norm": 0.39925241470336914, + "learning_rate": 6.220661618486268e-06, + "loss": 0.07300376892089844, + "step": 5458 + }, + { + "epoch": 0.7606772103393019, + "grad_norm": 0.4555540680885315, + "learning_rate": 6.2138304301994654e-06, + "loss": 0.08638572692871094, + "step": 5459 + }, + { + "epoch": 0.7608165540305163, + "grad_norm": 0.3080964982509613, + "learning_rate": 6.207002304884793e-06, + "loss": 0.07509040832519531, + "step": 5460 + }, + { + "epoch": 0.7609558977217307, + "grad_norm": 0.3623853921890259, + "learning_rate": 6.200177244059313e-06, + "loss": 0.08197784423828125, + "step": 5461 + }, + { + "epoch": 0.761095241412945, + "grad_norm": 0.6817664504051208, + "learning_rate": 6.19335524923939e-06, + "loss": 0.12142372131347656, + "step": 5462 + }, + { + "epoch": 0.7612345851041594, + "grad_norm": 0.43223118782043457, + "learning_rate": 6.186536321940724e-06, + "loss": 0.0708160400390625, + "step": 5463 + }, + { + "epoch": 0.7613739287953738, + "grad_norm": 0.4773719310760498, + "learning_rate": 6.179720463678314e-06, + "loss": 0.09674835205078125, + "step": 5464 + }, + { + "epoch": 0.7615132724865882, + "grad_norm": 0.4745444357395172, + "learning_rate": 6.172907675966495e-06, + "loss": 0.07425594329833984, + "step": 5465 + }, + { + "epoch": 0.7616526161778026, + "grad_norm": 0.38998255133628845, + "learning_rate": 6.16609796031892e-06, + "loss": 0.07412147521972656, + "step": 5466 + }, + { + "epoch": 0.7617919598690169, + "grad_norm": 0.40828582644462585, + "learning_rate": 6.159291318248544e-06, + "loss": 0.07991600036621094, + "step": 5467 + }, + { + "epoch": 0.7619313035602313, + "grad_norm": 0.40047287940979004, + "learning_rate": 6.152487751267641e-06, + "loss": 0.07864093780517578, + "step": 5468 + }, + { + "epoch": 0.7620706472514457, + "grad_norm": 0.7419723868370056, + "learning_rate": 6.145687260887818e-06, + "loss": 0.11982345581054688, + "step": 5469 + }, + { + "epoch": 0.7622099909426601, + "grad_norm": 0.3451470732688904, + "learning_rate": 6.138889848619991e-06, + "loss": 0.06662750244140625, + "step": 5470 + }, + { + "epoch": 0.7623493346338744, + "grad_norm": 0.39911672472953796, + "learning_rate": 6.132095515974385e-06, + "loss": 0.092620849609375, + "step": 5471 + }, + { + "epoch": 0.7624886783250888, + "grad_norm": 0.4567844569683075, + "learning_rate": 6.125304264460541e-06, + "loss": 0.0852813720703125, + "step": 5472 + }, + { + "epoch": 0.7626280220163032, + "grad_norm": 0.5701547265052795, + "learning_rate": 6.118516095587321e-06, + "loss": 0.08917808532714844, + "step": 5473 + }, + { + "epoch": 0.7627673657075176, + "grad_norm": 0.4336518943309784, + "learning_rate": 6.1117310108629156e-06, + "loss": 0.08554935455322266, + "step": 5474 + }, + { + "epoch": 0.762906709398732, + "grad_norm": 0.3876418173313141, + "learning_rate": 6.104949011794796e-06, + "loss": 0.07991933822631836, + "step": 5475 + }, + { + "epoch": 0.7630460530899463, + "grad_norm": 0.47657862305641174, + "learning_rate": 6.098170099889777e-06, + "loss": 0.10055732727050781, + "step": 5476 + }, + { + "epoch": 0.7631853967811607, + "grad_norm": 0.33114683628082275, + "learning_rate": 6.0913942766539855e-06, + "loss": 0.09089851379394531, + "step": 5477 + }, + { + "epoch": 0.7633247404723751, + "grad_norm": 0.4827987253665924, + "learning_rate": 6.084621543592846e-06, + "loss": 0.08465385437011719, + "step": 5478 + }, + { + "epoch": 0.7634640841635895, + "grad_norm": 0.5515408515930176, + "learning_rate": 6.0778519022110985e-06, + "loss": 0.09101486206054688, + "step": 5479 + }, + { + "epoch": 0.7636034278548038, + "grad_norm": 0.5176904201507568, + "learning_rate": 6.071085354012812e-06, + "loss": 0.09743690490722656, + "step": 5480 + }, + { + "epoch": 0.7637427715460182, + "grad_norm": 0.4569050967693329, + "learning_rate": 6.064321900501362e-06, + "loss": 0.09124374389648438, + "step": 5481 + }, + { + "epoch": 0.7638821152372326, + "grad_norm": 0.3964097201824188, + "learning_rate": 6.057561543179429e-06, + "loss": 0.08055496215820312, + "step": 5482 + }, + { + "epoch": 0.764021458928447, + "grad_norm": 0.4325997531414032, + "learning_rate": 6.050804283549005e-06, + "loss": 0.09075355529785156, + "step": 5483 + }, + { + "epoch": 0.7641608026196614, + "grad_norm": 0.39932724833488464, + "learning_rate": 6.0440501231114025e-06, + "loss": 0.0886392593383789, + "step": 5484 + }, + { + "epoch": 0.7643001463108757, + "grad_norm": 0.4766104519367218, + "learning_rate": 6.03729906336725e-06, + "loss": 0.0915369987487793, + "step": 5485 + }, + { + "epoch": 0.7644394900020901, + "grad_norm": 0.4621908366680145, + "learning_rate": 6.030551105816465e-06, + "loss": 0.09322547912597656, + "step": 5486 + }, + { + "epoch": 0.7645788336933045, + "grad_norm": 0.6088845729827881, + "learning_rate": 6.0238062519583015e-06, + "loss": 0.10271549224853516, + "step": 5487 + }, + { + "epoch": 0.764718177384519, + "grad_norm": 0.3842020034790039, + "learning_rate": 6.017064503291307e-06, + "loss": 0.07779979705810547, + "step": 5488 + }, + { + "epoch": 0.7648575210757333, + "grad_norm": 0.5344517827033997, + "learning_rate": 6.01032586131334e-06, + "loss": 0.07506561279296875, + "step": 5489 + }, + { + "epoch": 0.7649968647669477, + "grad_norm": 0.5196774005889893, + "learning_rate": 6.0035903275215755e-06, + "loss": 0.09592437744140625, + "step": 5490 + }, + { + "epoch": 0.7651362084581621, + "grad_norm": 0.532392144203186, + "learning_rate": 5.996857903412503e-06, + "loss": 0.08196258544921875, + "step": 5491 + }, + { + "epoch": 0.7652755521493765, + "grad_norm": 0.4662247896194458, + "learning_rate": 5.990128590481907e-06, + "loss": 0.08536338806152344, + "step": 5492 + }, + { + "epoch": 0.7654148958405909, + "grad_norm": 0.3859352767467499, + "learning_rate": 5.983402390224882e-06, + "loss": 0.07709312438964844, + "step": 5493 + }, + { + "epoch": 0.7655542395318052, + "grad_norm": 0.40545904636383057, + "learning_rate": 5.97667930413584e-06, + "loss": 0.08866119384765625, + "step": 5494 + }, + { + "epoch": 0.7656935832230196, + "grad_norm": 0.3700225353240967, + "learning_rate": 5.969959333708506e-06, + "loss": 0.07640838623046875, + "step": 5495 + }, + { + "epoch": 0.765832926914234, + "grad_norm": 0.6198359727859497, + "learning_rate": 5.963242480435889e-06, + "loss": 0.08697319030761719, + "step": 5496 + }, + { + "epoch": 0.7659722706054484, + "grad_norm": 0.3982057571411133, + "learning_rate": 5.95652874581033e-06, + "loss": 0.08537483215332031, + "step": 5497 + }, + { + "epoch": 0.7661116142966627, + "grad_norm": 0.4954818785190582, + "learning_rate": 5.9498181313234726e-06, + "loss": 0.08593368530273438, + "step": 5498 + }, + { + "epoch": 0.7662509579878771, + "grad_norm": 0.4991724193096161, + "learning_rate": 5.943110638466254e-06, + "loss": 0.08586883544921875, + "step": 5499 + }, + { + "epoch": 0.7663903016790915, + "grad_norm": 0.3525455892086029, + "learning_rate": 5.9364062687289226e-06, + "loss": 0.07288169860839844, + "step": 5500 + }, + { + "epoch": 0.7665296453703059, + "grad_norm": 0.38117286562919617, + "learning_rate": 5.929705023601038e-06, + "loss": 0.08379554748535156, + "step": 5501 + }, + { + "epoch": 0.7666689890615203, + "grad_norm": 0.5630479454994202, + "learning_rate": 5.923006904571476e-06, + "loss": 0.11128425598144531, + "step": 5502 + }, + { + "epoch": 0.7668083327527346, + "grad_norm": 0.2950746417045593, + "learning_rate": 5.9163119131283966e-06, + "loss": 0.06830787658691406, + "step": 5503 + }, + { + "epoch": 0.766947676443949, + "grad_norm": 0.5705780982971191, + "learning_rate": 5.909620050759266e-06, + "loss": 0.10599827766418457, + "step": 5504 + }, + { + "epoch": 0.7670870201351634, + "grad_norm": 0.4322032928466797, + "learning_rate": 5.902931318950871e-06, + "loss": 0.08868598937988281, + "step": 5505 + }, + { + "epoch": 0.7672263638263778, + "grad_norm": 0.3706376254558563, + "learning_rate": 5.896245719189304e-06, + "loss": 0.0884695053100586, + "step": 5506 + }, + { + "epoch": 0.7673657075175921, + "grad_norm": 0.5888532996177673, + "learning_rate": 5.889563252959935e-06, + "loss": 0.08802223205566406, + "step": 5507 + }, + { + "epoch": 0.7675050512088065, + "grad_norm": 0.4594912528991699, + "learning_rate": 5.882883921747473e-06, + "loss": 0.08114433288574219, + "step": 5508 + }, + { + "epoch": 0.7676443949000209, + "grad_norm": 0.33626264333724976, + "learning_rate": 5.876207727035897e-06, + "loss": 0.06605005264282227, + "step": 5509 + }, + { + "epoch": 0.7677837385912353, + "grad_norm": 0.37938350439071655, + "learning_rate": 5.869534670308519e-06, + "loss": 0.08378410339355469, + "step": 5510 + }, + { + "epoch": 0.7679230822824497, + "grad_norm": 0.33104798197746277, + "learning_rate": 5.862864753047926e-06, + "loss": 0.07415294647216797, + "step": 5511 + }, + { + "epoch": 0.768062425973664, + "grad_norm": 0.4331651031970978, + "learning_rate": 5.856197976736029e-06, + "loss": 0.09350299835205078, + "step": 5512 + }, + { + "epoch": 0.7682017696648784, + "grad_norm": 0.5395761728286743, + "learning_rate": 5.8495343428540395e-06, + "loss": 0.09454917907714844, + "step": 5513 + }, + { + "epoch": 0.7683411133560928, + "grad_norm": 0.5424758791923523, + "learning_rate": 5.84287385288246e-06, + "loss": 0.07105875015258789, + "step": 5514 + }, + { + "epoch": 0.7684804570473072, + "grad_norm": 0.41007888317108154, + "learning_rate": 5.836216508301089e-06, + "loss": 0.08751296997070312, + "step": 5515 + }, + { + "epoch": 0.7686198007385215, + "grad_norm": 0.3627960979938507, + "learning_rate": 5.829562310589048e-06, + "loss": 0.08168983459472656, + "step": 5516 + }, + { + "epoch": 0.7687591444297359, + "grad_norm": 0.3343975841999054, + "learning_rate": 5.822911261224748e-06, + "loss": 0.08406352996826172, + "step": 5517 + }, + { + "epoch": 0.7688984881209503, + "grad_norm": 0.6167939305305481, + "learning_rate": 5.816263361685892e-06, + "loss": 0.0852346420288086, + "step": 5518 + }, + { + "epoch": 0.7690378318121647, + "grad_norm": 0.4112289249897003, + "learning_rate": 5.809618613449503e-06, + "loss": 0.07661819458007812, + "step": 5519 + }, + { + "epoch": 0.769177175503379, + "grad_norm": 0.42355456948280334, + "learning_rate": 5.802977017991888e-06, + "loss": 0.09277153015136719, + "step": 5520 + }, + { + "epoch": 0.7693165191945934, + "grad_norm": 0.4204612374305725, + "learning_rate": 5.7963385767886496e-06, + "loss": 0.08790016174316406, + "step": 5521 + }, + { + "epoch": 0.7694558628858078, + "grad_norm": 0.5675767660140991, + "learning_rate": 5.789703291314703e-06, + "loss": 0.10265350341796875, + "step": 5522 + }, + { + "epoch": 0.7695952065770222, + "grad_norm": 0.4079345762729645, + "learning_rate": 5.783071163044259e-06, + "loss": 0.0701136589050293, + "step": 5523 + }, + { + "epoch": 0.7697345502682366, + "grad_norm": 0.3500329852104187, + "learning_rate": 5.776442193450835e-06, + "loss": 0.08195507526397705, + "step": 5524 + }, + { + "epoch": 0.7698738939594509, + "grad_norm": 0.390969842672348, + "learning_rate": 5.769816384007216e-06, + "loss": 0.08203649520874023, + "step": 5525 + }, + { + "epoch": 0.7700132376506653, + "grad_norm": 0.725117564201355, + "learning_rate": 5.7631937361855175e-06, + "loss": 0.12070465087890625, + "step": 5526 + }, + { + "epoch": 0.7701525813418797, + "grad_norm": 0.5093226432800293, + "learning_rate": 5.756574251457141e-06, + "loss": 0.08390140533447266, + "step": 5527 + }, + { + "epoch": 0.7702919250330941, + "grad_norm": 0.4639873206615448, + "learning_rate": 5.74995793129278e-06, + "loss": 0.08137321472167969, + "step": 5528 + }, + { + "epoch": 0.7704312687243086, + "grad_norm": 0.4462485909461975, + "learning_rate": 5.743344777162439e-06, + "loss": 0.09415626525878906, + "step": 5529 + }, + { + "epoch": 0.7705706124155229, + "grad_norm": 0.5261973142623901, + "learning_rate": 5.736734790535394e-06, + "loss": 0.08665084838867188, + "step": 5530 + }, + { + "epoch": 0.7707099561067373, + "grad_norm": 0.5581374168395996, + "learning_rate": 5.7301279728802525e-06, + "loss": 0.08413505554199219, + "step": 5531 + }, + { + "epoch": 0.7708492997979517, + "grad_norm": 0.44960764050483704, + "learning_rate": 5.72352432566488e-06, + "loss": 0.08846282958984375, + "step": 5532 + }, + { + "epoch": 0.7709886434891661, + "grad_norm": 0.5661095380783081, + "learning_rate": 5.716923850356466e-06, + "loss": 0.09904193878173828, + "step": 5533 + }, + { + "epoch": 0.7711279871803804, + "grad_norm": 0.3763776421546936, + "learning_rate": 5.71032654842149e-06, + "loss": 0.08348846435546875, + "step": 5534 + }, + { + "epoch": 0.7712673308715948, + "grad_norm": 0.3582797944545746, + "learning_rate": 5.703732421325716e-06, + "loss": 0.08208847045898438, + "step": 5535 + }, + { + "epoch": 0.7714066745628092, + "grad_norm": 0.7971197962760925, + "learning_rate": 5.697141470534204e-06, + "loss": 0.1168212890625, + "step": 5536 + }, + { + "epoch": 0.7715460182540236, + "grad_norm": 0.5413044691085815, + "learning_rate": 5.690553697511316e-06, + "loss": 0.10740852355957031, + "step": 5537 + }, + { + "epoch": 0.771685361945238, + "grad_norm": 0.4881693720817566, + "learning_rate": 5.683969103720712e-06, + "loss": 0.09154701232910156, + "step": 5538 + }, + { + "epoch": 0.7718247056364523, + "grad_norm": 0.6214826107025146, + "learning_rate": 5.677387690625329e-06, + "loss": 0.0951080322265625, + "step": 5539 + }, + { + "epoch": 0.7719640493276667, + "grad_norm": 0.41043612360954285, + "learning_rate": 5.670809459687414e-06, + "loss": 0.07759857177734375, + "step": 5540 + }, + { + "epoch": 0.7721033930188811, + "grad_norm": 0.33975812792778015, + "learning_rate": 5.66423441236849e-06, + "loss": 0.07067108154296875, + "step": 5541 + }, + { + "epoch": 0.7722427367100955, + "grad_norm": 0.6523184776306152, + "learning_rate": 5.657662550129394e-06, + "loss": 0.09791088104248047, + "step": 5542 + }, + { + "epoch": 0.7723820804013098, + "grad_norm": 0.5760302543640137, + "learning_rate": 5.6510938744302356e-06, + "loss": 0.08871936798095703, + "step": 5543 + }, + { + "epoch": 0.7725214240925242, + "grad_norm": 0.49167659878730774, + "learning_rate": 5.644528386730424e-06, + "loss": 0.0789794921875, + "step": 5544 + }, + { + "epoch": 0.7726607677837386, + "grad_norm": 0.35788965225219727, + "learning_rate": 5.637966088488671e-06, + "loss": 0.07961845397949219, + "step": 5545 + }, + { + "epoch": 0.772800111474953, + "grad_norm": 0.47346049547195435, + "learning_rate": 5.631406981162961e-06, + "loss": 0.09872853755950928, + "step": 5546 + }, + { + "epoch": 0.7729394551661674, + "grad_norm": 0.38782525062561035, + "learning_rate": 5.624851066210575e-06, + "loss": 0.07948493957519531, + "step": 5547 + }, + { + "epoch": 0.7730787988573817, + "grad_norm": 0.38039031624794006, + "learning_rate": 5.618298345088091e-06, + "loss": 0.07245063781738281, + "step": 5548 + }, + { + "epoch": 0.7732181425485961, + "grad_norm": 0.6069446206092834, + "learning_rate": 5.611748819251382e-06, + "loss": 0.12425899505615234, + "step": 5549 + }, + { + "epoch": 0.7733574862398105, + "grad_norm": 0.3773011565208435, + "learning_rate": 5.605202490155588e-06, + "loss": 0.0673990249633789, + "step": 5550 + }, + { + "epoch": 0.7734968299310249, + "grad_norm": 0.623725175857544, + "learning_rate": 5.5986593592551694e-06, + "loss": 0.08447551727294922, + "step": 5551 + }, + { + "epoch": 0.7736361736222392, + "grad_norm": 0.4906350076198578, + "learning_rate": 5.592119428003848e-06, + "loss": 0.09719085693359375, + "step": 5552 + }, + { + "epoch": 0.7737755173134536, + "grad_norm": 0.6741860508918762, + "learning_rate": 5.585582697854657e-06, + "loss": 0.10854911804199219, + "step": 5553 + }, + { + "epoch": 0.773914861004668, + "grad_norm": 0.5228320956230164, + "learning_rate": 5.579049170259896e-06, + "loss": 0.08218526840209961, + "step": 5554 + }, + { + "epoch": 0.7740542046958824, + "grad_norm": 0.6946601867675781, + "learning_rate": 5.572518846671176e-06, + "loss": 0.09097766876220703, + "step": 5555 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.43531742691993713, + "learning_rate": 5.565991728539395e-06, + "loss": 0.07562541961669922, + "step": 5556 + }, + { + "epoch": 0.7743328920783111, + "grad_norm": 0.387584388256073, + "learning_rate": 5.559467817314705e-06, + "loss": 0.07491493225097656, + "step": 5557 + }, + { + "epoch": 0.7744722357695255, + "grad_norm": 0.4174567759037018, + "learning_rate": 5.552947114446583e-06, + "loss": 0.07605314254760742, + "step": 5558 + }, + { + "epoch": 0.7746115794607399, + "grad_norm": 0.5002433061599731, + "learning_rate": 5.546429621383786e-06, + "loss": 0.08672428131103516, + "step": 5559 + }, + { + "epoch": 0.7747509231519543, + "grad_norm": 0.4575958549976349, + "learning_rate": 5.5399153395743425e-06, + "loss": 0.08319282531738281, + "step": 5560 + }, + { + "epoch": 0.7748902668431686, + "grad_norm": 0.3822540044784546, + "learning_rate": 5.533404270465585e-06, + "loss": 0.08188056945800781, + "step": 5561 + }, + { + "epoch": 0.775029610534383, + "grad_norm": 0.4247448742389679, + "learning_rate": 5.526896415504115e-06, + "loss": 0.07839012145996094, + "step": 5562 + }, + { + "epoch": 0.7751689542255974, + "grad_norm": 0.3914552330970764, + "learning_rate": 5.520391776135841e-06, + "loss": 0.0885515809059143, + "step": 5563 + }, + { + "epoch": 0.7753082979168118, + "grad_norm": 0.5026442408561707, + "learning_rate": 5.513890353805933e-06, + "loss": 0.08685684204101562, + "step": 5564 + }, + { + "epoch": 0.7754476416080262, + "grad_norm": 0.5419778227806091, + "learning_rate": 5.507392149958865e-06, + "loss": 0.1046295166015625, + "step": 5565 + }, + { + "epoch": 0.7755869852992405, + "grad_norm": 0.6617847681045532, + "learning_rate": 5.500897166038397e-06, + "loss": 0.09872245788574219, + "step": 5566 + }, + { + "epoch": 0.7757263289904549, + "grad_norm": 0.4573334753513336, + "learning_rate": 5.494405403487557e-06, + "loss": 0.09109210968017578, + "step": 5567 + }, + { + "epoch": 0.7758656726816693, + "grad_norm": 0.3270973861217499, + "learning_rate": 5.487916863748664e-06, + "loss": 0.0687551498413086, + "step": 5568 + }, + { + "epoch": 0.7760050163728838, + "grad_norm": 0.47792038321495056, + "learning_rate": 5.481431548263325e-06, + "loss": 0.09359359741210938, + "step": 5569 + }, + { + "epoch": 0.7761443600640981, + "grad_norm": 0.46077537536621094, + "learning_rate": 5.474949458472438e-06, + "loss": 0.08584022521972656, + "step": 5570 + }, + { + "epoch": 0.7762837037553125, + "grad_norm": 0.5420982837677002, + "learning_rate": 5.468470595816162e-06, + "loss": 0.09032440185546875, + "step": 5571 + }, + { + "epoch": 0.7764230474465269, + "grad_norm": 0.6682063341140747, + "learning_rate": 5.461994961733967e-06, + "loss": 0.09360980987548828, + "step": 5572 + }, + { + "epoch": 0.7765623911377413, + "grad_norm": 0.35550612211227417, + "learning_rate": 5.455522557664576e-06, + "loss": 0.07052135467529297, + "step": 5573 + }, + { + "epoch": 0.7767017348289557, + "grad_norm": 0.3523767292499542, + "learning_rate": 5.449053385046023e-06, + "loss": 0.08538436889648438, + "step": 5574 + }, + { + "epoch": 0.77684107852017, + "grad_norm": 0.4542715549468994, + "learning_rate": 5.442587445315599e-06, + "loss": 0.09320259094238281, + "step": 5575 + }, + { + "epoch": 0.7769804222113844, + "grad_norm": 0.34951385855674744, + "learning_rate": 5.436124739909892e-06, + "loss": 0.07660293579101562, + "step": 5576 + }, + { + "epoch": 0.7771197659025988, + "grad_norm": 0.8790290355682373, + "learning_rate": 5.429665270264774e-06, + "loss": 0.11524009704589844, + "step": 5577 + }, + { + "epoch": 0.7772591095938132, + "grad_norm": 0.6632537841796875, + "learning_rate": 5.423209037815389e-06, + "loss": 0.09040546417236328, + "step": 5578 + }, + { + "epoch": 0.7773984532850275, + "grad_norm": 0.5578469038009644, + "learning_rate": 5.416756043996154e-06, + "loss": 0.09807968139648438, + "step": 5579 + }, + { + "epoch": 0.7775377969762419, + "grad_norm": 0.3623598515987396, + "learning_rate": 5.4103062902407855e-06, + "loss": 0.08054924011230469, + "step": 5580 + }, + { + "epoch": 0.7776771406674563, + "grad_norm": 0.755708634853363, + "learning_rate": 5.403859777982279e-06, + "loss": 0.11249160766601562, + "step": 5581 + }, + { + "epoch": 0.7778164843586707, + "grad_norm": 0.5844423174858093, + "learning_rate": 5.397416508652891e-06, + "loss": 0.10190200805664062, + "step": 5582 + }, + { + "epoch": 0.7779558280498851, + "grad_norm": 0.43972963094711304, + "learning_rate": 5.390976483684167e-06, + "loss": 0.07858467102050781, + "step": 5583 + }, + { + "epoch": 0.7780951717410994, + "grad_norm": 0.4729934334754944, + "learning_rate": 5.384539704506939e-06, + "loss": 0.087432861328125, + "step": 5584 + }, + { + "epoch": 0.7782345154323138, + "grad_norm": 0.36625924706459045, + "learning_rate": 5.378106172551319e-06, + "loss": 0.07037067413330078, + "step": 5585 + }, + { + "epoch": 0.7783738591235282, + "grad_norm": 0.5218802094459534, + "learning_rate": 5.371675889246677e-06, + "loss": 0.09118270874023438, + "step": 5586 + }, + { + "epoch": 0.7785132028147426, + "grad_norm": 0.3243147134780884, + "learning_rate": 5.3652488560216875e-06, + "loss": 0.07727813720703125, + "step": 5587 + }, + { + "epoch": 0.7786525465059569, + "grad_norm": 0.4649814963340759, + "learning_rate": 5.358825074304286e-06, + "loss": 0.08162975311279297, + "step": 5588 + }, + { + "epoch": 0.7787918901971713, + "grad_norm": 0.5347824692726135, + "learning_rate": 5.352404545521683e-06, + "loss": 0.08570671081542969, + "step": 5589 + }, + { + "epoch": 0.7789312338883857, + "grad_norm": 0.6477193236351013, + "learning_rate": 5.345987271100381e-06, + "loss": 0.10363197326660156, + "step": 5590 + }, + { + "epoch": 0.7790705775796001, + "grad_norm": 0.362225741147995, + "learning_rate": 5.339573252466155e-06, + "loss": 0.07631492614746094, + "step": 5591 + }, + { + "epoch": 0.7792099212708145, + "grad_norm": 0.5481685400009155, + "learning_rate": 5.333162491044044e-06, + "loss": 0.07032394409179688, + "step": 5592 + }, + { + "epoch": 0.7793492649620288, + "grad_norm": 0.5519447922706604, + "learning_rate": 5.3267549882583825e-06, + "loss": 0.08947372436523438, + "step": 5593 + }, + { + "epoch": 0.7794886086532432, + "grad_norm": 0.3777078688144684, + "learning_rate": 5.320350745532761e-06, + "loss": 0.07710552215576172, + "step": 5594 + }, + { + "epoch": 0.7796279523444576, + "grad_norm": 0.5183930993080139, + "learning_rate": 5.3139497642900696e-06, + "loss": 0.08939170837402344, + "step": 5595 + }, + { + "epoch": 0.779767296035672, + "grad_norm": 0.416849285364151, + "learning_rate": 5.307552045952445e-06, + "loss": 0.08004474639892578, + "step": 5596 + }, + { + "epoch": 0.7799066397268863, + "grad_norm": 0.35379672050476074, + "learning_rate": 5.3011575919413214e-06, + "loss": 0.07628631591796875, + "step": 5597 + }, + { + "epoch": 0.7800459834181007, + "grad_norm": 0.45168742537498474, + "learning_rate": 5.294766403677407e-06, + "loss": 0.08105182647705078, + "step": 5598 + }, + { + "epoch": 0.7801853271093151, + "grad_norm": 0.5134316086769104, + "learning_rate": 5.288378482580671e-06, + "loss": 0.08800125122070312, + "step": 5599 + }, + { + "epoch": 0.7803246708005295, + "grad_norm": 0.46381181478500366, + "learning_rate": 5.281993830070357e-06, + "loss": 0.07286405563354492, + "step": 5600 + }, + { + "epoch": 0.7804640144917439, + "grad_norm": 0.6220912933349609, + "learning_rate": 5.275612447565e-06, + "loss": 0.09648513793945312, + "step": 5601 + }, + { + "epoch": 0.7806033581829582, + "grad_norm": 0.6051953434944153, + "learning_rate": 5.269234336482396e-06, + "loss": 0.08842086791992188, + "step": 5602 + }, + { + "epoch": 0.7807427018741726, + "grad_norm": 0.4557041823863983, + "learning_rate": 5.262859498239614e-06, + "loss": 0.08661270141601562, + "step": 5603 + }, + { + "epoch": 0.780882045565387, + "grad_norm": 0.5296369791030884, + "learning_rate": 5.256487934252992e-06, + "loss": 0.09417057037353516, + "step": 5604 + }, + { + "epoch": 0.7810213892566014, + "grad_norm": 0.5501565337181091, + "learning_rate": 5.25011964593815e-06, + "loss": 0.09945106506347656, + "step": 5605 + }, + { + "epoch": 0.7811607329478157, + "grad_norm": 0.4134727418422699, + "learning_rate": 5.243754634709983e-06, + "loss": 0.08350944519042969, + "step": 5606 + }, + { + "epoch": 0.7813000766390301, + "grad_norm": 0.38923174142837524, + "learning_rate": 5.237392901982641e-06, + "loss": 0.08512306213378906, + "step": 5607 + }, + { + "epoch": 0.7814394203302445, + "grad_norm": 0.31357836723327637, + "learning_rate": 5.2310344491695585e-06, + "loss": 0.07630205154418945, + "step": 5608 + }, + { + "epoch": 0.781578764021459, + "grad_norm": 0.7080410718917847, + "learning_rate": 5.2246792776834466e-06, + "loss": 0.09808731079101562, + "step": 5609 + }, + { + "epoch": 0.7817181077126734, + "grad_norm": 0.5742586255073547, + "learning_rate": 5.218327388936273e-06, + "loss": 0.09773445129394531, + "step": 5610 + }, + { + "epoch": 0.7818574514038877, + "grad_norm": 0.32293471693992615, + "learning_rate": 5.211978784339275e-06, + "loss": 0.07332420349121094, + "step": 5611 + }, + { + "epoch": 0.7819967950951021, + "grad_norm": 0.4728336036205292, + "learning_rate": 5.205633465302977e-06, + "loss": 0.096649169921875, + "step": 5612 + }, + { + "epoch": 0.7821361387863165, + "grad_norm": 0.39178264141082764, + "learning_rate": 5.199291433237168e-06, + "loss": 0.08494377136230469, + "step": 5613 + }, + { + "epoch": 0.7822754824775309, + "grad_norm": 0.4289361536502838, + "learning_rate": 5.192952689550894e-06, + "loss": 0.0833749771118164, + "step": 5614 + }, + { + "epoch": 0.7824148261687452, + "grad_norm": 0.3662031888961792, + "learning_rate": 5.186617235652478e-06, + "loss": 0.07186126708984375, + "step": 5615 + }, + { + "epoch": 0.7825541698599596, + "grad_norm": 0.3758457601070404, + "learning_rate": 5.180285072949516e-06, + "loss": 0.08744633197784424, + "step": 5616 + }, + { + "epoch": 0.782693513551174, + "grad_norm": 0.27948933839797974, + "learning_rate": 5.173956202848878e-06, + "loss": 0.06302070617675781, + "step": 5617 + }, + { + "epoch": 0.7828328572423884, + "grad_norm": 0.6157004833221436, + "learning_rate": 5.167630626756681e-06, + "loss": 0.10000801086425781, + "step": 5618 + }, + { + "epoch": 0.7829722009336028, + "grad_norm": 0.44030478596687317, + "learning_rate": 5.161308346078335e-06, + "loss": 0.09460830688476562, + "step": 5619 + }, + { + "epoch": 0.7831115446248171, + "grad_norm": 0.563418984413147, + "learning_rate": 5.154989362218501e-06, + "loss": 0.08044624328613281, + "step": 5620 + }, + { + "epoch": 0.7832508883160315, + "grad_norm": 0.42593619227409363, + "learning_rate": 5.148673676581109e-06, + "loss": 0.0923614501953125, + "step": 5621 + }, + { + "epoch": 0.7833902320072459, + "grad_norm": 0.41905221343040466, + "learning_rate": 5.142361290569365e-06, + "loss": 0.09197235107421875, + "step": 5622 + }, + { + "epoch": 0.7835295756984603, + "grad_norm": 0.4670744836330414, + "learning_rate": 5.136052205585735e-06, + "loss": 0.08808326721191406, + "step": 5623 + }, + { + "epoch": 0.7836689193896746, + "grad_norm": 0.640080451965332, + "learning_rate": 5.129746423031967e-06, + "loss": 0.08631324768066406, + "step": 5624 + }, + { + "epoch": 0.783808263080889, + "grad_norm": 0.538173496723175, + "learning_rate": 5.123443944309039e-06, + "loss": 0.09264183044433594, + "step": 5625 + }, + { + "epoch": 0.7839476067721034, + "grad_norm": 0.5762525796890259, + "learning_rate": 5.1171447708172285e-06, + "loss": 0.10431194305419922, + "step": 5626 + }, + { + "epoch": 0.7840869504633178, + "grad_norm": 0.3509763181209564, + "learning_rate": 5.110848903956076e-06, + "loss": 0.0856781005859375, + "step": 5627 + }, + { + "epoch": 0.7842262941545322, + "grad_norm": 0.5934700965881348, + "learning_rate": 5.104556345124363e-06, + "loss": 0.10234642028808594, + "step": 5628 + }, + { + "epoch": 0.7843656378457465, + "grad_norm": 1.0707248449325562, + "learning_rate": 5.098267095720164e-06, + "loss": 0.10138893127441406, + "step": 5629 + }, + { + "epoch": 0.7845049815369609, + "grad_norm": 0.36728331446647644, + "learning_rate": 5.091981157140808e-06, + "loss": 0.08849143981933594, + "step": 5630 + }, + { + "epoch": 0.7846443252281753, + "grad_norm": 0.42935293912887573, + "learning_rate": 5.085698530782885e-06, + "loss": 0.0728302001953125, + "step": 5631 + }, + { + "epoch": 0.7847836689193897, + "grad_norm": 0.4349294602870941, + "learning_rate": 5.079419218042243e-06, + "loss": 0.08720111846923828, + "step": 5632 + }, + { + "epoch": 0.784923012610604, + "grad_norm": 0.3869522511959076, + "learning_rate": 5.073143220314007e-06, + "loss": 0.0824127197265625, + "step": 5633 + }, + { + "epoch": 0.7850623563018184, + "grad_norm": 0.29073867201805115, + "learning_rate": 5.066870538992568e-06, + "loss": 0.07031059265136719, + "step": 5634 + }, + { + "epoch": 0.7852016999930328, + "grad_norm": 0.4189479351043701, + "learning_rate": 5.060601175471567e-06, + "loss": 0.08640670776367188, + "step": 5635 + }, + { + "epoch": 0.7853410436842472, + "grad_norm": 0.8211479187011719, + "learning_rate": 5.054335131143906e-06, + "loss": 0.11723899841308594, + "step": 5636 + }, + { + "epoch": 0.7854803873754616, + "grad_norm": 0.4975667893886566, + "learning_rate": 5.0480724074017625e-06, + "loss": 0.08103084564208984, + "step": 5637 + }, + { + "epoch": 0.7856197310666759, + "grad_norm": 0.6895847320556641, + "learning_rate": 5.041813005636578e-06, + "loss": 0.09902763366699219, + "step": 5638 + }, + { + "epoch": 0.7857590747578903, + "grad_norm": 0.5083357691764832, + "learning_rate": 5.035556927239036e-06, + "loss": 0.09302520751953125, + "step": 5639 + }, + { + "epoch": 0.7858984184491047, + "grad_norm": 0.5982008576393127, + "learning_rate": 5.029304173599107e-06, + "loss": 0.08650779724121094, + "step": 5640 + }, + { + "epoch": 0.7860377621403191, + "grad_norm": 0.6853018403053284, + "learning_rate": 5.023054746106e-06, + "loss": 0.09631156921386719, + "step": 5641 + }, + { + "epoch": 0.7861771058315334, + "grad_norm": 0.44802597165107727, + "learning_rate": 5.016808646148204e-06, + "loss": 0.08593034744262695, + "step": 5642 + }, + { + "epoch": 0.7863164495227478, + "grad_norm": 0.4743650257587433, + "learning_rate": 5.0105658751134464e-06, + "loss": 0.07451629638671875, + "step": 5643 + }, + { + "epoch": 0.7864557932139622, + "grad_norm": 0.5740935206413269, + "learning_rate": 5.0043264343887395e-06, + "loss": 0.09851837158203125, + "step": 5644 + }, + { + "epoch": 0.7865951369051766, + "grad_norm": 0.5705467462539673, + "learning_rate": 4.998090325360346e-06, + "loss": 0.08683490753173828, + "step": 5645 + }, + { + "epoch": 0.786734480596391, + "grad_norm": 0.38961493968963623, + "learning_rate": 4.991857549413784e-06, + "loss": 0.08097457885742188, + "step": 5646 + }, + { + "epoch": 0.7868738242876053, + "grad_norm": 0.3823222219944, + "learning_rate": 4.9856281079338265e-06, + "loss": 0.08551216125488281, + "step": 5647 + }, + { + "epoch": 0.7870131679788197, + "grad_norm": 0.47843030095100403, + "learning_rate": 4.979402002304519e-06, + "loss": 0.08018875122070312, + "step": 5648 + }, + { + "epoch": 0.7871525116700342, + "grad_norm": 0.5785119533538818, + "learning_rate": 4.973179233909167e-06, + "loss": 0.08956718444824219, + "step": 5649 + }, + { + "epoch": 0.7872918553612486, + "grad_norm": 0.45790761709213257, + "learning_rate": 4.966959804130314e-06, + "loss": 0.08982086181640625, + "step": 5650 + }, + { + "epoch": 0.787431199052463, + "grad_norm": 0.5402988195419312, + "learning_rate": 4.96074371434979e-06, + "loss": 0.08949470520019531, + "step": 5651 + }, + { + "epoch": 0.7875705427436773, + "grad_norm": 0.4236096739768982, + "learning_rate": 4.954530965948654e-06, + "loss": 0.09095382690429688, + "step": 5652 + }, + { + "epoch": 0.7877098864348917, + "grad_norm": 0.35560017824172974, + "learning_rate": 4.948321560307248e-06, + "loss": 0.07475662231445312, + "step": 5653 + }, + { + "epoch": 0.7878492301261061, + "grad_norm": 0.39766693115234375, + "learning_rate": 4.942115498805151e-06, + "loss": 0.07636165618896484, + "step": 5654 + }, + { + "epoch": 0.7879885738173205, + "grad_norm": 0.32868796586990356, + "learning_rate": 4.9359127828212125e-06, + "loss": 0.070220947265625, + "step": 5655 + }, + { + "epoch": 0.7881279175085348, + "grad_norm": 0.2997928857803345, + "learning_rate": 4.929713413733548e-06, + "loss": 0.06657791137695312, + "step": 5656 + }, + { + "epoch": 0.7882672611997492, + "grad_norm": 0.2753916084766388, + "learning_rate": 4.92351739291949e-06, + "loss": 0.0748751163482666, + "step": 5657 + }, + { + "epoch": 0.7884066048909636, + "grad_norm": 0.5389400720596313, + "learning_rate": 4.917324721755665e-06, + "loss": 0.0875997543334961, + "step": 5658 + }, + { + "epoch": 0.788545948582178, + "grad_norm": 0.4245748519897461, + "learning_rate": 4.911135401617948e-06, + "loss": 0.08033561706542969, + "step": 5659 + }, + { + "epoch": 0.7886852922733923, + "grad_norm": 0.47056517004966736, + "learning_rate": 4.904949433881457e-06, + "loss": 0.08322906494140625, + "step": 5660 + }, + { + "epoch": 0.7888246359646067, + "grad_norm": 0.5137202739715576, + "learning_rate": 4.8987668199205796e-06, + "loss": 0.08792448043823242, + "step": 5661 + }, + { + "epoch": 0.7889639796558211, + "grad_norm": 0.3429279029369354, + "learning_rate": 4.892587561108942e-06, + "loss": 0.07110261917114258, + "step": 5662 + }, + { + "epoch": 0.7891033233470355, + "grad_norm": 0.6217938661575317, + "learning_rate": 4.886411658819448e-06, + "loss": 0.08912849426269531, + "step": 5663 + }, + { + "epoch": 0.7892426670382499, + "grad_norm": 0.445891797542572, + "learning_rate": 4.880239114424226e-06, + "loss": 0.08952522277832031, + "step": 5664 + }, + { + "epoch": 0.7893820107294642, + "grad_norm": 0.6400933265686035, + "learning_rate": 4.874069929294685e-06, + "loss": 0.09918403625488281, + "step": 5665 + }, + { + "epoch": 0.7895213544206786, + "grad_norm": 0.40412020683288574, + "learning_rate": 4.867904104801477e-06, + "loss": 0.08839559555053711, + "step": 5666 + }, + { + "epoch": 0.789660698111893, + "grad_norm": 0.32948774099349976, + "learning_rate": 4.861741642314511e-06, + "loss": 0.0662384033203125, + "step": 5667 + }, + { + "epoch": 0.7898000418031074, + "grad_norm": 1.0342034101486206, + "learning_rate": 4.85558254320293e-06, + "loss": 0.11777496337890625, + "step": 5668 + }, + { + "epoch": 0.7899393854943217, + "grad_norm": 0.43775102496147156, + "learning_rate": 4.849426808835156e-06, + "loss": 0.07658576965332031, + "step": 5669 + }, + { + "epoch": 0.7900787291855361, + "grad_norm": 0.4004105031490326, + "learning_rate": 4.843274440578856e-06, + "loss": 0.08172464370727539, + "step": 5670 + }, + { + "epoch": 0.7902180728767505, + "grad_norm": 0.44847768545150757, + "learning_rate": 4.837125439800936e-06, + "loss": 0.08153724670410156, + "step": 5671 + }, + { + "epoch": 0.7903574165679649, + "grad_norm": 0.4170943796634674, + "learning_rate": 4.830979807867577e-06, + "loss": 0.09496498107910156, + "step": 5672 + }, + { + "epoch": 0.7904967602591793, + "grad_norm": 0.3655461072921753, + "learning_rate": 4.824837546144183e-06, + "loss": 0.07195472717285156, + "step": 5673 + }, + { + "epoch": 0.7906361039503936, + "grad_norm": 0.4772961437702179, + "learning_rate": 4.818698655995437e-06, + "loss": 0.08633613586425781, + "step": 5674 + }, + { + "epoch": 0.790775447641608, + "grad_norm": 0.6355968713760376, + "learning_rate": 4.812563138785249e-06, + "loss": 0.10305023193359375, + "step": 5675 + }, + { + "epoch": 0.7909147913328224, + "grad_norm": 0.4278135597705841, + "learning_rate": 4.806430995876796e-06, + "loss": 0.08139419555664062, + "step": 5676 + }, + { + "epoch": 0.7910541350240368, + "grad_norm": 0.2929006516933441, + "learning_rate": 4.800302228632505e-06, + "loss": 0.06892013549804688, + "step": 5677 + }, + { + "epoch": 0.7911934787152511, + "grad_norm": 0.5231592655181885, + "learning_rate": 4.7941768384140465e-06, + "loss": 0.0787515640258789, + "step": 5678 + }, + { + "epoch": 0.7913328224064655, + "grad_norm": 0.31696438789367676, + "learning_rate": 4.788054826582334e-06, + "loss": 0.0699005126953125, + "step": 5679 + }, + { + "epoch": 0.7914721660976799, + "grad_norm": 0.46728989481925964, + "learning_rate": 4.781936194497543e-06, + "loss": 0.0941171646118164, + "step": 5680 + }, + { + "epoch": 0.7916115097888943, + "grad_norm": 0.46915775537490845, + "learning_rate": 4.7758209435191e-06, + "loss": 0.09267616271972656, + "step": 5681 + }, + { + "epoch": 0.7917508534801087, + "grad_norm": 0.37583649158477783, + "learning_rate": 4.769709075005673e-06, + "loss": 0.07825088500976562, + "step": 5682 + }, + { + "epoch": 0.791890197171323, + "grad_norm": 0.6129453182220459, + "learning_rate": 4.763600590315167e-06, + "loss": 0.09086418151855469, + "step": 5683 + }, + { + "epoch": 0.7920295408625374, + "grad_norm": 0.6027799844741821, + "learning_rate": 4.757495490804758e-06, + "loss": 0.10019302368164062, + "step": 5684 + }, + { + "epoch": 0.7921688845537518, + "grad_norm": 0.5873324275016785, + "learning_rate": 4.751393777830864e-06, + "loss": 0.10791397094726562, + "step": 5685 + }, + { + "epoch": 0.7923082282449662, + "grad_norm": 0.3738185167312622, + "learning_rate": 4.745295452749137e-06, + "loss": 0.08754205703735352, + "step": 5686 + }, + { + "epoch": 0.7924475719361805, + "grad_norm": 0.4201476275920868, + "learning_rate": 4.739200516914488e-06, + "loss": 0.07528305053710938, + "step": 5687 + }, + { + "epoch": 0.7925869156273949, + "grad_norm": 0.5649603605270386, + "learning_rate": 4.7331089716810866e-06, + "loss": 0.08350861072540283, + "step": 5688 + }, + { + "epoch": 0.7927262593186094, + "grad_norm": 0.4810270667076111, + "learning_rate": 4.727020818402311e-06, + "loss": 0.08884620666503906, + "step": 5689 + }, + { + "epoch": 0.7928656030098238, + "grad_norm": 0.34573453664779663, + "learning_rate": 4.720936058430818e-06, + "loss": 0.07581710815429688, + "step": 5690 + }, + { + "epoch": 0.7930049467010382, + "grad_norm": 0.525547206401825, + "learning_rate": 4.714854693118515e-06, + "loss": 0.0940999984741211, + "step": 5691 + }, + { + "epoch": 0.7931442903922525, + "grad_norm": 0.3925651013851166, + "learning_rate": 4.708776723816528e-06, + "loss": 0.07420969009399414, + "step": 5692 + }, + { + "epoch": 0.7932836340834669, + "grad_norm": 0.5540741682052612, + "learning_rate": 4.702702151875253e-06, + "loss": 0.09130477905273438, + "step": 5693 + }, + { + "epoch": 0.7934229777746813, + "grad_norm": 0.4680781364440918, + "learning_rate": 4.696630978644312e-06, + "loss": 0.08218574523925781, + "step": 5694 + }, + { + "epoch": 0.7935623214658957, + "grad_norm": 0.6124250888824463, + "learning_rate": 4.690563205472589e-06, + "loss": 0.10333991050720215, + "step": 5695 + }, + { + "epoch": 0.79370166515711, + "grad_norm": 0.8028839230537415, + "learning_rate": 4.684498833708198e-06, + "loss": 0.10320472717285156, + "step": 5696 + }, + { + "epoch": 0.7938410088483244, + "grad_norm": 0.410843163728714, + "learning_rate": 4.678437864698507e-06, + "loss": 0.07389450073242188, + "step": 5697 + }, + { + "epoch": 0.7939803525395388, + "grad_norm": 0.36649322509765625, + "learning_rate": 4.672380299790131e-06, + "loss": 0.07136344909667969, + "step": 5698 + }, + { + "epoch": 0.7941196962307532, + "grad_norm": 0.43550553917884827, + "learning_rate": 4.6663261403289165e-06, + "loss": 0.07274818420410156, + "step": 5699 + }, + { + "epoch": 0.7942590399219676, + "grad_norm": 0.39346927404403687, + "learning_rate": 4.660275387659954e-06, + "loss": 0.08456802368164062, + "step": 5700 + }, + { + "epoch": 0.7943983836131819, + "grad_norm": 0.5930477380752563, + "learning_rate": 4.654228043127589e-06, + "loss": 0.07785987854003906, + "step": 5701 + }, + { + "epoch": 0.7945377273043963, + "grad_norm": 0.5130472183227539, + "learning_rate": 4.6481841080754064e-06, + "loss": 0.09330940246582031, + "step": 5702 + }, + { + "epoch": 0.7946770709956107, + "grad_norm": 0.686312735080719, + "learning_rate": 4.642143583846223e-06, + "loss": 0.10816383361816406, + "step": 5703 + }, + { + "epoch": 0.7948164146868251, + "grad_norm": 0.5864723920822144, + "learning_rate": 4.636106471782116e-06, + "loss": 0.10375022888183594, + "step": 5704 + }, + { + "epoch": 0.7949557583780394, + "grad_norm": 0.5456990003585815, + "learning_rate": 4.630072773224381e-06, + "loss": 0.09648895263671875, + "step": 5705 + }, + { + "epoch": 0.7950951020692538, + "grad_norm": 0.4253915250301361, + "learning_rate": 4.62404248951358e-06, + "loss": 0.07839107513427734, + "step": 5706 + }, + { + "epoch": 0.7952344457604682, + "grad_norm": 0.43717122077941895, + "learning_rate": 4.618015621989493e-06, + "loss": 0.07904195785522461, + "step": 5707 + }, + { + "epoch": 0.7953737894516826, + "grad_norm": 0.5903695821762085, + "learning_rate": 4.6119921719911596e-06, + "loss": 0.09080886840820312, + "step": 5708 + }, + { + "epoch": 0.795513133142897, + "grad_norm": 0.5353839993476868, + "learning_rate": 4.605972140856856e-06, + "loss": 0.09074783325195312, + "step": 5709 + }, + { + "epoch": 0.7956524768341113, + "grad_norm": 0.6780092716217041, + "learning_rate": 4.599955529924089e-06, + "loss": 0.08613204956054688, + "step": 5710 + }, + { + "epoch": 0.7957918205253257, + "grad_norm": 0.512488842010498, + "learning_rate": 4.593942340529612e-06, + "loss": 0.08782196044921875, + "step": 5711 + }, + { + "epoch": 0.7959311642165401, + "grad_norm": 0.6922951936721802, + "learning_rate": 4.587932574009417e-06, + "loss": 0.10662269592285156, + "step": 5712 + }, + { + "epoch": 0.7960705079077545, + "grad_norm": 0.47552958130836487, + "learning_rate": 4.58192623169875e-06, + "loss": 0.08951187133789062, + "step": 5713 + }, + { + "epoch": 0.7962098515989688, + "grad_norm": 0.4266712963581085, + "learning_rate": 4.575923314932072e-06, + "loss": 0.07558631896972656, + "step": 5714 + }, + { + "epoch": 0.7963491952901832, + "grad_norm": 0.43594786524772644, + "learning_rate": 4.56992382504309e-06, + "loss": 0.06997394561767578, + "step": 5715 + }, + { + "epoch": 0.7964885389813976, + "grad_norm": 0.39012372493743896, + "learning_rate": 4.563927763364759e-06, + "loss": 0.08131027221679688, + "step": 5716 + }, + { + "epoch": 0.796627882672612, + "grad_norm": 0.5453367233276367, + "learning_rate": 4.557935131229274e-06, + "loss": 0.10774040222167969, + "step": 5717 + }, + { + "epoch": 0.7967672263638264, + "grad_norm": 0.38894590735435486, + "learning_rate": 4.5519459299680465e-06, + "loss": 0.08769035339355469, + "step": 5718 + }, + { + "epoch": 0.7969065700550407, + "grad_norm": 0.45403632521629333, + "learning_rate": 4.545960160911758e-06, + "loss": 0.08070564270019531, + "step": 5719 + }, + { + "epoch": 0.7970459137462551, + "grad_norm": 0.6356760263442993, + "learning_rate": 4.539977825390296e-06, + "loss": 0.08827590942382812, + "step": 5720 + }, + { + "epoch": 0.7971852574374695, + "grad_norm": 0.3998755216598511, + "learning_rate": 4.533998924732801e-06, + "loss": 0.08047866821289062, + "step": 5721 + }, + { + "epoch": 0.7973246011286839, + "grad_norm": 0.31669536232948303, + "learning_rate": 4.528023460267648e-06, + "loss": 0.07604408264160156, + "step": 5722 + }, + { + "epoch": 0.7974639448198982, + "grad_norm": 0.5885946154594421, + "learning_rate": 4.52205143332245e-06, + "loss": 0.1237335205078125, + "step": 5723 + }, + { + "epoch": 0.7976032885111126, + "grad_norm": 0.5600484013557434, + "learning_rate": 4.516082845224063e-06, + "loss": 0.09393692016601562, + "step": 5724 + }, + { + "epoch": 0.797742632202327, + "grad_norm": 0.4626161754131317, + "learning_rate": 4.510117697298564e-06, + "loss": 0.09180641174316406, + "step": 5725 + }, + { + "epoch": 0.7978819758935414, + "grad_norm": 0.4723499119281769, + "learning_rate": 4.504155990871266e-06, + "loss": 0.09148788452148438, + "step": 5726 + }, + { + "epoch": 0.7980213195847558, + "grad_norm": 0.29648101329803467, + "learning_rate": 4.498197727266738e-06, + "loss": 0.06063079833984375, + "step": 5727 + }, + { + "epoch": 0.7981606632759701, + "grad_norm": 0.5404139161109924, + "learning_rate": 4.4922429078087545e-06, + "loss": 0.08923149108886719, + "step": 5728 + }, + { + "epoch": 0.7983000069671845, + "grad_norm": 0.3879506587982178, + "learning_rate": 4.486291533820351e-06, + "loss": 0.07639694213867188, + "step": 5729 + }, + { + "epoch": 0.798439350658399, + "grad_norm": 0.37722048163414, + "learning_rate": 4.480343606623789e-06, + "loss": 0.0796651840209961, + "step": 5730 + }, + { + "epoch": 0.7985786943496134, + "grad_norm": 0.45591723918914795, + "learning_rate": 4.474399127540561e-06, + "loss": 0.08411693572998047, + "step": 5731 + }, + { + "epoch": 0.7987180380408277, + "grad_norm": 0.30879536271095276, + "learning_rate": 4.4684580978913815e-06, + "loss": 0.07451200485229492, + "step": 5732 + }, + { + "epoch": 0.7988573817320421, + "grad_norm": 0.46484968066215515, + "learning_rate": 4.462520518996225e-06, + "loss": 0.10408210754394531, + "step": 5733 + }, + { + "epoch": 0.7989967254232565, + "grad_norm": 0.647974967956543, + "learning_rate": 4.456586392174285e-06, + "loss": 0.08360767364501953, + "step": 5734 + }, + { + "epoch": 0.7991360691144709, + "grad_norm": 0.5050773024559021, + "learning_rate": 4.450655718743988e-06, + "loss": 0.09797096252441406, + "step": 5735 + }, + { + "epoch": 0.7992754128056853, + "grad_norm": 0.47879594564437866, + "learning_rate": 4.444728500022988e-06, + "loss": 0.08106803894042969, + "step": 5736 + }, + { + "epoch": 0.7994147564968996, + "grad_norm": 0.5411527156829834, + "learning_rate": 4.4388047373281815e-06, + "loss": 0.09481430053710938, + "step": 5737 + }, + { + "epoch": 0.799554100188114, + "grad_norm": 0.3811016082763672, + "learning_rate": 4.432884431975699e-06, + "loss": 0.08054924011230469, + "step": 5738 + }, + { + "epoch": 0.7996934438793284, + "grad_norm": 0.620241641998291, + "learning_rate": 4.426967585280888e-06, + "loss": 0.08657169342041016, + "step": 5739 + }, + { + "epoch": 0.7998327875705428, + "grad_norm": 0.5222318172454834, + "learning_rate": 4.4210541985583455e-06, + "loss": 0.08925771713256836, + "step": 5740 + }, + { + "epoch": 0.7999721312617571, + "grad_norm": 0.45213934779167175, + "learning_rate": 4.415144273121883e-06, + "loss": 0.09625434875488281, + "step": 5741 + }, + { + "epoch": 0.8001114749529715, + "grad_norm": 0.8572303652763367, + "learning_rate": 4.409237810284559e-06, + "loss": 0.11771774291992188, + "step": 5742 + }, + { + "epoch": 0.8002508186441859, + "grad_norm": 0.6003172993659973, + "learning_rate": 4.4033348113586465e-06, + "loss": 0.09063148498535156, + "step": 5743 + }, + { + "epoch": 0.8003901623354003, + "grad_norm": 0.2960137128829956, + "learning_rate": 4.39743527765566e-06, + "loss": 0.06714439392089844, + "step": 5744 + }, + { + "epoch": 0.8005295060266147, + "grad_norm": 0.465549498796463, + "learning_rate": 4.391539210486346e-06, + "loss": 0.08212852478027344, + "step": 5745 + }, + { + "epoch": 0.800668849717829, + "grad_norm": 0.7242010831832886, + "learning_rate": 4.385646611160674e-06, + "loss": 0.09867477416992188, + "step": 5746 + }, + { + "epoch": 0.8008081934090434, + "grad_norm": 0.44087377190589905, + "learning_rate": 4.379757480987836e-06, + "loss": 0.07913541793823242, + "step": 5747 + }, + { + "epoch": 0.8009475371002578, + "grad_norm": 0.42828792333602905, + "learning_rate": 4.373871821276272e-06, + "loss": 0.08856678009033203, + "step": 5748 + }, + { + "epoch": 0.8010868807914722, + "grad_norm": 0.3539843261241913, + "learning_rate": 4.367989633333642e-06, + "loss": 0.07413482666015625, + "step": 5749 + }, + { + "epoch": 0.8012262244826865, + "grad_norm": 0.6980888247489929, + "learning_rate": 4.362110918466826e-06, + "loss": 0.11032676696777344, + "step": 5750 + }, + { + "epoch": 0.8013655681739009, + "grad_norm": 0.41728731989860535, + "learning_rate": 4.356235677981952e-06, + "loss": 0.08135128021240234, + "step": 5751 + }, + { + "epoch": 0.8015049118651153, + "grad_norm": 0.47294503450393677, + "learning_rate": 4.350363913184355e-06, + "loss": 0.09424781799316406, + "step": 5752 + }, + { + "epoch": 0.8016442555563297, + "grad_norm": 0.6080089211463928, + "learning_rate": 4.3444956253786044e-06, + "loss": 0.10257530212402344, + "step": 5753 + }, + { + "epoch": 0.801783599247544, + "grad_norm": 0.4683364927768707, + "learning_rate": 4.338630815868505e-06, + "loss": 0.09149646759033203, + "step": 5754 + }, + { + "epoch": 0.8019229429387584, + "grad_norm": 0.44038474559783936, + "learning_rate": 4.3327694859570824e-06, + "loss": 0.08331489562988281, + "step": 5755 + }, + { + "epoch": 0.8020622866299728, + "grad_norm": 0.3694160580635071, + "learning_rate": 4.326911636946603e-06, + "loss": 0.07800674438476562, + "step": 5756 + }, + { + "epoch": 0.8022016303211872, + "grad_norm": 0.647652804851532, + "learning_rate": 4.321057270138525e-06, + "loss": 0.09317970275878906, + "step": 5757 + }, + { + "epoch": 0.8023409740124016, + "grad_norm": 0.44589248299598694, + "learning_rate": 4.315206386833562e-06, + "loss": 0.08190536499023438, + "step": 5758 + }, + { + "epoch": 0.8024803177036159, + "grad_norm": 0.5221806168556213, + "learning_rate": 4.309358988331658e-06, + "loss": 0.10128402709960938, + "step": 5759 + }, + { + "epoch": 0.8026196613948303, + "grad_norm": 0.5143051743507385, + "learning_rate": 4.303515075931957e-06, + "loss": 0.09404277801513672, + "step": 5760 + }, + { + "epoch": 0.8027590050860447, + "grad_norm": 0.5578770637512207, + "learning_rate": 4.297674650932848e-06, + "loss": 0.09896087646484375, + "step": 5761 + }, + { + "epoch": 0.8028983487772591, + "grad_norm": 0.32395726442337036, + "learning_rate": 4.2918377146319505e-06, + "loss": 0.07469463348388672, + "step": 5762 + }, + { + "epoch": 0.8030376924684735, + "grad_norm": 0.3464997410774231, + "learning_rate": 4.286004268326085e-06, + "loss": 0.07236480712890625, + "step": 5763 + }, + { + "epoch": 0.8031770361596878, + "grad_norm": 0.4541209936141968, + "learning_rate": 4.280174313311311e-06, + "loss": 0.08692359924316406, + "step": 5764 + }, + { + "epoch": 0.8033163798509022, + "grad_norm": 0.42618080973625183, + "learning_rate": 4.274347850882916e-06, + "loss": 0.07645988464355469, + "step": 5765 + }, + { + "epoch": 0.8034557235421166, + "grad_norm": 0.46270638704299927, + "learning_rate": 4.26852488233541e-06, + "loss": 0.07584190368652344, + "step": 5766 + }, + { + "epoch": 0.803595067233331, + "grad_norm": 0.4361879229545593, + "learning_rate": 4.26270540896252e-06, + "loss": 0.07712268829345703, + "step": 5767 + }, + { + "epoch": 0.8037344109245453, + "grad_norm": 0.5844576358795166, + "learning_rate": 4.256889432057194e-06, + "loss": 0.0819922685623169, + "step": 5768 + }, + { + "epoch": 0.8038737546157597, + "grad_norm": 0.4824138879776001, + "learning_rate": 4.251076952911615e-06, + "loss": 0.08609962463378906, + "step": 5769 + }, + { + "epoch": 0.8040130983069742, + "grad_norm": 0.446856826543808, + "learning_rate": 4.245267972817189e-06, + "loss": 0.08533191680908203, + "step": 5770 + }, + { + "epoch": 0.8041524419981886, + "grad_norm": 0.6147788166999817, + "learning_rate": 4.239462493064525e-06, + "loss": 0.09339714050292969, + "step": 5771 + }, + { + "epoch": 0.804291785689403, + "grad_norm": 0.4642191529273987, + "learning_rate": 4.233660514943483e-06, + "loss": 0.08295822143554688, + "step": 5772 + }, + { + "epoch": 0.8044311293806173, + "grad_norm": 0.4270544946193695, + "learning_rate": 4.227862039743118e-06, + "loss": 0.09252691268920898, + "step": 5773 + }, + { + "epoch": 0.8045704730718317, + "grad_norm": 0.4490325152873993, + "learning_rate": 4.2220670687517275e-06, + "loss": 0.08411026000976562, + "step": 5774 + }, + { + "epoch": 0.8047098167630461, + "grad_norm": 0.3785026967525482, + "learning_rate": 4.216275603256814e-06, + "loss": 0.07239913940429688, + "step": 5775 + }, + { + "epoch": 0.8048491604542605, + "grad_norm": 0.4442542493343353, + "learning_rate": 4.210487644545112e-06, + "loss": 0.08469772338867188, + "step": 5776 + }, + { + "epoch": 0.8049885041454748, + "grad_norm": 0.5477977395057678, + "learning_rate": 4.204703193902582e-06, + "loss": 0.08867835998535156, + "step": 5777 + }, + { + "epoch": 0.8051278478366892, + "grad_norm": 0.4631172716617584, + "learning_rate": 4.198922252614388e-06, + "loss": 0.09975814819335938, + "step": 5778 + }, + { + "epoch": 0.8052671915279036, + "grad_norm": 0.3939501941204071, + "learning_rate": 4.193144821964918e-06, + "loss": 0.08218097686767578, + "step": 5779 + }, + { + "epoch": 0.805406535219118, + "grad_norm": 0.46420818567276, + "learning_rate": 4.1873709032377926e-06, + "loss": 0.08237075805664062, + "step": 5780 + }, + { + "epoch": 0.8055458789103324, + "grad_norm": 0.5728271007537842, + "learning_rate": 4.181600497715852e-06, + "loss": 0.08856582641601562, + "step": 5781 + }, + { + "epoch": 0.8056852226015467, + "grad_norm": 0.44257646799087524, + "learning_rate": 4.175833606681132e-06, + "loss": 0.09210014343261719, + "step": 5782 + }, + { + "epoch": 0.8058245662927611, + "grad_norm": 0.4503369927406311, + "learning_rate": 4.17007023141492e-06, + "loss": 0.08646202087402344, + "step": 5783 + }, + { + "epoch": 0.8059639099839755, + "grad_norm": 0.3548557758331299, + "learning_rate": 4.164310373197693e-06, + "loss": 0.08450126647949219, + "step": 5784 + }, + { + "epoch": 0.8061032536751899, + "grad_norm": 0.40811264514923096, + "learning_rate": 4.158554033309172e-06, + "loss": 0.08381175994873047, + "step": 5785 + }, + { + "epoch": 0.8062425973664042, + "grad_norm": 0.34150394797325134, + "learning_rate": 4.152801213028273e-06, + "loss": 0.07501983642578125, + "step": 5786 + }, + { + "epoch": 0.8063819410576186, + "grad_norm": 0.3336760699748993, + "learning_rate": 4.147051913633147e-06, + "loss": 0.07858085632324219, + "step": 5787 + }, + { + "epoch": 0.806521284748833, + "grad_norm": 0.6165931224822998, + "learning_rate": 4.1413061364011665e-06, + "loss": 0.0921487808227539, + "step": 5788 + }, + { + "epoch": 0.8066606284400474, + "grad_norm": 0.39254945516586304, + "learning_rate": 4.135563882608893e-06, + "loss": 0.07955360412597656, + "step": 5789 + }, + { + "epoch": 0.8067999721312618, + "grad_norm": 0.5412662625312805, + "learning_rate": 4.129825153532132e-06, + "loss": 0.07840347290039062, + "step": 5790 + }, + { + "epoch": 0.8069393158224761, + "grad_norm": 0.4754251539707184, + "learning_rate": 4.124089950445906e-06, + "loss": 0.08501148223876953, + "step": 5791 + }, + { + "epoch": 0.8070786595136905, + "grad_norm": 0.5443492531776428, + "learning_rate": 4.118358274624435e-06, + "loss": 0.09568977355957031, + "step": 5792 + }, + { + "epoch": 0.8072180032049049, + "grad_norm": 0.3544732332229614, + "learning_rate": 4.112630127341175e-06, + "loss": 0.06668996810913086, + "step": 5793 + }, + { + "epoch": 0.8073573468961193, + "grad_norm": 0.37045806646347046, + "learning_rate": 4.106905509868781e-06, + "loss": 0.08075809478759766, + "step": 5794 + }, + { + "epoch": 0.8074966905873336, + "grad_norm": 0.4959336221218109, + "learning_rate": 4.101184423479143e-06, + "loss": 0.07982826232910156, + "step": 5795 + }, + { + "epoch": 0.807636034278548, + "grad_norm": 0.48801594972610474, + "learning_rate": 4.0954668694433455e-06, + "loss": 0.08231544494628906, + "step": 5796 + }, + { + "epoch": 0.8077753779697624, + "grad_norm": 0.40695759654045105, + "learning_rate": 4.0897528490317025e-06, + "loss": 0.08162689208984375, + "step": 5797 + }, + { + "epoch": 0.8079147216609768, + "grad_norm": 0.28917038440704346, + "learning_rate": 4.084042363513745e-06, + "loss": 0.07293319702148438, + "step": 5798 + }, + { + "epoch": 0.8080540653521912, + "grad_norm": 0.6434286236763, + "learning_rate": 4.078335414158206e-06, + "loss": 0.09202766418457031, + "step": 5799 + }, + { + "epoch": 0.8081934090434055, + "grad_norm": 0.6085471510887146, + "learning_rate": 4.0726320022330345e-06, + "loss": 0.08406925201416016, + "step": 5800 + }, + { + "epoch": 0.8083327527346199, + "grad_norm": 0.38263455033302307, + "learning_rate": 4.066932129005403e-06, + "loss": 0.08362007141113281, + "step": 5801 + }, + { + "epoch": 0.8084720964258343, + "grad_norm": 0.48378312587738037, + "learning_rate": 4.061235795741702e-06, + "loss": 0.07299184799194336, + "step": 5802 + }, + { + "epoch": 0.8086114401170487, + "grad_norm": 0.5100583434104919, + "learning_rate": 4.055543003707514e-06, + "loss": 0.10513687133789062, + "step": 5803 + }, + { + "epoch": 0.808750783808263, + "grad_norm": 0.33301547169685364, + "learning_rate": 4.049853754167656e-06, + "loss": 0.06615924835205078, + "step": 5804 + }, + { + "epoch": 0.8088901274994774, + "grad_norm": 0.4020823836326599, + "learning_rate": 4.0441680483861415e-06, + "loss": 0.08934974670410156, + "step": 5805 + }, + { + "epoch": 0.8090294711906918, + "grad_norm": 0.3623424172401428, + "learning_rate": 4.038485887626214e-06, + "loss": 0.07421684265136719, + "step": 5806 + }, + { + "epoch": 0.8091688148819062, + "grad_norm": 0.8266277313232422, + "learning_rate": 4.032807273150308e-06, + "loss": 0.10510063171386719, + "step": 5807 + }, + { + "epoch": 0.8093081585731206, + "grad_norm": 0.6600053310394287, + "learning_rate": 4.02713220622009e-06, + "loss": 0.08871269226074219, + "step": 5808 + }, + { + "epoch": 0.8094475022643349, + "grad_norm": 0.6739139556884766, + "learning_rate": 4.021460688096435e-06, + "loss": 0.12927627563476562, + "step": 5809 + }, + { + "epoch": 0.8095868459555494, + "grad_norm": 0.6140069365501404, + "learning_rate": 4.015792720039418e-06, + "loss": 0.09095263481140137, + "step": 5810 + }, + { + "epoch": 0.8097261896467638, + "grad_norm": 0.5452148914337158, + "learning_rate": 4.010128303308327e-06, + "loss": 0.10063743591308594, + "step": 5811 + }, + { + "epoch": 0.8098655333379782, + "grad_norm": 0.5778965950012207, + "learning_rate": 4.004467439161672e-06, + "loss": 0.08234596252441406, + "step": 5812 + }, + { + "epoch": 0.8100048770291925, + "grad_norm": 0.881761372089386, + "learning_rate": 3.998810128857174e-06, + "loss": 0.09010887145996094, + "step": 5813 + }, + { + "epoch": 0.8101442207204069, + "grad_norm": 0.3751412630081177, + "learning_rate": 3.993156373651752e-06, + "loss": 0.07754707336425781, + "step": 5814 + }, + { + "epoch": 0.8102835644116213, + "grad_norm": 0.40961238741874695, + "learning_rate": 3.987506174801536e-06, + "loss": 0.07653999328613281, + "step": 5815 + }, + { + "epoch": 0.8104229081028357, + "grad_norm": 0.6074581146240234, + "learning_rate": 3.981859533561876e-06, + "loss": 0.09638214111328125, + "step": 5816 + }, + { + "epoch": 0.8105622517940501, + "grad_norm": 0.30127349495887756, + "learning_rate": 3.976216451187334e-06, + "loss": 0.07214927673339844, + "step": 5817 + }, + { + "epoch": 0.8107015954852644, + "grad_norm": 0.529371976852417, + "learning_rate": 3.97057692893166e-06, + "loss": 0.086883544921875, + "step": 5818 + }, + { + "epoch": 0.8108409391764788, + "grad_norm": 0.35729289054870605, + "learning_rate": 3.964940968047835e-06, + "loss": 0.07527542114257812, + "step": 5819 + }, + { + "epoch": 0.8109802828676932, + "grad_norm": 0.5451917052268982, + "learning_rate": 3.959308569788052e-06, + "loss": 0.08998298645019531, + "step": 5820 + }, + { + "epoch": 0.8111196265589076, + "grad_norm": 0.22824743390083313, + "learning_rate": 3.953679735403677e-06, + "loss": 0.06707191467285156, + "step": 5821 + }, + { + "epoch": 0.811258970250122, + "grad_norm": 0.29729539155960083, + "learning_rate": 3.948054466145324e-06, + "loss": 0.06891632080078125, + "step": 5822 + }, + { + "epoch": 0.8113983139413363, + "grad_norm": 0.4107012450695038, + "learning_rate": 3.942432763262794e-06, + "loss": 0.08848762512207031, + "step": 5823 + }, + { + "epoch": 0.8115376576325507, + "grad_norm": 0.5455485582351685, + "learning_rate": 3.9368146280051104e-06, + "loss": 0.08231472969055176, + "step": 5824 + }, + { + "epoch": 0.8116770013237651, + "grad_norm": 0.5207926630973816, + "learning_rate": 3.931200061620486e-06, + "loss": 0.09907150268554688, + "step": 5825 + }, + { + "epoch": 0.8118163450149795, + "grad_norm": 0.4055742919445038, + "learning_rate": 3.925589065356346e-06, + "loss": 0.09813976287841797, + "step": 5826 + }, + { + "epoch": 0.8119556887061938, + "grad_norm": 0.4752921462059021, + "learning_rate": 3.919981640459336e-06, + "loss": 0.08859825134277344, + "step": 5827 + }, + { + "epoch": 0.8120950323974082, + "grad_norm": 0.40570810437202454, + "learning_rate": 3.914377788175287e-06, + "loss": 0.085601806640625, + "step": 5828 + }, + { + "epoch": 0.8122343760886226, + "grad_norm": 0.5227399468421936, + "learning_rate": 3.908777509749255e-06, + "loss": 0.09682083129882812, + "step": 5829 + }, + { + "epoch": 0.812373719779837, + "grad_norm": 0.4436444044113159, + "learning_rate": 3.903180806425495e-06, + "loss": 0.08163261413574219, + "step": 5830 + }, + { + "epoch": 0.8125130634710513, + "grad_norm": 0.40744802355766296, + "learning_rate": 3.897587679447463e-06, + "loss": 0.07729911804199219, + "step": 5831 + }, + { + "epoch": 0.8126524071622657, + "grad_norm": 0.4271412789821625, + "learning_rate": 3.891998130057819e-06, + "loss": 0.07632827758789062, + "step": 5832 + }, + { + "epoch": 0.8127917508534801, + "grad_norm": 0.49299582839012146, + "learning_rate": 3.886412159498439e-06, + "loss": 0.08993721008300781, + "step": 5833 + }, + { + "epoch": 0.8129310945446945, + "grad_norm": 0.36860448122024536, + "learning_rate": 3.880829769010402e-06, + "loss": 0.07874774932861328, + "step": 5834 + }, + { + "epoch": 0.8130704382359089, + "grad_norm": 0.5998327136039734, + "learning_rate": 3.875250959833982e-06, + "loss": 0.09673595428466797, + "step": 5835 + }, + { + "epoch": 0.8132097819271232, + "grad_norm": 0.33926430344581604, + "learning_rate": 3.869675733208662e-06, + "loss": 0.06697273254394531, + "step": 5836 + }, + { + "epoch": 0.8133491256183376, + "grad_norm": 0.3797609806060791, + "learning_rate": 3.8641040903731335e-06, + "loss": 0.08599138259887695, + "step": 5837 + }, + { + "epoch": 0.813488469309552, + "grad_norm": 0.4011307954788208, + "learning_rate": 3.85853603256529e-06, + "loss": 0.08261680603027344, + "step": 5838 + }, + { + "epoch": 0.8136278130007664, + "grad_norm": 0.4490615427494049, + "learning_rate": 3.852971561022218e-06, + "loss": 0.08521652221679688, + "step": 5839 + }, + { + "epoch": 0.8137671566919807, + "grad_norm": 0.5292568206787109, + "learning_rate": 3.8474106769802255e-06, + "loss": 0.08574676513671875, + "step": 5840 + }, + { + "epoch": 0.8139065003831951, + "grad_norm": 0.4326326549053192, + "learning_rate": 3.841853381674814e-06, + "loss": 0.08141708374023438, + "step": 5841 + }, + { + "epoch": 0.8140458440744095, + "grad_norm": 0.4751594364643097, + "learning_rate": 3.836299676340684e-06, + "loss": 0.08203315734863281, + "step": 5842 + }, + { + "epoch": 0.8141851877656239, + "grad_norm": 0.4885973632335663, + "learning_rate": 3.83074956221174e-06, + "loss": 0.09226799011230469, + "step": 5843 + }, + { + "epoch": 0.8143245314568383, + "grad_norm": 0.5332130789756775, + "learning_rate": 3.825203040521091e-06, + "loss": 0.10114669799804688, + "step": 5844 + }, + { + "epoch": 0.8144638751480526, + "grad_norm": 0.5523147583007812, + "learning_rate": 3.819660112501053e-06, + "loss": 0.08977258205413818, + "step": 5845 + }, + { + "epoch": 0.814603218839267, + "grad_norm": 0.5492939352989197, + "learning_rate": 3.814120779383137e-06, + "loss": 0.09902572631835938, + "step": 5846 + }, + { + "epoch": 0.8147425625304814, + "grad_norm": 0.5327862501144409, + "learning_rate": 3.8085850423980475e-06, + "loss": 0.09153366088867188, + "step": 5847 + }, + { + "epoch": 0.8148819062216958, + "grad_norm": 0.8211087584495544, + "learning_rate": 3.8030529027757057e-06, + "loss": 0.09057807922363281, + "step": 5848 + }, + { + "epoch": 0.8150212499129101, + "grad_norm": 0.5961267948150635, + "learning_rate": 3.797524361745231e-06, + "loss": 0.10108613967895508, + "step": 5849 + }, + { + "epoch": 0.8151605936041246, + "grad_norm": 0.5317868590354919, + "learning_rate": 3.7919994205349287e-06, + "loss": 0.10469865798950195, + "step": 5850 + }, + { + "epoch": 0.815299937295339, + "grad_norm": 0.4638480544090271, + "learning_rate": 3.7864780803723267e-06, + "loss": 0.08966827392578125, + "step": 5851 + }, + { + "epoch": 0.8154392809865534, + "grad_norm": 0.5035756230354309, + "learning_rate": 3.7809603424841346e-06, + "loss": 0.088897705078125, + "step": 5852 + }, + { + "epoch": 0.8155786246777678, + "grad_norm": 0.4524787664413452, + "learning_rate": 3.7754462080962604e-06, + "loss": 0.08759307861328125, + "step": 5853 + }, + { + "epoch": 0.8157179683689821, + "grad_norm": 0.35147038102149963, + "learning_rate": 3.769935678433827e-06, + "loss": 0.07490253448486328, + "step": 5854 + }, + { + "epoch": 0.8158573120601965, + "grad_norm": 0.48902904987335205, + "learning_rate": 3.7644287547211476e-06, + "loss": 0.08611011505126953, + "step": 5855 + }, + { + "epoch": 0.8159966557514109, + "grad_norm": 0.5060574412345886, + "learning_rate": 3.7589254381817397e-06, + "loss": 0.08285999298095703, + "step": 5856 + }, + { + "epoch": 0.8161359994426253, + "grad_norm": 0.6399025917053223, + "learning_rate": 3.753425730038307e-06, + "loss": 0.0977621078491211, + "step": 5857 + }, + { + "epoch": 0.8162753431338396, + "grad_norm": 0.4353226125240326, + "learning_rate": 3.7479296315127588e-06, + "loss": 0.082611083984375, + "step": 5858 + }, + { + "epoch": 0.816414686825054, + "grad_norm": 0.6677847504615784, + "learning_rate": 3.7424371438262096e-06, + "loss": 0.08802604675292969, + "step": 5859 + }, + { + "epoch": 0.8165540305162684, + "grad_norm": 0.5562253594398499, + "learning_rate": 3.7369482681989565e-06, + "loss": 0.07745552062988281, + "step": 5860 + }, + { + "epoch": 0.8166933742074828, + "grad_norm": 0.688869297504425, + "learning_rate": 3.7314630058505063e-06, + "loss": 0.09396171569824219, + "step": 5861 + }, + { + "epoch": 0.8168327178986972, + "grad_norm": 0.3601404130458832, + "learning_rate": 3.725981357999562e-06, + "loss": 0.07665729522705078, + "step": 5862 + }, + { + "epoch": 0.8169720615899115, + "grad_norm": 0.3831263780593872, + "learning_rate": 3.72050332586402e-06, + "loss": 0.08382797241210938, + "step": 5863 + }, + { + "epoch": 0.8171114052811259, + "grad_norm": 0.5050780177116394, + "learning_rate": 3.715028910660967e-06, + "loss": 0.09756851196289062, + "step": 5864 + }, + { + "epoch": 0.8172507489723403, + "grad_norm": 0.34086036682128906, + "learning_rate": 3.709558113606697e-06, + "loss": 0.07021045684814453, + "step": 5865 + }, + { + "epoch": 0.8173900926635547, + "grad_norm": 0.488167941570282, + "learning_rate": 3.704090935916702e-06, + "loss": 0.09235715866088867, + "step": 5866 + }, + { + "epoch": 0.817529436354769, + "grad_norm": 0.49987974762916565, + "learning_rate": 3.6986273788056592e-06, + "loss": 0.08983135223388672, + "step": 5867 + }, + { + "epoch": 0.8176687800459834, + "grad_norm": 0.35127729177474976, + "learning_rate": 3.6931674434874397e-06, + "loss": 0.07606220245361328, + "step": 5868 + }, + { + "epoch": 0.8178081237371978, + "grad_norm": 0.46522533893585205, + "learning_rate": 3.6877111311751246e-06, + "loss": 0.08575439453125, + "step": 5869 + }, + { + "epoch": 0.8179474674284122, + "grad_norm": 0.3935365080833435, + "learning_rate": 3.682258443080986e-06, + "loss": 0.08019065856933594, + "step": 5870 + }, + { + "epoch": 0.8180868111196266, + "grad_norm": 0.448045551776886, + "learning_rate": 3.676809380416475e-06, + "loss": 0.08939743041992188, + "step": 5871 + }, + { + "epoch": 0.8182261548108409, + "grad_norm": 0.42901530861854553, + "learning_rate": 3.671363944392259e-06, + "loss": 0.08589088916778564, + "step": 5872 + }, + { + "epoch": 0.8183654985020553, + "grad_norm": 0.30918946862220764, + "learning_rate": 3.6659221362181827e-06, + "loss": 0.07700061798095703, + "step": 5873 + }, + { + "epoch": 0.8185048421932697, + "grad_norm": 0.5849636793136597, + "learning_rate": 3.660483957103298e-06, + "loss": 0.09083366394042969, + "step": 5874 + }, + { + "epoch": 0.8186441858844841, + "grad_norm": 0.45966729521751404, + "learning_rate": 3.655049408255835e-06, + "loss": 0.10453414916992188, + "step": 5875 + }, + { + "epoch": 0.8187835295756984, + "grad_norm": 0.515082061290741, + "learning_rate": 3.649618490883233e-06, + "loss": 0.08757781982421875, + "step": 5876 + }, + { + "epoch": 0.8189228732669128, + "grad_norm": 0.49627628922462463, + "learning_rate": 3.6441912061921205e-06, + "loss": 0.09876441955566406, + "step": 5877 + }, + { + "epoch": 0.8190622169581272, + "grad_norm": 0.3639180660247803, + "learning_rate": 3.638767555388314e-06, + "loss": 0.07647895812988281, + "step": 5878 + }, + { + "epoch": 0.8192015606493416, + "grad_norm": 0.5664830803871155, + "learning_rate": 3.6333475396768168e-06, + "loss": 0.08533096313476562, + "step": 5879 + }, + { + "epoch": 0.819340904340556, + "grad_norm": 0.49109628796577454, + "learning_rate": 3.6279311602618416e-06, + "loss": 0.10094451904296875, + "step": 5880 + }, + { + "epoch": 0.8194802480317703, + "grad_norm": 0.4223637580871582, + "learning_rate": 3.6225184183467856e-06, + "loss": 0.08112907409667969, + "step": 5881 + }, + { + "epoch": 0.8196195917229847, + "grad_norm": 0.3496672213077545, + "learning_rate": 3.6171093151342264e-06, + "loss": 0.0843496322631836, + "step": 5882 + }, + { + "epoch": 0.8197589354141991, + "grad_norm": 0.4367719888687134, + "learning_rate": 3.611703851825956e-06, + "loss": 0.07679271697998047, + "step": 5883 + }, + { + "epoch": 0.8198982791054135, + "grad_norm": 0.46120238304138184, + "learning_rate": 3.6063020296229344e-06, + "loss": 0.09718787670135498, + "step": 5884 + }, + { + "epoch": 0.8200376227966278, + "grad_norm": 0.8470661044120789, + "learning_rate": 3.60090384972533e-06, + "loss": 0.09649085998535156, + "step": 5885 + }, + { + "epoch": 0.8201769664878422, + "grad_norm": 0.2630860507488251, + "learning_rate": 3.595509313332488e-06, + "loss": 0.0696573257446289, + "step": 5886 + }, + { + "epoch": 0.8203163101790566, + "grad_norm": 0.5845994353294373, + "learning_rate": 3.5901184216429585e-06, + "loss": 0.10410308837890625, + "step": 5887 + }, + { + "epoch": 0.820455653870271, + "grad_norm": 0.40897873044013977, + "learning_rate": 3.584731175854479e-06, + "loss": 0.0687255859375, + "step": 5888 + }, + { + "epoch": 0.8205949975614854, + "grad_norm": 0.33347436785697937, + "learning_rate": 3.5793475771639562e-06, + "loss": 0.07927131652832031, + "step": 5889 + }, + { + "epoch": 0.8207343412526998, + "grad_norm": 0.3757036626338959, + "learning_rate": 3.5739676267675115e-06, + "loss": 0.08461570739746094, + "step": 5890 + }, + { + "epoch": 0.8208736849439142, + "grad_norm": 0.6109104156494141, + "learning_rate": 3.568591325860453e-06, + "loss": 0.08914947509765625, + "step": 5891 + }, + { + "epoch": 0.8210130286351286, + "grad_norm": 0.5242673754692078, + "learning_rate": 3.563218675637261e-06, + "loss": 0.09836769104003906, + "step": 5892 + }, + { + "epoch": 0.821152372326343, + "grad_norm": 0.4417755603790283, + "learning_rate": 3.5578496772916205e-06, + "loss": 0.1034231185913086, + "step": 5893 + }, + { + "epoch": 0.8212917160175573, + "grad_norm": 0.41944971680641174, + "learning_rate": 3.552484332016408e-06, + "loss": 0.08065986633300781, + "step": 5894 + }, + { + "epoch": 0.8214310597087717, + "grad_norm": 0.5294489860534668, + "learning_rate": 3.547122641003671e-06, + "loss": 0.0964202880859375, + "step": 5895 + }, + { + "epoch": 0.8215704033999861, + "grad_norm": 0.7673649191856384, + "learning_rate": 3.5417646054446554e-06, + "loss": 0.10668754577636719, + "step": 5896 + }, + { + "epoch": 0.8217097470912005, + "grad_norm": 0.5230942964553833, + "learning_rate": 3.536410226529794e-06, + "loss": 0.08182144165039062, + "step": 5897 + }, + { + "epoch": 0.8218490907824149, + "grad_norm": 0.3899085819721222, + "learning_rate": 3.5310595054487173e-06, + "loss": 0.0662832260131836, + "step": 5898 + }, + { + "epoch": 0.8219884344736292, + "grad_norm": 0.45950502157211304, + "learning_rate": 3.525712443390226e-06, + "loss": 0.08721160888671875, + "step": 5899 + }, + { + "epoch": 0.8221277781648436, + "grad_norm": 0.516615092754364, + "learning_rate": 3.5203690415423086e-06, + "loss": 0.08637237548828125, + "step": 5900 + }, + { + "epoch": 0.822267121856058, + "grad_norm": 0.3813399076461792, + "learning_rate": 3.5150293010921543e-06, + "loss": 0.08820915222167969, + "step": 5901 + }, + { + "epoch": 0.8224064655472724, + "grad_norm": 0.6455485820770264, + "learning_rate": 3.5096932232261384e-06, + "loss": 0.09549140930175781, + "step": 5902 + }, + { + "epoch": 0.8225458092384867, + "grad_norm": 0.6456183791160583, + "learning_rate": 3.504360809129801e-06, + "loss": 0.07840728759765625, + "step": 5903 + }, + { + "epoch": 0.8226851529297011, + "grad_norm": 0.4570358395576477, + "learning_rate": 3.4990320599878948e-06, + "loss": 0.09734725952148438, + "step": 5904 + }, + { + "epoch": 0.8228244966209155, + "grad_norm": 0.6742041110992432, + "learning_rate": 3.493706976984337e-06, + "loss": 0.08670508861541748, + "step": 5905 + }, + { + "epoch": 0.8229638403121299, + "grad_norm": 0.8149358034133911, + "learning_rate": 3.4883855613022476e-06, + "loss": 0.12465858459472656, + "step": 5906 + }, + { + "epoch": 0.8231031840033443, + "grad_norm": 0.6061658263206482, + "learning_rate": 3.483067814123917e-06, + "loss": 0.0886383056640625, + "step": 5907 + }, + { + "epoch": 0.8232425276945586, + "grad_norm": 0.41177693009376526, + "learning_rate": 3.477753736630829e-06, + "loss": 0.08495140075683594, + "step": 5908 + }, + { + "epoch": 0.823381871385773, + "grad_norm": 0.4123777747154236, + "learning_rate": 3.4724433300036565e-06, + "loss": 0.07734870910644531, + "step": 5909 + }, + { + "epoch": 0.8235212150769874, + "grad_norm": 0.42657747864723206, + "learning_rate": 3.467136595422247e-06, + "loss": 0.08509254455566406, + "step": 5910 + }, + { + "epoch": 0.8236605587682018, + "grad_norm": 0.6895407438278198, + "learning_rate": 3.4618335340656263e-06, + "loss": 0.09589719772338867, + "step": 5911 + }, + { + "epoch": 0.8237999024594161, + "grad_norm": 0.31703630089759827, + "learning_rate": 3.456534147112023e-06, + "loss": 0.06620675325393677, + "step": 5912 + }, + { + "epoch": 0.8239392461506305, + "grad_norm": 0.5417622327804565, + "learning_rate": 3.451238435738844e-06, + "loss": 0.08562374114990234, + "step": 5913 + }, + { + "epoch": 0.8240785898418449, + "grad_norm": 0.45793724060058594, + "learning_rate": 3.445946401122666e-06, + "loss": 0.09283065795898438, + "step": 5914 + }, + { + "epoch": 0.8242179335330593, + "grad_norm": 1.0299822092056274, + "learning_rate": 3.4406580444392647e-06, + "loss": 0.11438369750976562, + "step": 5915 + }, + { + "epoch": 0.8243572772242737, + "grad_norm": 0.568740725517273, + "learning_rate": 3.435373366863586e-06, + "loss": 0.10115432739257812, + "step": 5916 + }, + { + "epoch": 0.824496620915488, + "grad_norm": 0.4646168351173401, + "learning_rate": 3.430092369569773e-06, + "loss": 0.08695030212402344, + "step": 5917 + }, + { + "epoch": 0.8246359646067024, + "grad_norm": 0.7585343718528748, + "learning_rate": 3.4248150537311344e-06, + "loss": 0.10900306701660156, + "step": 5918 + }, + { + "epoch": 0.8247753082979168, + "grad_norm": 0.5057116150856018, + "learning_rate": 3.4195414205201718e-06, + "loss": 0.09801673889160156, + "step": 5919 + }, + { + "epoch": 0.8249146519891312, + "grad_norm": 0.5349431037902832, + "learning_rate": 3.4142714711085765e-06, + "loss": 0.08615326881408691, + "step": 5920 + }, + { + "epoch": 0.8250539956803455, + "grad_norm": 0.49028462171554565, + "learning_rate": 3.409005206667193e-06, + "loss": 0.07734489440917969, + "step": 5921 + }, + { + "epoch": 0.8251933393715599, + "grad_norm": 0.46526023745536804, + "learning_rate": 3.4037426283660734e-06, + "loss": 0.08983135223388672, + "step": 5922 + }, + { + "epoch": 0.8253326830627743, + "grad_norm": 0.562627911567688, + "learning_rate": 3.3984837373744406e-06, + "loss": 0.07441425323486328, + "step": 5923 + }, + { + "epoch": 0.8254720267539887, + "grad_norm": 0.6608875393867493, + "learning_rate": 3.3932285348607108e-06, + "loss": 0.08731603622436523, + "step": 5924 + }, + { + "epoch": 0.825611370445203, + "grad_norm": 0.5862588882446289, + "learning_rate": 3.387977021992459e-06, + "loss": 0.09076118469238281, + "step": 5925 + }, + { + "epoch": 0.8257507141364174, + "grad_norm": 0.5300047993659973, + "learning_rate": 3.38272919993645e-06, + "loss": 0.08691740036010742, + "step": 5926 + }, + { + "epoch": 0.8258900578276318, + "grad_norm": 0.6163104176521301, + "learning_rate": 3.377485069858639e-06, + "loss": 0.09760475158691406, + "step": 5927 + }, + { + "epoch": 0.8260294015188462, + "grad_norm": 0.3881622850894928, + "learning_rate": 3.372244632924142e-06, + "loss": 0.07861137390136719, + "step": 5928 + }, + { + "epoch": 0.8261687452100606, + "grad_norm": 1.0702879428863525, + "learning_rate": 3.3670078902972693e-06, + "loss": 0.1144247055053711, + "step": 5929 + }, + { + "epoch": 0.8263080889012749, + "grad_norm": 0.38450732827186584, + "learning_rate": 3.361774843141512e-06, + "loss": 0.08362960815429688, + "step": 5930 + }, + { + "epoch": 0.8264474325924894, + "grad_norm": 0.658143937587738, + "learning_rate": 3.3565454926195252e-06, + "loss": 0.08719301223754883, + "step": 5931 + }, + { + "epoch": 0.8265867762837038, + "grad_norm": 0.38871052861213684, + "learning_rate": 3.35131983989315e-06, + "loss": 0.07458305358886719, + "step": 5932 + }, + { + "epoch": 0.8267261199749182, + "grad_norm": 0.5647913813591003, + "learning_rate": 3.3460978861234095e-06, + "loss": 0.0999298095703125, + "step": 5933 + }, + { + "epoch": 0.8268654636661326, + "grad_norm": 0.35336533188819885, + "learning_rate": 3.3408796324705085e-06, + "loss": 0.06287145614624023, + "step": 5934 + }, + { + "epoch": 0.8270048073573469, + "grad_norm": 0.4523754417896271, + "learning_rate": 3.335665080093815e-06, + "loss": 0.10455894470214844, + "step": 5935 + }, + { + "epoch": 0.8271441510485613, + "grad_norm": 0.40182051062583923, + "learning_rate": 3.3304542301518915e-06, + "loss": 0.08408355712890625, + "step": 5936 + }, + { + "epoch": 0.8272834947397757, + "grad_norm": 0.4178657829761505, + "learning_rate": 3.325247083802463e-06, + "loss": 0.07397079467773438, + "step": 5937 + }, + { + "epoch": 0.8274228384309901, + "grad_norm": 0.40952590107917786, + "learning_rate": 3.320043642202444e-06, + "loss": 0.07666683197021484, + "step": 5938 + }, + { + "epoch": 0.8275621821222044, + "grad_norm": 0.4857163429260254, + "learning_rate": 3.3148439065079142e-06, + "loss": 0.08585357666015625, + "step": 5939 + }, + { + "epoch": 0.8277015258134188, + "grad_norm": 0.47247380018234253, + "learning_rate": 3.309647877874138e-06, + "loss": 0.08048820495605469, + "step": 5940 + }, + { + "epoch": 0.8278408695046332, + "grad_norm": 0.9093536734580994, + "learning_rate": 3.304455557455564e-06, + "loss": 0.10306930541992188, + "step": 5941 + }, + { + "epoch": 0.8279802131958476, + "grad_norm": 0.4814162850379944, + "learning_rate": 3.299266946405797e-06, + "loss": 0.0774374008178711, + "step": 5942 + }, + { + "epoch": 0.828119556887062, + "grad_norm": 0.36232271790504456, + "learning_rate": 3.294082045877627e-06, + "loss": 0.08179473876953125, + "step": 5943 + }, + { + "epoch": 0.8282589005782763, + "grad_norm": 0.5208905339241028, + "learning_rate": 3.2889008570230228e-06, + "loss": 0.09419441223144531, + "step": 5944 + }, + { + "epoch": 0.8283982442694907, + "grad_norm": 0.39492279291152954, + "learning_rate": 3.2837233809931314e-06, + "loss": 0.07908058166503906, + "step": 5945 + }, + { + "epoch": 0.8285375879607051, + "grad_norm": 0.4313192367553711, + "learning_rate": 3.278549618938267e-06, + "loss": 0.08601093292236328, + "step": 5946 + }, + { + "epoch": 0.8286769316519195, + "grad_norm": 0.4257965385913849, + "learning_rate": 3.2733795720079133e-06, + "loss": 0.08858489990234375, + "step": 5947 + }, + { + "epoch": 0.8288162753431338, + "grad_norm": 0.3546040654182434, + "learning_rate": 3.268213241350746e-06, + "loss": 0.08228683471679688, + "step": 5948 + }, + { + "epoch": 0.8289556190343482, + "grad_norm": 0.40592458844184875, + "learning_rate": 3.263050628114606e-06, + "loss": 0.08298587799072266, + "step": 5949 + }, + { + "epoch": 0.8290949627255626, + "grad_norm": 0.768074631690979, + "learning_rate": 3.2578917334465034e-06, + "loss": 0.08834075927734375, + "step": 5950 + }, + { + "epoch": 0.829234306416777, + "grad_norm": 0.48771265149116516, + "learning_rate": 3.2527365584926264e-06, + "loss": 0.09134101867675781, + "step": 5951 + }, + { + "epoch": 0.8293736501079914, + "grad_norm": 0.5219302177429199, + "learning_rate": 3.2475851043983496e-06, + "loss": 0.09159469604492188, + "step": 5952 + }, + { + "epoch": 0.8295129937992057, + "grad_norm": 0.9504607915878296, + "learning_rate": 3.2424373723081892e-06, + "loss": 0.1055145263671875, + "step": 5953 + }, + { + "epoch": 0.8296523374904201, + "grad_norm": 0.33042415976524353, + "learning_rate": 3.2372933633658633e-06, + "loss": 0.0720815658569336, + "step": 5954 + }, + { + "epoch": 0.8297916811816345, + "grad_norm": 0.3537304997444153, + "learning_rate": 3.2321530787142508e-06, + "loss": 0.07662487030029297, + "step": 5955 + }, + { + "epoch": 0.8299310248728489, + "grad_norm": 0.36762019991874695, + "learning_rate": 3.227016519495414e-06, + "loss": 0.06995320320129395, + "step": 5956 + }, + { + "epoch": 0.8300703685640632, + "grad_norm": 0.49264606833457947, + "learning_rate": 3.221883686850573e-06, + "loss": 0.09782028198242188, + "step": 5957 + }, + { + "epoch": 0.8302097122552776, + "grad_norm": 0.3600722551345825, + "learning_rate": 3.2167545819201227e-06, + "loss": 0.08136415481567383, + "step": 5958 + }, + { + "epoch": 0.830349055946492, + "grad_norm": 0.4615797698497772, + "learning_rate": 3.2116292058436383e-06, + "loss": 0.10813140869140625, + "step": 5959 + }, + { + "epoch": 0.8304883996377064, + "grad_norm": 0.5631641745567322, + "learning_rate": 3.2065075597598573e-06, + "loss": 0.09917640686035156, + "step": 5960 + }, + { + "epoch": 0.8306277433289208, + "grad_norm": 0.43149465322494507, + "learning_rate": 3.201389644806692e-06, + "loss": 0.0753011703491211, + "step": 5961 + }, + { + "epoch": 0.8307670870201351, + "grad_norm": 0.3988392949104309, + "learning_rate": 3.1962754621212345e-06, + "loss": 0.06791973114013672, + "step": 5962 + }, + { + "epoch": 0.8309064307113495, + "grad_norm": 0.6473017334938049, + "learning_rate": 3.1911650128397342e-06, + "loss": 0.08786201477050781, + "step": 5963 + }, + { + "epoch": 0.8310457744025639, + "grad_norm": 0.5664608478546143, + "learning_rate": 3.1860582980976117e-06, + "loss": 0.08923530578613281, + "step": 5964 + }, + { + "epoch": 0.8311851180937783, + "grad_norm": 0.4596116244792938, + "learning_rate": 3.180955319029464e-06, + "loss": 0.08684921264648438, + "step": 5965 + }, + { + "epoch": 0.8313244617849926, + "grad_norm": 0.3243304193019867, + "learning_rate": 3.175856076769066e-06, + "loss": 0.06894683837890625, + "step": 5966 + }, + { + "epoch": 0.831463805476207, + "grad_norm": 0.7892335057258606, + "learning_rate": 3.170760572449345e-06, + "loss": 0.11102104187011719, + "step": 5967 + }, + { + "epoch": 0.8316031491674214, + "grad_norm": 0.45918866991996765, + "learning_rate": 3.1656688072024024e-06, + "loss": 0.0932149887084961, + "step": 5968 + }, + { + "epoch": 0.8317424928586358, + "grad_norm": 0.500602126121521, + "learning_rate": 3.160580782159517e-06, + "loss": 0.0908193588256836, + "step": 5969 + }, + { + "epoch": 0.8318818365498502, + "grad_norm": 0.47510308027267456, + "learning_rate": 3.155496498451136e-06, + "loss": 0.07393741607666016, + "step": 5970 + }, + { + "epoch": 0.8320211802410646, + "grad_norm": 0.4622367024421692, + "learning_rate": 3.1504159572068604e-06, + "loss": 0.0855855941772461, + "step": 5971 + }, + { + "epoch": 0.832160523932279, + "grad_norm": 0.9538397192955017, + "learning_rate": 3.1453391595554783e-06, + "loss": 0.13312530517578125, + "step": 5972 + }, + { + "epoch": 0.8322998676234934, + "grad_norm": 0.6269262433052063, + "learning_rate": 3.140266106624941e-06, + "loss": 0.0987691879272461, + "step": 5973 + }, + { + "epoch": 0.8324392113147078, + "grad_norm": 0.2883225083351135, + "learning_rate": 3.1351967995423594e-06, + "loss": 0.0736236572265625, + "step": 5974 + }, + { + "epoch": 0.8325785550059221, + "grad_norm": 0.6756308674812317, + "learning_rate": 3.1301312394340157e-06, + "loss": 0.11436843872070312, + "step": 5975 + }, + { + "epoch": 0.8327178986971365, + "grad_norm": 0.6296929121017456, + "learning_rate": 3.1250694274253623e-06, + "loss": 0.09361076354980469, + "step": 5976 + }, + { + "epoch": 0.8328572423883509, + "grad_norm": 0.5656833648681641, + "learning_rate": 3.120011364641029e-06, + "loss": 0.10709381103515625, + "step": 5977 + }, + { + "epoch": 0.8329965860795653, + "grad_norm": 0.6407100558280945, + "learning_rate": 3.114957052204792e-06, + "loss": 0.08644866943359375, + "step": 5978 + }, + { + "epoch": 0.8331359297707797, + "grad_norm": 0.4717958867549896, + "learning_rate": 3.1099064912396002e-06, + "loss": 0.095794677734375, + "step": 5979 + }, + { + "epoch": 0.833275273461994, + "grad_norm": 0.5858399868011475, + "learning_rate": 3.1048596828675805e-06, + "loss": 0.0793619155883789, + "step": 5980 + }, + { + "epoch": 0.8334146171532084, + "grad_norm": 0.5985553860664368, + "learning_rate": 3.0998166282100215e-06, + "loss": 0.08995246887207031, + "step": 5981 + }, + { + "epoch": 0.8335539608444228, + "grad_norm": 0.42458266019821167, + "learning_rate": 3.0947773283873638e-06, + "loss": 0.07137489318847656, + "step": 5982 + }, + { + "epoch": 0.8336933045356372, + "grad_norm": 0.4150932729244232, + "learning_rate": 3.0897417845192356e-06, + "loss": 0.08608818054199219, + "step": 5983 + }, + { + "epoch": 0.8338326482268515, + "grad_norm": 0.31773069500923157, + "learning_rate": 3.084709997724411e-06, + "loss": 0.07187080383300781, + "step": 5984 + }, + { + "epoch": 0.8339719919180659, + "grad_norm": 0.5215685963630676, + "learning_rate": 3.079681969120849e-06, + "loss": 0.09665393829345703, + "step": 5985 + }, + { + "epoch": 0.8341113356092803, + "grad_norm": 0.5456759929656982, + "learning_rate": 3.0746576998256495e-06, + "loss": 0.08801651000976562, + "step": 5986 + }, + { + "epoch": 0.8342506793004947, + "grad_norm": 0.39879152178764343, + "learning_rate": 3.0696371909550947e-06, + "loss": 0.0805974006652832, + "step": 5987 + }, + { + "epoch": 0.8343900229917091, + "grad_norm": 0.4411539137363434, + "learning_rate": 3.0646204436246372e-06, + "loss": 0.09842586517333984, + "step": 5988 + }, + { + "epoch": 0.8345293666829234, + "grad_norm": 0.43748801946640015, + "learning_rate": 3.0596074589488743e-06, + "loss": 0.08090496063232422, + "step": 5989 + }, + { + "epoch": 0.8346687103741378, + "grad_norm": 0.4746513366699219, + "learning_rate": 3.054598238041575e-06, + "loss": 0.08180809020996094, + "step": 5990 + }, + { + "epoch": 0.8348080540653522, + "grad_norm": 0.5895968079566956, + "learning_rate": 3.04959278201568e-06, + "loss": 0.09442329406738281, + "step": 5991 + }, + { + "epoch": 0.8349473977565666, + "grad_norm": 0.430332750082016, + "learning_rate": 3.044591091983282e-06, + "loss": 0.07255744934082031, + "step": 5992 + }, + { + "epoch": 0.835086741447781, + "grad_norm": 0.401567280292511, + "learning_rate": 3.0395931690556435e-06, + "loss": 0.08411026000976562, + "step": 5993 + }, + { + "epoch": 0.8352260851389953, + "grad_norm": 0.5682185292243958, + "learning_rate": 3.0345990143431938e-06, + "loss": 0.08762836456298828, + "step": 5994 + }, + { + "epoch": 0.8353654288302097, + "grad_norm": 0.4266284704208374, + "learning_rate": 3.029608628955518e-06, + "loss": 0.08845329284667969, + "step": 5995 + }, + { + "epoch": 0.8355047725214241, + "grad_norm": 0.3852643370628357, + "learning_rate": 3.0246220140013593e-06, + "loss": 0.0713348388671875, + "step": 5996 + }, + { + "epoch": 0.8356441162126385, + "grad_norm": 0.6894209384918213, + "learning_rate": 3.019639170588633e-06, + "loss": 0.08593463897705078, + "step": 5997 + }, + { + "epoch": 0.8357834599038528, + "grad_norm": 0.5010825991630554, + "learning_rate": 3.0146600998244226e-06, + "loss": 0.07788705825805664, + "step": 5998 + }, + { + "epoch": 0.8359228035950672, + "grad_norm": 0.5979059934616089, + "learning_rate": 3.009684802814954e-06, + "loss": 0.10147857666015625, + "step": 5999 + }, + { + "epoch": 0.8360621472862816, + "grad_norm": 0.5561180710792542, + "learning_rate": 3.004713280665621e-06, + "loss": 0.09587287902832031, + "step": 6000 + }, + { + "epoch": 0.836201490977496, + "grad_norm": 0.4883810579776764, + "learning_rate": 2.999745534480989e-06, + "loss": 0.07012748718261719, + "step": 6001 + }, + { + "epoch": 0.8363408346687103, + "grad_norm": 0.3833581507205963, + "learning_rate": 2.9947815653647816e-06, + "loss": 0.07900142669677734, + "step": 6002 + }, + { + "epoch": 0.8364801783599247, + "grad_norm": 0.599585771560669, + "learning_rate": 2.98982137441987e-06, + "loss": 0.09912109375, + "step": 6003 + }, + { + "epoch": 0.8366195220511391, + "grad_norm": 0.3759322166442871, + "learning_rate": 2.9848649627483063e-06, + "loss": 0.08031272888183594, + "step": 6004 + }, + { + "epoch": 0.8367588657423535, + "grad_norm": 0.7706219553947449, + "learning_rate": 2.97991233145128e-06, + "loss": 0.11397933959960938, + "step": 6005 + }, + { + "epoch": 0.8368982094335679, + "grad_norm": 0.46661460399627686, + "learning_rate": 2.974963481629163e-06, + "loss": 0.08516502380371094, + "step": 6006 + }, + { + "epoch": 0.8370375531247822, + "grad_norm": 0.6683273315429688, + "learning_rate": 2.970018414381466e-06, + "loss": 0.09126949310302734, + "step": 6007 + }, + { + "epoch": 0.8371768968159966, + "grad_norm": 0.5166552662849426, + "learning_rate": 2.9650771308068773e-06, + "loss": 0.08934259414672852, + "step": 6008 + }, + { + "epoch": 0.837316240507211, + "grad_norm": 0.4562753438949585, + "learning_rate": 2.960139632003243e-06, + "loss": 0.08925247192382812, + "step": 6009 + }, + { + "epoch": 0.8374555841984254, + "grad_norm": 0.3380433917045593, + "learning_rate": 2.955205919067552e-06, + "loss": 0.07094192504882812, + "step": 6010 + }, + { + "epoch": 0.8375949278896399, + "grad_norm": 0.39143574237823486, + "learning_rate": 2.950275993095966e-06, + "loss": 0.08753776550292969, + "step": 6011 + }, + { + "epoch": 0.8377342715808542, + "grad_norm": 0.42327871918678284, + "learning_rate": 2.945349855183799e-06, + "loss": 0.08401679992675781, + "step": 6012 + }, + { + "epoch": 0.8378736152720686, + "grad_norm": 0.5252458453178406, + "learning_rate": 2.940427506425536e-06, + "loss": 0.08413410186767578, + "step": 6013 + }, + { + "epoch": 0.838012958963283, + "grad_norm": 0.3666932284832001, + "learning_rate": 2.9355089479148003e-06, + "loss": 0.08011531829833984, + "step": 6014 + }, + { + "epoch": 0.8381523026544974, + "grad_norm": 0.47995129227638245, + "learning_rate": 2.9305941807443903e-06, + "loss": 0.0848093032836914, + "step": 6015 + }, + { + "epoch": 0.8382916463457117, + "grad_norm": 0.512650728225708, + "learning_rate": 2.925683206006249e-06, + "loss": 0.08888816833496094, + "step": 6016 + }, + { + "epoch": 0.8384309900369261, + "grad_norm": 0.7961052656173706, + "learning_rate": 2.9207760247914895e-06, + "loss": 0.1107635498046875, + "step": 6017 + }, + { + "epoch": 0.8385703337281405, + "grad_norm": 0.31499141454696655, + "learning_rate": 2.915872638190369e-06, + "loss": 0.07097625732421875, + "step": 6018 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.6147186160087585, + "learning_rate": 2.9109730472923093e-06, + "loss": 0.09524822235107422, + "step": 6019 + }, + { + "epoch": 0.8388490211105692, + "grad_norm": 0.41703835129737854, + "learning_rate": 2.9060772531858996e-06, + "loss": 0.09167671203613281, + "step": 6020 + }, + { + "epoch": 0.8389883648017836, + "grad_norm": 0.6653115153312683, + "learning_rate": 2.9011852569588537e-06, + "loss": 0.08867263793945312, + "step": 6021 + }, + { + "epoch": 0.839127708492998, + "grad_norm": 0.4983729124069214, + "learning_rate": 2.896297059698072e-06, + "loss": 0.09094047546386719, + "step": 6022 + }, + { + "epoch": 0.8392670521842124, + "grad_norm": 0.7209923267364502, + "learning_rate": 2.891412662489599e-06, + "loss": 0.11021995544433594, + "step": 6023 + }, + { + "epoch": 0.8394063958754268, + "grad_norm": 0.4218944311141968, + "learning_rate": 2.8865320664186412e-06, + "loss": 0.09153461456298828, + "step": 6024 + }, + { + "epoch": 0.8395457395666411, + "grad_norm": 0.42556217312812805, + "learning_rate": 2.8816552725695524e-06, + "loss": 0.06873035430908203, + "step": 6025 + }, + { + "epoch": 0.8396850832578555, + "grad_norm": 0.45775336027145386, + "learning_rate": 2.8767822820258362e-06, + "loss": 0.07966804504394531, + "step": 6026 + }, + { + "epoch": 0.8398244269490699, + "grad_norm": 0.649269700050354, + "learning_rate": 2.8719130958701736e-06, + "loss": 0.09256458282470703, + "step": 6027 + }, + { + "epoch": 0.8399637706402843, + "grad_norm": 0.41413840651512146, + "learning_rate": 2.867047715184377e-06, + "loss": 0.0796051025390625, + "step": 6028 + }, + { + "epoch": 0.8401031143314986, + "grad_norm": 0.35253944993019104, + "learning_rate": 2.8621861410494234e-06, + "loss": 0.08159160614013672, + "step": 6029 + }, + { + "epoch": 0.840242458022713, + "grad_norm": 0.5272922515869141, + "learning_rate": 2.8573283745454515e-06, + "loss": 0.08675003051757812, + "step": 6030 + }, + { + "epoch": 0.8403818017139274, + "grad_norm": 0.355324923992157, + "learning_rate": 2.8524744167517427e-06, + "loss": 0.07793617248535156, + "step": 6031 + }, + { + "epoch": 0.8405211454051418, + "grad_norm": 0.8668853044509888, + "learning_rate": 2.847624268746727e-06, + "loss": 0.10993456840515137, + "step": 6032 + }, + { + "epoch": 0.8406604890963562, + "grad_norm": 0.40986600518226624, + "learning_rate": 2.842777931608005e-06, + "loss": 0.07297611236572266, + "step": 6033 + }, + { + "epoch": 0.8407998327875705, + "grad_norm": 0.6576427221298218, + "learning_rate": 2.8379354064123245e-06, + "loss": 0.10568618774414062, + "step": 6034 + }, + { + "epoch": 0.8409391764787849, + "grad_norm": 0.5431908369064331, + "learning_rate": 2.8330966942355752e-06, + "loss": 0.1014862060546875, + "step": 6035 + }, + { + "epoch": 0.8410785201699993, + "grad_norm": 0.35776641964912415, + "learning_rate": 2.828261796152818e-06, + "loss": 0.07204008102416992, + "step": 6036 + }, + { + "epoch": 0.8412178638612137, + "grad_norm": 0.4832927882671356, + "learning_rate": 2.8234307132382486e-06, + "loss": 0.08180618286132812, + "step": 6037 + }, + { + "epoch": 0.841357207552428, + "grad_norm": 0.32566455006599426, + "learning_rate": 2.818603446565231e-06, + "loss": 0.08555412292480469, + "step": 6038 + }, + { + "epoch": 0.8414965512436424, + "grad_norm": 0.405605286359787, + "learning_rate": 2.813779997206265e-06, + "loss": 0.08811187744140625, + "step": 6039 + }, + { + "epoch": 0.8416358949348568, + "grad_norm": 0.4999258518218994, + "learning_rate": 2.8089603662330155e-06, + "loss": 0.08571672439575195, + "step": 6040 + }, + { + "epoch": 0.8417752386260712, + "grad_norm": 0.5392831563949585, + "learning_rate": 2.8041445547162994e-06, + "loss": 0.10816764831542969, + "step": 6041 + }, + { + "epoch": 0.8419145823172856, + "grad_norm": 0.4736953675746918, + "learning_rate": 2.7993325637260738e-06, + "loss": 0.08478450775146484, + "step": 6042 + }, + { + "epoch": 0.8420539260084999, + "grad_norm": 0.5950091481208801, + "learning_rate": 2.79452439433145e-06, + "loss": 0.10583305358886719, + "step": 6043 + }, + { + "epoch": 0.8421932696997143, + "grad_norm": 0.3952693045139313, + "learning_rate": 2.7897200476007015e-06, + "loss": 0.08120346069335938, + "step": 6044 + }, + { + "epoch": 0.8423326133909287, + "grad_norm": 1.3060052394866943, + "learning_rate": 2.7849195246012417e-06, + "loss": 0.10817432403564453, + "step": 6045 + }, + { + "epoch": 0.8424719570821431, + "grad_norm": 0.38848820328712463, + "learning_rate": 2.780122826399634e-06, + "loss": 0.07954788208007812, + "step": 6046 + }, + { + "epoch": 0.8426113007733574, + "grad_norm": 0.38177233934402466, + "learning_rate": 2.775329954061603e-06, + "loss": 0.07421016693115234, + "step": 6047 + }, + { + "epoch": 0.8427506444645718, + "grad_norm": 0.35631081461906433, + "learning_rate": 2.770540908652004e-06, + "loss": 0.08315563201904297, + "step": 6048 + }, + { + "epoch": 0.8428899881557862, + "grad_norm": 0.5318734049797058, + "learning_rate": 2.765755691234866e-06, + "loss": 0.09890937805175781, + "step": 6049 + }, + { + "epoch": 0.8430293318470006, + "grad_norm": 0.42962127923965454, + "learning_rate": 2.7609743028733427e-06, + "loss": 0.0788726806640625, + "step": 6050 + }, + { + "epoch": 0.8431686755382151, + "grad_norm": 0.41827553510665894, + "learning_rate": 2.7561967446297555e-06, + "loss": 0.08664512634277344, + "step": 6051 + }, + { + "epoch": 0.8433080192294294, + "grad_norm": 0.5268584489822388, + "learning_rate": 2.75142301756558e-06, + "loss": 0.09116172790527344, + "step": 6052 + }, + { + "epoch": 0.8434473629206438, + "grad_norm": 0.5213503241539001, + "learning_rate": 2.7466531227414074e-06, + "loss": 0.09652626514434814, + "step": 6053 + }, + { + "epoch": 0.8435867066118582, + "grad_norm": 0.3566455543041229, + "learning_rate": 2.7418870612170123e-06, + "loss": 0.07954978942871094, + "step": 6054 + }, + { + "epoch": 0.8437260503030726, + "grad_norm": 0.5133810043334961, + "learning_rate": 2.737124834051301e-06, + "loss": 0.10096931457519531, + "step": 6055 + }, + { + "epoch": 0.843865393994287, + "grad_norm": 0.49040716886520386, + "learning_rate": 2.732366442302339e-06, + "loss": 0.09162712097167969, + "step": 6056 + }, + { + "epoch": 0.8440047376855013, + "grad_norm": 0.3334300220012665, + "learning_rate": 2.727611887027326e-06, + "loss": 0.0789947509765625, + "step": 6057 + }, + { + "epoch": 0.8441440813767157, + "grad_norm": 0.4183584749698639, + "learning_rate": 2.722861169282611e-06, + "loss": 0.07651329040527344, + "step": 6058 + }, + { + "epoch": 0.8442834250679301, + "grad_norm": 0.5091472268104553, + "learning_rate": 2.7181142901237077e-06, + "loss": 0.08580684661865234, + "step": 6059 + }, + { + "epoch": 0.8444227687591445, + "grad_norm": 0.766981303691864, + "learning_rate": 2.71337125060525e-06, + "loss": 0.1256275177001953, + "step": 6060 + }, + { + "epoch": 0.8445621124503588, + "grad_norm": 0.4034634828567505, + "learning_rate": 2.7086320517810416e-06, + "loss": 0.07483911514282227, + "step": 6061 + }, + { + "epoch": 0.8447014561415732, + "grad_norm": 1.1516835689544678, + "learning_rate": 2.7038966947040247e-06, + "loss": 0.12220001220703125, + "step": 6062 + }, + { + "epoch": 0.8448407998327876, + "grad_norm": 0.37151452898979187, + "learning_rate": 2.6991651804262886e-06, + "loss": 0.0754852294921875, + "step": 6063 + }, + { + "epoch": 0.844980143524002, + "grad_norm": 0.44599512219429016, + "learning_rate": 2.694437509999057e-06, + "loss": 0.08682632446289062, + "step": 6064 + }, + { + "epoch": 0.8451194872152163, + "grad_norm": 0.35527172684669495, + "learning_rate": 2.6897136844727214e-06, + "loss": 0.07898521423339844, + "step": 6065 + }, + { + "epoch": 0.8452588309064307, + "grad_norm": 0.3602190613746643, + "learning_rate": 2.6849937048968056e-06, + "loss": 0.07267189025878906, + "step": 6066 + }, + { + "epoch": 0.8453981745976451, + "grad_norm": 0.48532864451408386, + "learning_rate": 2.680277572319978e-06, + "loss": 0.08330440521240234, + "step": 6067 + }, + { + "epoch": 0.8455375182888595, + "grad_norm": 0.34575405716896057, + "learning_rate": 2.675565287790063e-06, + "loss": 0.0781557559967041, + "step": 6068 + }, + { + "epoch": 0.8456768619800739, + "grad_norm": 0.38432562351226807, + "learning_rate": 2.6708568523540114e-06, + "loss": 0.07896709442138672, + "step": 6069 + }, + { + "epoch": 0.8458162056712882, + "grad_norm": 0.2960702180862427, + "learning_rate": 2.6661522670579398e-06, + "loss": 0.07082176208496094, + "step": 6070 + }, + { + "epoch": 0.8459555493625026, + "grad_norm": 0.30766573548316956, + "learning_rate": 2.6614515329470923e-06, + "loss": 0.07265663146972656, + "step": 6071 + }, + { + "epoch": 0.846094893053717, + "grad_norm": 0.3287564814090729, + "learning_rate": 2.656754651065869e-06, + "loss": 0.07221412658691406, + "step": 6072 + }, + { + "epoch": 0.8462342367449314, + "grad_norm": 0.3494502604007721, + "learning_rate": 2.652061622457813e-06, + "loss": 0.07805061340332031, + "step": 6073 + }, + { + "epoch": 0.8463735804361457, + "grad_norm": 0.4696792662143707, + "learning_rate": 2.647372448165606e-06, + "loss": 0.08802223205566406, + "step": 6074 + }, + { + "epoch": 0.8465129241273601, + "grad_norm": 0.44258975982666016, + "learning_rate": 2.6426871292310675e-06, + "loss": 0.09045219421386719, + "step": 6075 + }, + { + "epoch": 0.8466522678185745, + "grad_norm": 0.4334302246570587, + "learning_rate": 2.638005666695176e-06, + "loss": 0.0851907730102539, + "step": 6076 + }, + { + "epoch": 0.8467916115097889, + "grad_norm": 0.5525688529014587, + "learning_rate": 2.6333280615980483e-06, + "loss": 0.09095573425292969, + "step": 6077 + }, + { + "epoch": 0.8469309552010033, + "grad_norm": 0.5871329307556152, + "learning_rate": 2.6286543149789355e-06, + "loss": 0.08803510665893555, + "step": 6078 + }, + { + "epoch": 0.8470702988922176, + "grad_norm": 0.4610791504383087, + "learning_rate": 2.6239844278762384e-06, + "loss": 0.08990478515625, + "step": 6079 + }, + { + "epoch": 0.847209642583432, + "grad_norm": 0.5270422697067261, + "learning_rate": 2.6193184013274975e-06, + "loss": 0.10570240020751953, + "step": 6080 + }, + { + "epoch": 0.8473489862746464, + "grad_norm": 0.39597219228744507, + "learning_rate": 2.614656236369406e-06, + "loss": 0.07190895080566406, + "step": 6081 + }, + { + "epoch": 0.8474883299658608, + "grad_norm": 0.6103023290634155, + "learning_rate": 2.60999793403778e-06, + "loss": 0.09181404113769531, + "step": 6082 + }, + { + "epoch": 0.8476276736570751, + "grad_norm": 0.5329571962356567, + "learning_rate": 2.605343495367596e-06, + "loss": 0.08711647987365723, + "step": 6083 + }, + { + "epoch": 0.8477670173482895, + "grad_norm": 0.6217190027236938, + "learning_rate": 2.6006929213929576e-06, + "loss": 0.09747123718261719, + "step": 6084 + }, + { + "epoch": 0.8479063610395039, + "grad_norm": 0.4242474436759949, + "learning_rate": 2.5960462131471233e-06, + "loss": 0.07675552368164062, + "step": 6085 + }, + { + "epoch": 0.8480457047307183, + "grad_norm": 0.4809899926185608, + "learning_rate": 2.5914033716624754e-06, + "loss": 0.08896255493164062, + "step": 6086 + }, + { + "epoch": 0.8481850484219327, + "grad_norm": 0.35762274265289307, + "learning_rate": 2.5867643979705535e-06, + "loss": 0.07684326171875, + "step": 6087 + }, + { + "epoch": 0.848324392113147, + "grad_norm": 0.4555775225162506, + "learning_rate": 2.582129293102038e-06, + "loss": 0.07338261604309082, + "step": 6088 + }, + { + "epoch": 0.8484637358043614, + "grad_norm": 0.4775376319885254, + "learning_rate": 2.577498058086736e-06, + "loss": 0.0842132568359375, + "step": 6089 + }, + { + "epoch": 0.8486030794955758, + "grad_norm": 0.48936885595321655, + "learning_rate": 2.5728706939535976e-06, + "loss": 0.08602333068847656, + "step": 6090 + }, + { + "epoch": 0.8487424231867903, + "grad_norm": 0.5264249444007874, + "learning_rate": 2.568247201730727e-06, + "loss": 0.09004974365234375, + "step": 6091 + }, + { + "epoch": 0.8488817668780047, + "grad_norm": 0.471238911151886, + "learning_rate": 2.5636275824453515e-06, + "loss": 0.07563114166259766, + "step": 6092 + }, + { + "epoch": 0.849021110569219, + "grad_norm": 0.6905117034912109, + "learning_rate": 2.559011837123846e-06, + "loss": 0.08969306945800781, + "step": 6093 + }, + { + "epoch": 0.8491604542604334, + "grad_norm": 0.6818282604217529, + "learning_rate": 2.554399966791732e-06, + "loss": 0.12866592407226562, + "step": 6094 + }, + { + "epoch": 0.8492997979516478, + "grad_norm": 0.5005202889442444, + "learning_rate": 2.5497919724736564e-06, + "loss": 0.07881313562393188, + "step": 6095 + }, + { + "epoch": 0.8494391416428622, + "grad_norm": 0.5570763945579529, + "learning_rate": 2.545187855193403e-06, + "loss": 0.10041141510009766, + "step": 6096 + }, + { + "epoch": 0.8495784853340765, + "grad_norm": 0.43135374784469604, + "learning_rate": 2.5405876159739083e-06, + "loss": 0.0788583755493164, + "step": 6097 + }, + { + "epoch": 0.8497178290252909, + "grad_norm": 0.5154529213905334, + "learning_rate": 2.535991255837247e-06, + "loss": 0.1033935546875, + "step": 6098 + }, + { + "epoch": 0.8498571727165053, + "grad_norm": 0.5461426377296448, + "learning_rate": 2.5313987758046164e-06, + "loss": 0.10405349731445312, + "step": 6099 + }, + { + "epoch": 0.8499965164077197, + "grad_norm": 0.48018980026245117, + "learning_rate": 2.526810176896357e-06, + "loss": 0.08325481414794922, + "step": 6100 + }, + { + "epoch": 0.850135860098934, + "grad_norm": 0.46214258670806885, + "learning_rate": 2.5222254601319595e-06, + "loss": 0.08641242980957031, + "step": 6101 + }, + { + "epoch": 0.8502752037901484, + "grad_norm": 0.4060104191303253, + "learning_rate": 2.5176446265300424e-06, + "loss": 0.0796041488647461, + "step": 6102 + }, + { + "epoch": 0.8504145474813628, + "grad_norm": 0.44657155871391296, + "learning_rate": 2.5130676771083585e-06, + "loss": 0.07765960693359375, + "step": 6103 + }, + { + "epoch": 0.8505538911725772, + "grad_norm": 0.4974817633628845, + "learning_rate": 2.5084946128838007e-06, + "loss": 0.08197832107543945, + "step": 6104 + }, + { + "epoch": 0.8506932348637916, + "grad_norm": 0.6038941144943237, + "learning_rate": 2.5039254348724096e-06, + "loss": 0.09979438781738281, + "step": 6105 + }, + { + "epoch": 0.8508325785550059, + "grad_norm": 0.5630016326904297, + "learning_rate": 2.499360144089342e-06, + "loss": 0.07856035232543945, + "step": 6106 + }, + { + "epoch": 0.8509719222462203, + "grad_norm": 0.40001899003982544, + "learning_rate": 2.4947987415489004e-06, + "loss": 0.08426284790039062, + "step": 6107 + }, + { + "epoch": 0.8511112659374347, + "grad_norm": 0.6233255863189697, + "learning_rate": 2.490241228264527e-06, + "loss": 0.0901947021484375, + "step": 6108 + }, + { + "epoch": 0.8512506096286491, + "grad_norm": 0.7019109725952148, + "learning_rate": 2.4856876052488032e-06, + "loss": 0.09579133987426758, + "step": 6109 + }, + { + "epoch": 0.8513899533198634, + "grad_norm": 0.4231496751308441, + "learning_rate": 2.481137873513435e-06, + "loss": 0.07473945617675781, + "step": 6110 + }, + { + "epoch": 0.8515292970110778, + "grad_norm": 0.5664945244789124, + "learning_rate": 2.476592034069265e-06, + "loss": 0.09049320220947266, + "step": 6111 + }, + { + "epoch": 0.8516686407022922, + "grad_norm": 0.31244105100631714, + "learning_rate": 2.4720500879262787e-06, + "loss": 0.07141304016113281, + "step": 6112 + }, + { + "epoch": 0.8518079843935066, + "grad_norm": 0.5461548566818237, + "learning_rate": 2.4675120360935977e-06, + "loss": 0.08037137985229492, + "step": 6113 + }, + { + "epoch": 0.851947328084721, + "grad_norm": 0.6030833721160889, + "learning_rate": 2.4629778795794623e-06, + "loss": 0.0953826904296875, + "step": 6114 + }, + { + "epoch": 0.8520866717759353, + "grad_norm": 0.44353488087654114, + "learning_rate": 2.458447619391271e-06, + "loss": 0.09991645812988281, + "step": 6115 + }, + { + "epoch": 0.8522260154671497, + "grad_norm": 0.5490710139274597, + "learning_rate": 2.453921256535534e-06, + "loss": 0.10242652893066406, + "step": 6116 + }, + { + "epoch": 0.8523653591583641, + "grad_norm": 0.45041587948799133, + "learning_rate": 2.449398792017914e-06, + "loss": 0.08989524841308594, + "step": 6117 + }, + { + "epoch": 0.8525047028495785, + "grad_norm": 0.7949641346931458, + "learning_rate": 2.4448802268431914e-06, + "loss": 0.09644699096679688, + "step": 6118 + }, + { + "epoch": 0.8526440465407928, + "grad_norm": 0.3029862940311432, + "learning_rate": 2.440365562015292e-06, + "loss": 0.07400894165039062, + "step": 6119 + }, + { + "epoch": 0.8527833902320072, + "grad_norm": 0.4435700476169586, + "learning_rate": 2.4358547985372806e-06, + "loss": 0.08330726623535156, + "step": 6120 + }, + { + "epoch": 0.8529227339232216, + "grad_norm": 0.5995326638221741, + "learning_rate": 2.431347937411328e-06, + "loss": 0.08452987670898438, + "step": 6121 + }, + { + "epoch": 0.853062077614436, + "grad_norm": 0.8980262279510498, + "learning_rate": 2.426844979638763e-06, + "loss": 0.10148811340332031, + "step": 6122 + }, + { + "epoch": 0.8532014213056504, + "grad_norm": 0.51682049036026, + "learning_rate": 2.4223459262200422e-06, + "loss": 0.08647346496582031, + "step": 6123 + }, + { + "epoch": 0.8533407649968647, + "grad_norm": 0.4803987443447113, + "learning_rate": 2.4178507781547577e-06, + "loss": 0.08463859558105469, + "step": 6124 + }, + { + "epoch": 0.8534801086880791, + "grad_norm": 0.29568299651145935, + "learning_rate": 2.4133595364416174e-06, + "loss": 0.06678199768066406, + "step": 6125 + }, + { + "epoch": 0.8536194523792935, + "grad_norm": 0.4570411145687103, + "learning_rate": 2.4088722020784828e-06, + "loss": 0.07572174072265625, + "step": 6126 + }, + { + "epoch": 0.8537587960705079, + "grad_norm": 0.5936608910560608, + "learning_rate": 2.404388776062332e-06, + "loss": 0.09504318237304688, + "step": 6127 + }, + { + "epoch": 0.8538981397617222, + "grad_norm": 0.6748477816581726, + "learning_rate": 2.399909259389277e-06, + "loss": 0.10178565979003906, + "step": 6128 + }, + { + "epoch": 0.8540374834529366, + "grad_norm": 0.6466284394264221, + "learning_rate": 2.3954336530545684e-06, + "loss": 0.08366775512695312, + "step": 6129 + }, + { + "epoch": 0.854176827144151, + "grad_norm": 0.6110556125640869, + "learning_rate": 2.3909619580525867e-06, + "loss": 0.11384201049804688, + "step": 6130 + }, + { + "epoch": 0.8543161708353654, + "grad_norm": 0.33672353625297546, + "learning_rate": 2.3864941753768345e-06, + "loss": 0.08042144775390625, + "step": 6131 + }, + { + "epoch": 0.8544555145265799, + "grad_norm": 0.4578322470188141, + "learning_rate": 2.3820303060199513e-06, + "loss": 0.10376930236816406, + "step": 6132 + }, + { + "epoch": 0.8545948582177942, + "grad_norm": 0.4372299015522003, + "learning_rate": 2.3775703509737102e-06, + "loss": 0.09068107604980469, + "step": 6133 + }, + { + "epoch": 0.8547342019090086, + "grad_norm": 0.3784751892089844, + "learning_rate": 2.3731143112290124e-06, + "loss": 0.07615470886230469, + "step": 6134 + }, + { + "epoch": 0.854873545600223, + "grad_norm": 0.5575991272926331, + "learning_rate": 2.368662187775883e-06, + "loss": 0.09649896621704102, + "step": 6135 + }, + { + "epoch": 0.8550128892914374, + "grad_norm": 0.5639243125915527, + "learning_rate": 2.3642139816034872e-06, + "loss": 0.09221553802490234, + "step": 6136 + }, + { + "epoch": 0.8551522329826517, + "grad_norm": 0.5247772932052612, + "learning_rate": 2.359769693700111e-06, + "loss": 0.0988454818725586, + "step": 6137 + }, + { + "epoch": 0.8552915766738661, + "grad_norm": 0.35249024629592896, + "learning_rate": 2.3553293250531794e-06, + "loss": 0.06801414489746094, + "step": 6138 + }, + { + "epoch": 0.8554309203650805, + "grad_norm": 0.361608624458313, + "learning_rate": 2.350892876649231e-06, + "loss": 0.08695602416992188, + "step": 6139 + }, + { + "epoch": 0.8555702640562949, + "grad_norm": 0.487290620803833, + "learning_rate": 2.3464603494739513e-06, + "loss": 0.08508872985839844, + "step": 6140 + }, + { + "epoch": 0.8557096077475093, + "grad_norm": 0.5598408579826355, + "learning_rate": 2.3420317445121497e-06, + "loss": 0.09947013854980469, + "step": 6141 + }, + { + "epoch": 0.8558489514387236, + "grad_norm": 0.5617973804473877, + "learning_rate": 2.3376070627477555e-06, + "loss": 0.0838470458984375, + "step": 6142 + }, + { + "epoch": 0.855988295129938, + "grad_norm": 0.39126962423324585, + "learning_rate": 2.333186305163828e-06, + "loss": 0.0734567642211914, + "step": 6143 + }, + { + "epoch": 0.8561276388211524, + "grad_norm": 0.48690131306648254, + "learning_rate": 2.3287694727425623e-06, + "loss": 0.09020519256591797, + "step": 6144 + }, + { + "epoch": 0.8562669825123668, + "grad_norm": 0.40547120571136475, + "learning_rate": 2.3243565664652844e-06, + "loss": 0.07031917572021484, + "step": 6145 + }, + { + "epoch": 0.8564063262035811, + "grad_norm": 0.6245911121368408, + "learning_rate": 2.31994758731243e-06, + "loss": 0.11643409729003906, + "step": 6146 + }, + { + "epoch": 0.8565456698947955, + "grad_norm": 0.4466635286808014, + "learning_rate": 2.3155425362635863e-06, + "loss": 0.07921791076660156, + "step": 6147 + }, + { + "epoch": 0.8566850135860099, + "grad_norm": 0.31835639476776123, + "learning_rate": 2.311141414297442e-06, + "loss": 0.07761955261230469, + "step": 6148 + }, + { + "epoch": 0.8568243572772243, + "grad_norm": 0.4146215319633484, + "learning_rate": 2.3067442223918345e-06, + "loss": 0.08607006072998047, + "step": 6149 + }, + { + "epoch": 0.8569637009684387, + "grad_norm": 0.709247887134552, + "learning_rate": 2.3023509615237138e-06, + "loss": 0.1041860580444336, + "step": 6150 + }, + { + "epoch": 0.857103044659653, + "grad_norm": 0.4569947421550751, + "learning_rate": 2.2979616326691658e-06, + "loss": 0.06868648529052734, + "step": 6151 + }, + { + "epoch": 0.8572423883508674, + "grad_norm": 0.41479671001434326, + "learning_rate": 2.2935762368034075e-06, + "loss": 0.07218027114868164, + "step": 6152 + }, + { + "epoch": 0.8573817320420818, + "grad_norm": 0.33387628197669983, + "learning_rate": 2.289194774900756e-06, + "loss": 0.08126115798950195, + "step": 6153 + }, + { + "epoch": 0.8575210757332962, + "grad_norm": 0.5050033926963806, + "learning_rate": 2.2848172479346806e-06, + "loss": 0.07758235931396484, + "step": 6154 + }, + { + "epoch": 0.8576604194245105, + "grad_norm": 0.5953513383865356, + "learning_rate": 2.280443656877769e-06, + "loss": 0.10680389404296875, + "step": 6155 + }, + { + "epoch": 0.8577997631157249, + "grad_norm": 0.45897993445396423, + "learning_rate": 2.276074002701736e-06, + "loss": 0.08248710632324219, + "step": 6156 + }, + { + "epoch": 0.8579391068069393, + "grad_norm": 0.5093229413032532, + "learning_rate": 2.271708286377414e-06, + "loss": 0.07955741882324219, + "step": 6157 + }, + { + "epoch": 0.8580784504981537, + "grad_norm": 0.6725553274154663, + "learning_rate": 2.267346508874766e-06, + "loss": 0.08455848693847656, + "step": 6158 + }, + { + "epoch": 0.858217794189368, + "grad_norm": 0.4154289960861206, + "learning_rate": 2.262988671162882e-06, + "loss": 0.07352066040039062, + "step": 6159 + }, + { + "epoch": 0.8583571378805824, + "grad_norm": 0.3946221172809601, + "learning_rate": 2.258634774209969e-06, + "loss": 0.07795047760009766, + "step": 6160 + }, + { + "epoch": 0.8584964815717968, + "grad_norm": 0.6382601261138916, + "learning_rate": 2.2542848189833675e-06, + "loss": 0.07896757125854492, + "step": 6161 + }, + { + "epoch": 0.8586358252630112, + "grad_norm": 0.35859569907188416, + "learning_rate": 2.249938806449539e-06, + "loss": 0.07119560241699219, + "step": 6162 + }, + { + "epoch": 0.8587751689542256, + "grad_norm": 0.5127848386764526, + "learning_rate": 2.2455967375740716e-06, + "loss": 0.0974273681640625, + "step": 6163 + }, + { + "epoch": 0.8589145126454399, + "grad_norm": 0.3860238790512085, + "learning_rate": 2.241258613321664e-06, + "loss": 0.07908439636230469, + "step": 6164 + }, + { + "epoch": 0.8590538563366543, + "grad_norm": 0.456916868686676, + "learning_rate": 2.2369244346561516e-06, + "loss": 0.08671903610229492, + "step": 6165 + }, + { + "epoch": 0.8591932000278687, + "grad_norm": 0.419044554233551, + "learning_rate": 2.2325942025404968e-06, + "loss": 0.08510780334472656, + "step": 6166 + }, + { + "epoch": 0.8593325437190831, + "grad_norm": 0.6141331791877747, + "learning_rate": 2.2282679179367684e-06, + "loss": 0.10396862030029297, + "step": 6167 + }, + { + "epoch": 0.8594718874102975, + "grad_norm": 0.4606867730617523, + "learning_rate": 2.2239455818061793e-06, + "loss": 0.078399658203125, + "step": 6168 + }, + { + "epoch": 0.8596112311015118, + "grad_norm": 0.7025746703147888, + "learning_rate": 2.219627195109042e-06, + "loss": 0.09209823608398438, + "step": 6169 + }, + { + "epoch": 0.8597505747927262, + "grad_norm": 0.3533877730369568, + "learning_rate": 2.2153127588048127e-06, + "loss": 0.07525444030761719, + "step": 6170 + }, + { + "epoch": 0.8598899184839406, + "grad_norm": 0.758362889289856, + "learning_rate": 2.2110022738520543e-06, + "loss": 0.10401153564453125, + "step": 6171 + }, + { + "epoch": 0.8600292621751551, + "grad_norm": 0.3673829436302185, + "learning_rate": 2.20669574120846e-06, + "loss": 0.07842445373535156, + "step": 6172 + }, + { + "epoch": 0.8601686058663695, + "grad_norm": 0.45473161339759827, + "learning_rate": 2.202393161830849e-06, + "loss": 0.09203529357910156, + "step": 6173 + }, + { + "epoch": 0.8603079495575838, + "grad_norm": 0.43547025322914124, + "learning_rate": 2.1980945366751503e-06, + "loss": 0.08632707595825195, + "step": 6174 + }, + { + "epoch": 0.8604472932487982, + "grad_norm": 1.0598269701004028, + "learning_rate": 2.1937998666964176e-06, + "loss": 0.12509536743164062, + "step": 6175 + }, + { + "epoch": 0.8605866369400126, + "grad_norm": 0.4926418960094452, + "learning_rate": 2.189509152848832e-06, + "loss": 0.09158134460449219, + "step": 6176 + }, + { + "epoch": 0.860725980631227, + "grad_norm": 0.5311089158058167, + "learning_rate": 2.185222396085698e-06, + "loss": 0.08346271514892578, + "step": 6177 + }, + { + "epoch": 0.8608653243224413, + "grad_norm": 0.4150973856449127, + "learning_rate": 2.1809395973594263e-06, + "loss": 0.06918478012084961, + "step": 6178 + }, + { + "epoch": 0.8610046680136557, + "grad_norm": 0.30050769448280334, + "learning_rate": 2.176660757621558e-06, + "loss": 0.07230281829833984, + "step": 6179 + }, + { + "epoch": 0.8611440117048701, + "grad_norm": 0.40347322821617126, + "learning_rate": 2.1723858778227537e-06, + "loss": 0.06775283813476562, + "step": 6180 + }, + { + "epoch": 0.8612833553960845, + "grad_norm": 0.6469497680664062, + "learning_rate": 2.1681149589128016e-06, + "loss": 0.08130359649658203, + "step": 6181 + }, + { + "epoch": 0.8614226990872988, + "grad_norm": 0.5099116563796997, + "learning_rate": 2.1638480018405916e-06, + "loss": 0.08535480499267578, + "step": 6182 + }, + { + "epoch": 0.8615620427785132, + "grad_norm": 0.295362263917923, + "learning_rate": 2.15958500755415e-06, + "loss": 0.06638813018798828, + "step": 6183 + }, + { + "epoch": 0.8617013864697276, + "grad_norm": 0.4107067883014679, + "learning_rate": 2.1553259770006196e-06, + "loss": 0.07637453079223633, + "step": 6184 + }, + { + "epoch": 0.861840730160942, + "grad_norm": 0.5474122166633606, + "learning_rate": 2.1510709111262584e-06, + "loss": 0.07942008972167969, + "step": 6185 + }, + { + "epoch": 0.8619800738521564, + "grad_norm": 0.4998341500759125, + "learning_rate": 2.1468198108764373e-06, + "loss": 0.10013198852539062, + "step": 6186 + }, + { + "epoch": 0.8621194175433707, + "grad_norm": 0.6380084753036499, + "learning_rate": 2.1425726771956578e-06, + "loss": 0.08970832824707031, + "step": 6187 + }, + { + "epoch": 0.8622587612345851, + "grad_norm": 0.4719351828098297, + "learning_rate": 2.1383295110275437e-06, + "loss": 0.09814262390136719, + "step": 6188 + }, + { + "epoch": 0.8623981049257995, + "grad_norm": 0.5953404307365417, + "learning_rate": 2.1340903133148205e-06, + "loss": 0.12268447875976562, + "step": 6189 + }, + { + "epoch": 0.8625374486170139, + "grad_norm": 0.49304622411727905, + "learning_rate": 2.1298550849993437e-06, + "loss": 0.07784843444824219, + "step": 6190 + }, + { + "epoch": 0.8626767923082282, + "grad_norm": 0.599367082118988, + "learning_rate": 2.1256238270220853e-06, + "loss": 0.10297393798828125, + "step": 6191 + }, + { + "epoch": 0.8628161359994426, + "grad_norm": 0.4347241222858429, + "learning_rate": 2.1213965403231328e-06, + "loss": 0.09063339233398438, + "step": 6192 + }, + { + "epoch": 0.862955479690657, + "grad_norm": 0.5508264303207397, + "learning_rate": 2.117173225841691e-06, + "loss": 0.09412384033203125, + "step": 6193 + }, + { + "epoch": 0.8630948233818714, + "grad_norm": 0.4478569030761719, + "learning_rate": 2.112953884516091e-06, + "loss": 0.07237052917480469, + "step": 6194 + }, + { + "epoch": 0.8632341670730858, + "grad_norm": 0.4460628926753998, + "learning_rate": 2.1087385172837705e-06, + "loss": 0.07827186584472656, + "step": 6195 + }, + { + "epoch": 0.8633735107643001, + "grad_norm": 0.48068755865097046, + "learning_rate": 2.1045271250812817e-06, + "loss": 0.09836387634277344, + "step": 6196 + }, + { + "epoch": 0.8635128544555145, + "grad_norm": 0.5551115274429321, + "learning_rate": 2.100319708844307e-06, + "loss": 0.08900260925292969, + "step": 6197 + }, + { + "epoch": 0.8636521981467289, + "grad_norm": 0.5295537114143372, + "learning_rate": 2.0961162695076397e-06, + "loss": 0.0921621322631836, + "step": 6198 + }, + { + "epoch": 0.8637915418379433, + "grad_norm": 0.615614116191864, + "learning_rate": 2.091916808005179e-06, + "loss": 0.09561538696289062, + "step": 6199 + }, + { + "epoch": 0.8639308855291576, + "grad_norm": 0.3918980360031128, + "learning_rate": 2.08772132526996e-06, + "loss": 0.08166980743408203, + "step": 6200 + }, + { + "epoch": 0.864070229220372, + "grad_norm": 0.49990177154541016, + "learning_rate": 2.083529822234116e-06, + "loss": 0.09120368957519531, + "step": 6201 + }, + { + "epoch": 0.8642095729115864, + "grad_norm": 0.4225783944129944, + "learning_rate": 2.079342299828908e-06, + "loss": 0.0888671875, + "step": 6202 + }, + { + "epoch": 0.8643489166028008, + "grad_norm": 0.42293062806129456, + "learning_rate": 2.075158758984701e-06, + "loss": 0.07914161682128906, + "step": 6203 + }, + { + "epoch": 0.8644882602940152, + "grad_norm": 0.6441664695739746, + "learning_rate": 2.070979200630987e-06, + "loss": 0.10328865051269531, + "step": 6204 + }, + { + "epoch": 0.8646276039852295, + "grad_norm": 0.5994672179222107, + "learning_rate": 2.0668036256963743e-06, + "loss": 0.08933401107788086, + "step": 6205 + }, + { + "epoch": 0.8647669476764439, + "grad_norm": 0.35590359568595886, + "learning_rate": 2.0626320351085716e-06, + "loss": 0.06458282470703125, + "step": 6206 + }, + { + "epoch": 0.8649062913676583, + "grad_norm": 0.42792901396751404, + "learning_rate": 2.058464429794409e-06, + "loss": 0.07863807678222656, + "step": 6207 + }, + { + "epoch": 0.8650456350588727, + "grad_norm": 0.3933212161064148, + "learning_rate": 2.054300810679839e-06, + "loss": 0.0817517638206482, + "step": 6208 + }, + { + "epoch": 0.865184978750087, + "grad_norm": 0.4249594211578369, + "learning_rate": 2.0501411786899263e-06, + "loss": 0.083892822265625, + "step": 6209 + }, + { + "epoch": 0.8653243224413014, + "grad_norm": 0.4245107173919678, + "learning_rate": 2.045985534748842e-06, + "loss": 0.07518672943115234, + "step": 6210 + }, + { + "epoch": 0.8654636661325158, + "grad_norm": 0.43646037578582764, + "learning_rate": 2.0418338797798686e-06, + "loss": 0.08399009704589844, + "step": 6211 + }, + { + "epoch": 0.8656030098237303, + "grad_norm": 0.40840375423431396, + "learning_rate": 2.0376862147054164e-06, + "loss": 0.08592796325683594, + "step": 6212 + }, + { + "epoch": 0.8657423535149447, + "grad_norm": 0.3971848785877228, + "learning_rate": 2.0335425404470045e-06, + "loss": 0.08320045471191406, + "step": 6213 + }, + { + "epoch": 0.865881697206159, + "grad_norm": 0.42626991868019104, + "learning_rate": 2.0294028579252557e-06, + "loss": 0.08357810974121094, + "step": 6214 + }, + { + "epoch": 0.8660210408973734, + "grad_norm": 0.32838377356529236, + "learning_rate": 2.025267168059919e-06, + "loss": 0.0734090805053711, + "step": 6215 + }, + { + "epoch": 0.8661603845885878, + "grad_norm": 0.38070330023765564, + "learning_rate": 2.0211354717698437e-06, + "loss": 0.07888555526733398, + "step": 6216 + }, + { + "epoch": 0.8662997282798022, + "grad_norm": 0.35628244280815125, + "learning_rate": 2.017007769973005e-06, + "loss": 0.07375717163085938, + "step": 6217 + }, + { + "epoch": 0.8664390719710166, + "grad_norm": 0.48202094435691833, + "learning_rate": 2.01288406358648e-06, + "loss": 0.08688163757324219, + "step": 6218 + }, + { + "epoch": 0.8665784156622309, + "grad_norm": 0.6946505308151245, + "learning_rate": 2.00876435352646e-06, + "loss": 0.09705829620361328, + "step": 6219 + }, + { + "epoch": 0.8667177593534453, + "grad_norm": 0.41293779015541077, + "learning_rate": 2.00464864070826e-06, + "loss": 0.08126640319824219, + "step": 6220 + }, + { + "epoch": 0.8668571030446597, + "grad_norm": 0.5782420039176941, + "learning_rate": 2.0005369260462904e-06, + "loss": 0.09080123901367188, + "step": 6221 + }, + { + "epoch": 0.8669964467358741, + "grad_norm": 0.5506097674369812, + "learning_rate": 1.996429210454078e-06, + "loss": 0.0953369140625, + "step": 6222 + }, + { + "epoch": 0.8671357904270884, + "grad_norm": 0.6022987365722656, + "learning_rate": 1.9923254948442648e-06, + "loss": 0.10653305053710938, + "step": 6223 + }, + { + "epoch": 0.8672751341183028, + "grad_norm": 0.35688847303390503, + "learning_rate": 1.98822578012861e-06, + "loss": 0.07562637329101562, + "step": 6224 + }, + { + "epoch": 0.8674144778095172, + "grad_norm": 0.4629690647125244, + "learning_rate": 1.9841300672179662e-06, + "loss": 0.08714675903320312, + "step": 6225 + }, + { + "epoch": 0.8675538215007316, + "grad_norm": 0.5595200061798096, + "learning_rate": 1.9800383570223157e-06, + "loss": 0.10527324676513672, + "step": 6226 + }, + { + "epoch": 0.867693165191946, + "grad_norm": 0.30318427085876465, + "learning_rate": 1.97595065045074e-06, + "loss": 0.06885814666748047, + "step": 6227 + }, + { + "epoch": 0.8678325088831603, + "grad_norm": 0.5297080278396606, + "learning_rate": 1.9718669484114315e-06, + "loss": 0.07992744445800781, + "step": 6228 + }, + { + "epoch": 0.8679718525743747, + "grad_norm": 0.3980204463005066, + "learning_rate": 1.9677872518116948e-06, + "loss": 0.07517051696777344, + "step": 6229 + }, + { + "epoch": 0.8681111962655891, + "grad_norm": 0.8659873604774475, + "learning_rate": 1.963711561557955e-06, + "loss": 0.08452606201171875, + "step": 6230 + }, + { + "epoch": 0.8682505399568035, + "grad_norm": 0.5752677917480469, + "learning_rate": 1.959639878555728e-06, + "loss": 0.10335159301757812, + "step": 6231 + }, + { + "epoch": 0.8683898836480178, + "grad_norm": 0.6310433149337769, + "learning_rate": 1.95557220370965e-06, + "loss": 0.09230422973632812, + "step": 6232 + }, + { + "epoch": 0.8685292273392322, + "grad_norm": 0.4317304193973541, + "learning_rate": 1.9515085379234656e-06, + "loss": 0.07992172241210938, + "step": 6233 + }, + { + "epoch": 0.8686685710304466, + "grad_norm": 0.4051436483860016, + "learning_rate": 1.9474488821000357e-06, + "loss": 0.07796764373779297, + "step": 6234 + }, + { + "epoch": 0.868807914721661, + "grad_norm": 0.4242820143699646, + "learning_rate": 1.943393237141311e-06, + "loss": 0.08485889434814453, + "step": 6235 + }, + { + "epoch": 0.8689472584128753, + "grad_norm": 0.37107914686203003, + "learning_rate": 1.9393416039483724e-06, + "loss": 0.07068824768066406, + "step": 6236 + }, + { + "epoch": 0.8690866021040897, + "grad_norm": 0.5409899950027466, + "learning_rate": 1.9352939834214004e-06, + "loss": 0.08830547332763672, + "step": 6237 + }, + { + "epoch": 0.8692259457953041, + "grad_norm": 0.5692727565765381, + "learning_rate": 1.931250376459679e-06, + "loss": 0.09973335266113281, + "step": 6238 + }, + { + "epoch": 0.8693652894865185, + "grad_norm": 0.484394907951355, + "learning_rate": 1.9272107839616062e-06, + "loss": 0.07626819610595703, + "step": 6239 + }, + { + "epoch": 0.8695046331777329, + "grad_norm": 0.6615424156188965, + "learning_rate": 1.923175206824688e-06, + "loss": 0.08857154846191406, + "step": 6240 + }, + { + "epoch": 0.8696439768689472, + "grad_norm": 0.39871126413345337, + "learning_rate": 1.9191436459455406e-06, + "loss": 0.09214210510253906, + "step": 6241 + }, + { + "epoch": 0.8697833205601616, + "grad_norm": 0.5574257969856262, + "learning_rate": 1.915116102219883e-06, + "loss": 0.09149551391601562, + "step": 6242 + }, + { + "epoch": 0.869922664251376, + "grad_norm": 0.7005349397659302, + "learning_rate": 1.911092576542537e-06, + "loss": 0.0981062650680542, + "step": 6243 + }, + { + "epoch": 0.8700620079425904, + "grad_norm": 0.7010791301727295, + "learning_rate": 1.9070730698074458e-06, + "loss": 0.09667015075683594, + "step": 6244 + }, + { + "epoch": 0.8702013516338047, + "grad_norm": 0.4579945504665375, + "learning_rate": 1.9030575829076525e-06, + "loss": 0.08154678344726562, + "step": 6245 + }, + { + "epoch": 0.8703406953250191, + "grad_norm": 0.5241400003433228, + "learning_rate": 1.8990461167353014e-06, + "loss": 0.0797882080078125, + "step": 6246 + }, + { + "epoch": 0.8704800390162335, + "grad_norm": 0.5410441160202026, + "learning_rate": 1.8950386721816549e-06, + "loss": 0.09391593933105469, + "step": 6247 + }, + { + "epoch": 0.8706193827074479, + "grad_norm": 0.6013146042823792, + "learning_rate": 1.8910352501370677e-06, + "loss": 0.09564018249511719, + "step": 6248 + }, + { + "epoch": 0.8707587263986623, + "grad_norm": 0.46614933013916016, + "learning_rate": 1.887035851491017e-06, + "loss": 0.08067035675048828, + "step": 6249 + }, + { + "epoch": 0.8708980700898766, + "grad_norm": 0.5102871060371399, + "learning_rate": 1.8830404771320721e-06, + "loss": 0.07986068725585938, + "step": 6250 + }, + { + "epoch": 0.871037413781091, + "grad_norm": 0.5834581851959229, + "learning_rate": 1.8790491279479139e-06, + "loss": 0.08818435668945312, + "step": 6251 + }, + { + "epoch": 0.8711767574723055, + "grad_norm": 0.4127286672592163, + "learning_rate": 1.8750618048253377e-06, + "loss": 0.08026695251464844, + "step": 6252 + }, + { + "epoch": 0.8713161011635199, + "grad_norm": 0.40194007754325867, + "learning_rate": 1.8710785086502237e-06, + "loss": 0.07964897155761719, + "step": 6253 + }, + { + "epoch": 0.8714554448547343, + "grad_norm": 1.3593603372573853, + "learning_rate": 1.867099240307575e-06, + "loss": 0.1092844009399414, + "step": 6254 + }, + { + "epoch": 0.8715947885459486, + "grad_norm": 0.4033900201320648, + "learning_rate": 1.8631240006814933e-06, + "loss": 0.07756614685058594, + "step": 6255 + }, + { + "epoch": 0.871734132237163, + "grad_norm": 0.7864959239959717, + "learning_rate": 1.85915279065519e-06, + "loss": 0.1277303695678711, + "step": 6256 + }, + { + "epoch": 0.8718734759283774, + "grad_norm": 0.5001303553581238, + "learning_rate": 1.85518561111097e-06, + "loss": 0.08765029907226562, + "step": 6257 + }, + { + "epoch": 0.8720128196195918, + "grad_norm": 0.544670820236206, + "learning_rate": 1.85122246293026e-06, + "loss": 0.10047054290771484, + "step": 6258 + }, + { + "epoch": 0.8721521633108061, + "grad_norm": 0.407781183719635, + "learning_rate": 1.847263346993573e-06, + "loss": 0.0866861343383789, + "step": 6259 + }, + { + "epoch": 0.8722915070020205, + "grad_norm": 0.43025222420692444, + "learning_rate": 1.8433082641805323e-06, + "loss": 0.09141826629638672, + "step": 6260 + }, + { + "epoch": 0.8724308506932349, + "grad_norm": 0.4151306450366974, + "learning_rate": 1.8393572153698724e-06, + "loss": 0.088409423828125, + "step": 6261 + }, + { + "epoch": 0.8725701943844493, + "grad_norm": 0.39542481303215027, + "learning_rate": 1.835410201439427e-06, + "loss": 0.08873939514160156, + "step": 6262 + }, + { + "epoch": 0.8727095380756636, + "grad_norm": 0.5482836961746216, + "learning_rate": 1.83146722326613e-06, + "loss": 0.10812187194824219, + "step": 6263 + }, + { + "epoch": 0.872848881766878, + "grad_norm": 0.4237959086894989, + "learning_rate": 1.8275282817260187e-06, + "loss": 0.08676815032958984, + "step": 6264 + }, + { + "epoch": 0.8729882254580924, + "grad_norm": 0.7128036618232727, + "learning_rate": 1.8235933776942394e-06, + "loss": 0.11777877807617188, + "step": 6265 + }, + { + "epoch": 0.8731275691493068, + "grad_norm": 0.4708336591720581, + "learning_rate": 1.8196625120450396e-06, + "loss": 0.09075069427490234, + "step": 6266 + }, + { + "epoch": 0.8732669128405212, + "grad_norm": 0.33907124400138855, + "learning_rate": 1.8157356856517626e-06, + "loss": 0.08202171325683594, + "step": 6267 + }, + { + "epoch": 0.8734062565317355, + "grad_norm": 0.3840540945529938, + "learning_rate": 1.8118128993868667e-06, + "loss": 0.0824432373046875, + "step": 6268 + }, + { + "epoch": 0.8735456002229499, + "grad_norm": 0.43448251485824585, + "learning_rate": 1.8078941541218964e-06, + "loss": 0.06835675239562988, + "step": 6269 + }, + { + "epoch": 0.8736849439141643, + "grad_norm": 0.5353960394859314, + "learning_rate": 1.8039794507275155e-06, + "loss": 0.10261344909667969, + "step": 6270 + }, + { + "epoch": 0.8738242876053787, + "grad_norm": 0.5479403138160706, + "learning_rate": 1.8000687900734748e-06, + "loss": 0.08745980262756348, + "step": 6271 + }, + { + "epoch": 0.873963631296593, + "grad_norm": 0.4520357847213745, + "learning_rate": 1.796162173028637e-06, + "loss": 0.07937431335449219, + "step": 6272 + }, + { + "epoch": 0.8741029749878074, + "grad_norm": 0.41527578234672546, + "learning_rate": 1.7922596004609682e-06, + "loss": 0.086029052734375, + "step": 6273 + }, + { + "epoch": 0.8742423186790218, + "grad_norm": 0.5224247574806213, + "learning_rate": 1.7883610732375278e-06, + "loss": 0.08316230773925781, + "step": 6274 + }, + { + "epoch": 0.8743816623702362, + "grad_norm": 0.9027870893478394, + "learning_rate": 1.784466592224472e-06, + "loss": 0.095733642578125, + "step": 6275 + }, + { + "epoch": 0.8745210060614506, + "grad_norm": 0.4600382447242737, + "learning_rate": 1.7805761582870729e-06, + "loss": 0.09066581726074219, + "step": 6276 + }, + { + "epoch": 0.8746603497526649, + "grad_norm": 0.43881139159202576, + "learning_rate": 1.7766897722896993e-06, + "loss": 0.08886432647705078, + "step": 6277 + }, + { + "epoch": 0.8747996934438793, + "grad_norm": 0.30593910813331604, + "learning_rate": 1.7728074350958068e-06, + "loss": 0.06254339218139648, + "step": 6278 + }, + { + "epoch": 0.8749390371350937, + "grad_norm": 0.5238625407218933, + "learning_rate": 1.7689291475679748e-06, + "loss": 0.09732818603515625, + "step": 6279 + }, + { + "epoch": 0.8750783808263081, + "grad_norm": 0.35391443967819214, + "learning_rate": 1.7650549105678583e-06, + "loss": 0.0754547119140625, + "step": 6280 + }, + { + "epoch": 0.8752177245175224, + "grad_norm": 0.4713365137577057, + "learning_rate": 1.7611847249562352e-06, + "loss": 0.09496212005615234, + "step": 6281 + }, + { + "epoch": 0.8753570682087368, + "grad_norm": 0.5342576503753662, + "learning_rate": 1.7573185915929625e-06, + "loss": 0.09433174133300781, + "step": 6282 + }, + { + "epoch": 0.8754964118999512, + "grad_norm": 0.5783568024635315, + "learning_rate": 1.7534565113370106e-06, + "loss": 0.0820608139038086, + "step": 6283 + }, + { + "epoch": 0.8756357555911656, + "grad_norm": 0.36649298667907715, + "learning_rate": 1.749598485046451e-06, + "loss": 0.07433891296386719, + "step": 6284 + }, + { + "epoch": 0.87577509928238, + "grad_norm": 0.37167203426361084, + "learning_rate": 1.7457445135784423e-06, + "loss": 0.08332633972167969, + "step": 6285 + }, + { + "epoch": 0.8759144429735943, + "grad_norm": 0.5681793689727783, + "learning_rate": 1.7418945977892488e-06, + "loss": 0.08565521240234375, + "step": 6286 + }, + { + "epoch": 0.8760537866648087, + "grad_norm": 0.5762673020362854, + "learning_rate": 1.7380487385342371e-06, + "loss": 0.08577793836593628, + "step": 6287 + }, + { + "epoch": 0.8761931303560231, + "grad_norm": 0.5322290062904358, + "learning_rate": 1.7342069366678705e-06, + "loss": 0.10392379760742188, + "step": 6288 + }, + { + "epoch": 0.8763324740472375, + "grad_norm": 0.5654887557029724, + "learning_rate": 1.7303691930437062e-06, + "loss": 0.09124374389648438, + "step": 6289 + }, + { + "epoch": 0.8764718177384518, + "grad_norm": 0.5062574744224548, + "learning_rate": 1.726535508514402e-06, + "loss": 0.07695865631103516, + "step": 6290 + }, + { + "epoch": 0.8766111614296662, + "grad_norm": 0.6927103400230408, + "learning_rate": 1.7227058839317213e-06, + "loss": 0.1305704116821289, + "step": 6291 + }, + { + "epoch": 0.8767505051208806, + "grad_norm": 0.3633204698562622, + "learning_rate": 1.718880320146512e-06, + "loss": 0.06301212310791016, + "step": 6292 + }, + { + "epoch": 0.8768898488120951, + "grad_norm": 0.517778217792511, + "learning_rate": 1.7150588180087302e-06, + "loss": 0.08899879455566406, + "step": 6293 + }, + { + "epoch": 0.8770291925033095, + "grad_norm": 0.7972524166107178, + "learning_rate": 1.7112413783674276e-06, + "loss": 0.08428764343261719, + "step": 6294 + }, + { + "epoch": 0.8771685361945238, + "grad_norm": 0.3129339814186096, + "learning_rate": 1.7074280020707568e-06, + "loss": 0.0727548599243164, + "step": 6295 + }, + { + "epoch": 0.8773078798857382, + "grad_norm": 0.4866119921207428, + "learning_rate": 1.7036186899659513e-06, + "loss": 0.08776187896728516, + "step": 6296 + }, + { + "epoch": 0.8774472235769526, + "grad_norm": 0.4957486689090729, + "learning_rate": 1.6998134428993606e-06, + "loss": 0.09513282775878906, + "step": 6297 + }, + { + "epoch": 0.877586567268167, + "grad_norm": 0.35257670283317566, + "learning_rate": 1.6960122617164243e-06, + "loss": 0.07496833801269531, + "step": 6298 + }, + { + "epoch": 0.8777259109593814, + "grad_norm": 0.3709847927093506, + "learning_rate": 1.6922151472616733e-06, + "loss": 0.08827400207519531, + "step": 6299 + }, + { + "epoch": 0.8778652546505957, + "grad_norm": 0.3986945152282715, + "learning_rate": 1.688422100378746e-06, + "loss": 0.07700061798095703, + "step": 6300 + }, + { + "epoch": 0.8780045983418101, + "grad_norm": 0.740483820438385, + "learning_rate": 1.684633121910364e-06, + "loss": 0.08341789245605469, + "step": 6301 + }, + { + "epoch": 0.8781439420330245, + "grad_norm": 0.48858562111854553, + "learning_rate": 1.6808482126983584e-06, + "loss": 0.08703994750976562, + "step": 6302 + }, + { + "epoch": 0.8782832857242389, + "grad_norm": 0.6888519525527954, + "learning_rate": 1.6770673735836452e-06, + "loss": 0.08544158935546875, + "step": 6303 + }, + { + "epoch": 0.8784226294154532, + "grad_norm": 0.5261240005493164, + "learning_rate": 1.6732906054062392e-06, + "loss": 0.09234046936035156, + "step": 6304 + }, + { + "epoch": 0.8785619731066676, + "grad_norm": 0.3555193245410919, + "learning_rate": 1.6695179090052604e-06, + "loss": 0.08086013793945312, + "step": 6305 + }, + { + "epoch": 0.878701316797882, + "grad_norm": 0.47593972086906433, + "learning_rate": 1.6657492852189095e-06, + "loss": 0.07700574398040771, + "step": 6306 + }, + { + "epoch": 0.8788406604890964, + "grad_norm": 0.5962252616882324, + "learning_rate": 1.6619847348844854e-06, + "loss": 0.11449241638183594, + "step": 6307 + }, + { + "epoch": 0.8789800041803107, + "grad_norm": 0.7706310153007507, + "learning_rate": 1.6582242588383902e-06, + "loss": 0.09152030944824219, + "step": 6308 + }, + { + "epoch": 0.8791193478715251, + "grad_norm": 0.589863657951355, + "learning_rate": 1.6544678579161178e-06, + "loss": 0.09484291076660156, + "step": 6309 + }, + { + "epoch": 0.8792586915627395, + "grad_norm": 0.7766344547271729, + "learning_rate": 1.6507155329522517e-06, + "loss": 0.09229469299316406, + "step": 6310 + }, + { + "epoch": 0.8793980352539539, + "grad_norm": 0.5800797343254089, + "learning_rate": 1.6469672847804697e-06, + "loss": 0.09793281555175781, + "step": 6311 + }, + { + "epoch": 0.8795373789451683, + "grad_norm": 0.38304856419563293, + "learning_rate": 1.6432231142335498e-06, + "loss": 0.06971406936645508, + "step": 6312 + }, + { + "epoch": 0.8796767226363826, + "grad_norm": 0.4364495277404785, + "learning_rate": 1.6394830221433643e-06, + "loss": 0.07965850830078125, + "step": 6313 + }, + { + "epoch": 0.879816066327597, + "grad_norm": 0.7478083968162537, + "learning_rate": 1.635747009340871e-06, + "loss": 0.12259292602539062, + "step": 6314 + }, + { + "epoch": 0.8799554100188114, + "grad_norm": 0.46636641025543213, + "learning_rate": 1.6320150766561283e-06, + "loss": 0.0996561050415039, + "step": 6315 + }, + { + "epoch": 0.8800947537100258, + "grad_norm": 0.3458871841430664, + "learning_rate": 1.6282872249182923e-06, + "loss": 0.08397769927978516, + "step": 6316 + }, + { + "epoch": 0.8802340974012401, + "grad_norm": 0.4822901487350464, + "learning_rate": 1.624563454955601e-06, + "loss": 0.07341158390045166, + "step": 6317 + }, + { + "epoch": 0.8803734410924545, + "grad_norm": 0.7191197872161865, + "learning_rate": 1.620843767595388e-06, + "loss": 0.0814666748046875, + "step": 6318 + }, + { + "epoch": 0.8805127847836689, + "grad_norm": 0.44242310523986816, + "learning_rate": 1.6171281636640856e-06, + "loss": 0.08624649047851562, + "step": 6319 + }, + { + "epoch": 0.8806521284748833, + "grad_norm": 0.4188825190067291, + "learning_rate": 1.6134166439872224e-06, + "loss": 0.08341789245605469, + "step": 6320 + }, + { + "epoch": 0.8807914721660977, + "grad_norm": 0.9095209836959839, + "learning_rate": 1.6097092093894074e-06, + "loss": 0.11203193664550781, + "step": 6321 + }, + { + "epoch": 0.880930815857312, + "grad_norm": 0.35737431049346924, + "learning_rate": 1.606005860694344e-06, + "loss": 0.07877731323242188, + "step": 6322 + }, + { + "epoch": 0.8810701595485264, + "grad_norm": 0.34806835651397705, + "learning_rate": 1.6023065987248388e-06, + "loss": 0.0754852294921875, + "step": 6323 + }, + { + "epoch": 0.8812095032397408, + "grad_norm": 0.35264816880226135, + "learning_rate": 1.598611424302783e-06, + "loss": 0.07625389099121094, + "step": 6324 + }, + { + "epoch": 0.8813488469309552, + "grad_norm": 0.5449998378753662, + "learning_rate": 1.5949203382491529e-06, + "loss": 0.0832977294921875, + "step": 6325 + }, + { + "epoch": 0.8814881906221695, + "grad_norm": 0.4589337706565857, + "learning_rate": 1.5912333413840331e-06, + "loss": 0.0918121337890625, + "step": 6326 + }, + { + "epoch": 0.8816275343133839, + "grad_norm": 0.42438414692878723, + "learning_rate": 1.587550434526588e-06, + "loss": 0.09199142456054688, + "step": 6327 + }, + { + "epoch": 0.8817668780045983, + "grad_norm": 0.40238797664642334, + "learning_rate": 1.5838716184950653e-06, + "loss": 0.06925392150878906, + "step": 6328 + }, + { + "epoch": 0.8819062216958127, + "grad_norm": 0.35082492232322693, + "learning_rate": 1.5801968941068247e-06, + "loss": 0.0799245834350586, + "step": 6329 + }, + { + "epoch": 0.882045565387027, + "grad_norm": 0.6989976763725281, + "learning_rate": 1.5765262621783062e-06, + "loss": 0.08462905883789062, + "step": 6330 + }, + { + "epoch": 0.8821849090782414, + "grad_norm": 0.46880578994750977, + "learning_rate": 1.572859723525031e-06, + "loss": 0.08308029174804688, + "step": 6331 + }, + { + "epoch": 0.8823242527694558, + "grad_norm": 0.30709362030029297, + "learning_rate": 1.5691972789616338e-06, + "loss": 0.07713890075683594, + "step": 6332 + }, + { + "epoch": 0.8824635964606703, + "grad_norm": 0.5104765892028809, + "learning_rate": 1.565538929301813e-06, + "loss": 0.08289146423339844, + "step": 6333 + }, + { + "epoch": 0.8826029401518847, + "grad_norm": 0.4990752339363098, + "learning_rate": 1.5618846753583805e-06, + "loss": 0.0770115852355957, + "step": 6334 + }, + { + "epoch": 0.882742283843099, + "grad_norm": 0.476068377494812, + "learning_rate": 1.558234517943218e-06, + "loss": 0.09142875671386719, + "step": 6335 + }, + { + "epoch": 0.8828816275343134, + "grad_norm": 0.458980530500412, + "learning_rate": 1.5545884578673165e-06, + "loss": 0.08397674560546875, + "step": 6336 + }, + { + "epoch": 0.8830209712255278, + "grad_norm": 0.5025445818901062, + "learning_rate": 1.5509464959407438e-06, + "loss": 0.08781051635742188, + "step": 6337 + }, + { + "epoch": 0.8831603149167422, + "grad_norm": 0.5024409890174866, + "learning_rate": 1.5473086329726638e-06, + "loss": 0.08874130249023438, + "step": 6338 + }, + { + "epoch": 0.8832996586079566, + "grad_norm": 0.500133216381073, + "learning_rate": 1.543674869771319e-06, + "loss": 0.08372211456298828, + "step": 6339 + }, + { + "epoch": 0.8834390022991709, + "grad_norm": 0.6413952708244324, + "learning_rate": 1.540045207144052e-06, + "loss": 0.08389091491699219, + "step": 6340 + }, + { + "epoch": 0.8835783459903853, + "grad_norm": 0.41215160489082336, + "learning_rate": 1.5364196458972957e-06, + "loss": 0.07223224639892578, + "step": 6341 + }, + { + "epoch": 0.8837176896815997, + "grad_norm": 0.4764760434627533, + "learning_rate": 1.5327981868365638e-06, + "loss": 0.08948707580566406, + "step": 6342 + }, + { + "epoch": 0.8838570333728141, + "grad_norm": 0.40400072932243347, + "learning_rate": 1.529180830766459e-06, + "loss": 0.08302164077758789, + "step": 6343 + }, + { + "epoch": 0.8839963770640284, + "grad_norm": 0.3861705958843231, + "learning_rate": 1.5255675784906764e-06, + "loss": 0.08445167541503906, + "step": 6344 + }, + { + "epoch": 0.8841357207552428, + "grad_norm": 0.6048950552940369, + "learning_rate": 1.5219584308120028e-06, + "loss": 0.08463382720947266, + "step": 6345 + }, + { + "epoch": 0.8842750644464572, + "grad_norm": 0.3846758008003235, + "learning_rate": 1.518353388532301e-06, + "loss": 0.08160018920898438, + "step": 6346 + }, + { + "epoch": 0.8844144081376716, + "grad_norm": 0.36984318494796753, + "learning_rate": 1.5147524524525392e-06, + "loss": 0.08013057708740234, + "step": 6347 + }, + { + "epoch": 0.884553751828886, + "grad_norm": 0.45989078283309937, + "learning_rate": 1.511155623372751e-06, + "loss": 0.09262943267822266, + "step": 6348 + }, + { + "epoch": 0.8846930955201003, + "grad_norm": 0.5449662804603577, + "learning_rate": 1.507562902092079e-06, + "loss": 0.09355545043945312, + "step": 6349 + }, + { + "epoch": 0.8848324392113147, + "grad_norm": 0.5927035212516785, + "learning_rate": 1.5039742894087384e-06, + "loss": 0.09884357452392578, + "step": 6350 + }, + { + "epoch": 0.8849717829025291, + "grad_norm": 0.3301393985748291, + "learning_rate": 1.50038978612004e-06, + "loss": 0.07071685791015625, + "step": 6351 + }, + { + "epoch": 0.8851111265937435, + "grad_norm": 0.6321145296096802, + "learning_rate": 1.4968093930223804e-06, + "loss": 0.09295082092285156, + "step": 6352 + }, + { + "epoch": 0.8852504702849578, + "grad_norm": 0.4258576035499573, + "learning_rate": 1.4932331109112387e-06, + "loss": 0.08598804473876953, + "step": 6353 + }, + { + "epoch": 0.8853898139761722, + "grad_norm": 0.5394376516342163, + "learning_rate": 1.489660940581179e-06, + "loss": 0.08057403564453125, + "step": 6354 + }, + { + "epoch": 0.8855291576673866, + "grad_norm": 0.621181309223175, + "learning_rate": 1.4860928828258604e-06, + "loss": 0.10129165649414062, + "step": 6355 + }, + { + "epoch": 0.885668501358601, + "grad_norm": 0.42420393228530884, + "learning_rate": 1.4825289384380282e-06, + "loss": 0.08558464050292969, + "step": 6356 + }, + { + "epoch": 0.8858078450498154, + "grad_norm": 0.44859930872917175, + "learning_rate": 1.4789691082095004e-06, + "loss": 0.07982826232910156, + "step": 6357 + }, + { + "epoch": 0.8859471887410297, + "grad_norm": 0.5977414846420288, + "learning_rate": 1.4754133929311975e-06, + "loss": 0.09561729431152344, + "step": 6358 + }, + { + "epoch": 0.8860865324322441, + "grad_norm": 0.3958943784236908, + "learning_rate": 1.4718617933931146e-06, + "loss": 0.08152961730957031, + "step": 6359 + }, + { + "epoch": 0.8862258761234585, + "grad_norm": 0.5097781419754028, + "learning_rate": 1.4683143103843355e-06, + "loss": 0.07590675354003906, + "step": 6360 + }, + { + "epoch": 0.8863652198146729, + "grad_norm": 0.4506061375141144, + "learning_rate": 1.464770944693028e-06, + "loss": 0.08349227905273438, + "step": 6361 + }, + { + "epoch": 0.8865045635058872, + "grad_norm": 0.4598330855369568, + "learning_rate": 1.4612316971064555e-06, + "loss": 0.0760812759399414, + "step": 6362 + }, + { + "epoch": 0.8866439071971016, + "grad_norm": 0.8149619698524475, + "learning_rate": 1.4576965684109534e-06, + "loss": 0.11867141723632812, + "step": 6363 + }, + { + "epoch": 0.886783250888316, + "grad_norm": 0.4109499752521515, + "learning_rate": 1.4541655593919402e-06, + "loss": 0.09175491333007812, + "step": 6364 + }, + { + "epoch": 0.8869225945795304, + "grad_norm": 0.465846449136734, + "learning_rate": 1.4506386708339325e-06, + "loss": 0.08578348159790039, + "step": 6365 + }, + { + "epoch": 0.8870619382707448, + "grad_norm": 0.5545276403427124, + "learning_rate": 1.4471159035205262e-06, + "loss": 0.0914459228515625, + "step": 6366 + }, + { + "epoch": 0.8872012819619591, + "grad_norm": 0.9236868023872375, + "learning_rate": 1.4435972582343948e-06, + "loss": 0.1016225814819336, + "step": 6367 + }, + { + "epoch": 0.8873406256531735, + "grad_norm": 0.41003742814064026, + "learning_rate": 1.4400827357573043e-06, + "loss": 0.09086036682128906, + "step": 6368 + }, + { + "epoch": 0.8874799693443879, + "grad_norm": 0.5237320065498352, + "learning_rate": 1.4365723368700968e-06, + "loss": 0.08029747009277344, + "step": 6369 + }, + { + "epoch": 0.8876193130356023, + "grad_norm": 0.424459308385849, + "learning_rate": 1.4330660623527081e-06, + "loss": 0.08044815063476562, + "step": 6370 + }, + { + "epoch": 0.8877586567268166, + "grad_norm": 0.40138277411460876, + "learning_rate": 1.4295639129841466e-06, + "loss": 0.08036136627197266, + "step": 6371 + }, + { + "epoch": 0.887898000418031, + "grad_norm": 0.3817543089389801, + "learning_rate": 1.4260658895425162e-06, + "loss": 0.07809066772460938, + "step": 6372 + }, + { + "epoch": 0.8880373441092455, + "grad_norm": 0.4389804005622864, + "learning_rate": 1.4225719928049953e-06, + "loss": 0.08138847351074219, + "step": 6373 + }, + { + "epoch": 0.8881766878004599, + "grad_norm": 0.5515167117118835, + "learning_rate": 1.4190822235478496e-06, + "loss": 0.10170555114746094, + "step": 6374 + }, + { + "epoch": 0.8883160314916743, + "grad_norm": 0.36344870924949646, + "learning_rate": 1.415596582546419e-06, + "loss": 0.07912826538085938, + "step": 6375 + }, + { + "epoch": 0.8884553751828886, + "grad_norm": 0.5920867919921875, + "learning_rate": 1.4121150705751396e-06, + "loss": 0.08712196350097656, + "step": 6376 + }, + { + "epoch": 0.888594718874103, + "grad_norm": 0.3525214195251465, + "learning_rate": 1.4086376884075282e-06, + "loss": 0.06971549987792969, + "step": 6377 + }, + { + "epoch": 0.8887340625653174, + "grad_norm": 0.4676486849784851, + "learning_rate": 1.4051644368161688e-06, + "loss": 0.08191585540771484, + "step": 6378 + }, + { + "epoch": 0.8888734062565318, + "grad_norm": 0.35559090971946716, + "learning_rate": 1.4016953165727487e-06, + "loss": 0.07597541809082031, + "step": 6379 + }, + { + "epoch": 0.8890127499477462, + "grad_norm": 0.4510944187641144, + "learning_rate": 1.398230328448018e-06, + "loss": 0.08312606811523438, + "step": 6380 + }, + { + "epoch": 0.8891520936389605, + "grad_norm": 0.417629599571228, + "learning_rate": 1.3947694732118278e-06, + "loss": 0.0817713737487793, + "step": 6381 + }, + { + "epoch": 0.8892914373301749, + "grad_norm": 0.4177243113517761, + "learning_rate": 1.3913127516330916e-06, + "loss": 0.0850973129272461, + "step": 6382 + }, + { + "epoch": 0.8894307810213893, + "grad_norm": 0.5650798082351685, + "learning_rate": 1.3878601644798173e-06, + "loss": 0.09558820724487305, + "step": 6383 + }, + { + "epoch": 0.8895701247126037, + "grad_norm": 0.5262091755867004, + "learning_rate": 1.3844117125190982e-06, + "loss": 0.08255577087402344, + "step": 6384 + }, + { + "epoch": 0.889709468403818, + "grad_norm": 0.4494807720184326, + "learning_rate": 1.3809673965170923e-06, + "loss": 0.07415103912353516, + "step": 6385 + }, + { + "epoch": 0.8898488120950324, + "grad_norm": 0.5449673533439636, + "learning_rate": 1.3775272172390497e-06, + "loss": 0.10709571838378906, + "step": 6386 + }, + { + "epoch": 0.8899881557862468, + "grad_norm": 0.48424261808395386, + "learning_rate": 1.3740911754493014e-06, + "loss": 0.09095096588134766, + "step": 6387 + }, + { + "epoch": 0.8901274994774612, + "grad_norm": 0.41402631998062134, + "learning_rate": 1.3706592719112588e-06, + "loss": 0.08230209350585938, + "step": 6388 + }, + { + "epoch": 0.8902668431686755, + "grad_norm": 0.47235023975372314, + "learning_rate": 1.3672315073874098e-06, + "loss": 0.09070205688476562, + "step": 6389 + }, + { + "epoch": 0.8904061868598899, + "grad_norm": 0.5694523453712463, + "learning_rate": 1.3638078826393296e-06, + "loss": 0.08778858184814453, + "step": 6390 + }, + { + "epoch": 0.8905455305511043, + "grad_norm": 0.39232346415519714, + "learning_rate": 1.3603883984276656e-06, + "loss": 0.07765388488769531, + "step": 6391 + }, + { + "epoch": 0.8906848742423187, + "grad_norm": 0.39279985427856445, + "learning_rate": 1.3569730555121452e-06, + "loss": 0.07848548889160156, + "step": 6392 + }, + { + "epoch": 0.8908242179335331, + "grad_norm": 0.4650666415691376, + "learning_rate": 1.353561854651586e-06, + "loss": 0.08896446228027344, + "step": 6393 + }, + { + "epoch": 0.8909635616247474, + "grad_norm": 0.5000255107879639, + "learning_rate": 1.3501547966038775e-06, + "loss": 0.08603096008300781, + "step": 6394 + }, + { + "epoch": 0.8911029053159618, + "grad_norm": 0.7026093006134033, + "learning_rate": 1.3467518821259963e-06, + "loss": 0.08909416198730469, + "step": 6395 + }, + { + "epoch": 0.8912422490071762, + "grad_norm": 0.5603627562522888, + "learning_rate": 1.3433531119739794e-06, + "loss": 0.08630180358886719, + "step": 6396 + }, + { + "epoch": 0.8913815926983906, + "grad_norm": 0.6309481263160706, + "learning_rate": 1.3399584869029613e-06, + "loss": 0.11292648315429688, + "step": 6397 + }, + { + "epoch": 0.891520936389605, + "grad_norm": 0.45708900690078735, + "learning_rate": 1.3365680076671561e-06, + "loss": 0.08647346496582031, + "step": 6398 + }, + { + "epoch": 0.8916602800808193, + "grad_norm": 0.3597449064254761, + "learning_rate": 1.3331816750198412e-06, + "loss": 0.08284568786621094, + "step": 6399 + }, + { + "epoch": 0.8917996237720337, + "grad_norm": 0.4600237309932709, + "learning_rate": 1.3297994897133927e-06, + "loss": 0.08671951293945312, + "step": 6400 + }, + { + "epoch": 0.8919389674632481, + "grad_norm": 0.39665377140045166, + "learning_rate": 1.326421452499247e-06, + "loss": 0.08498859405517578, + "step": 6401 + }, + { + "epoch": 0.8920783111544625, + "grad_norm": 0.5649403929710388, + "learning_rate": 1.3230475641279306e-06, + "loss": 0.09745025634765625, + "step": 6402 + }, + { + "epoch": 0.8922176548456768, + "grad_norm": 0.4218357503414154, + "learning_rate": 1.3196778253490417e-06, + "loss": 0.08579254150390625, + "step": 6403 + }, + { + "epoch": 0.8923569985368912, + "grad_norm": 0.7025514841079712, + "learning_rate": 1.3163122369112591e-06, + "loss": 0.10044479370117188, + "step": 6404 + }, + { + "epoch": 0.8924963422281056, + "grad_norm": 0.38520827889442444, + "learning_rate": 1.312950799562347e-06, + "loss": 0.07383346557617188, + "step": 6405 + }, + { + "epoch": 0.89263568591932, + "grad_norm": 0.46286869049072266, + "learning_rate": 1.3095935140491323e-06, + "loss": 0.08055496215820312, + "step": 6406 + }, + { + "epoch": 0.8927750296105343, + "grad_norm": 0.33698034286499023, + "learning_rate": 1.3062403811175272e-06, + "loss": 0.07186508178710938, + "step": 6407 + }, + { + "epoch": 0.8929143733017487, + "grad_norm": 0.3547825813293457, + "learning_rate": 1.302891401512525e-06, + "loss": 0.07463645935058594, + "step": 6408 + }, + { + "epoch": 0.8930537169929631, + "grad_norm": 0.6862715482711792, + "learning_rate": 1.2995465759781944e-06, + "loss": 0.09093666076660156, + "step": 6409 + }, + { + "epoch": 0.8931930606841775, + "grad_norm": 0.3969494700431824, + "learning_rate": 1.2962059052576703e-06, + "loss": 0.07637226581573486, + "step": 6410 + }, + { + "epoch": 0.8933324043753919, + "grad_norm": 0.5649289488792419, + "learning_rate": 1.2928693900931856e-06, + "loss": 0.098297119140625, + "step": 6411 + }, + { + "epoch": 0.8934717480666062, + "grad_norm": 0.3889913856983185, + "learning_rate": 1.2895370312260247e-06, + "loss": 0.08074092864990234, + "step": 6412 + }, + { + "epoch": 0.8936110917578207, + "grad_norm": 0.3446768820285797, + "learning_rate": 1.2862088293965736e-06, + "loss": 0.07930564880371094, + "step": 6413 + }, + { + "epoch": 0.8937504354490351, + "grad_norm": 0.47524386644363403, + "learning_rate": 1.2828847853442738e-06, + "loss": 0.07949066162109375, + "step": 6414 + }, + { + "epoch": 0.8938897791402495, + "grad_norm": 0.4290257692337036, + "learning_rate": 1.2795648998076572e-06, + "loss": 0.077239990234375, + "step": 6415 + }, + { + "epoch": 0.8940291228314639, + "grad_norm": 0.40120795369148254, + "learning_rate": 1.2762491735243264e-06, + "loss": 0.0703887939453125, + "step": 6416 + }, + { + "epoch": 0.8941684665226782, + "grad_norm": 0.37266433238983154, + "learning_rate": 1.2729376072309597e-06, + "loss": 0.0791635513305664, + "step": 6417 + }, + { + "epoch": 0.8943078102138926, + "grad_norm": 0.6105780005455017, + "learning_rate": 1.2696302016633078e-06, + "loss": 0.09148025512695312, + "step": 6418 + }, + { + "epoch": 0.894447153905107, + "grad_norm": 0.46047353744506836, + "learning_rate": 1.266326957556201e-06, + "loss": 0.09693336486816406, + "step": 6419 + }, + { + "epoch": 0.8945864975963214, + "grad_norm": 0.6748453974723816, + "learning_rate": 1.2630278756435522e-06, + "loss": 0.09923076629638672, + "step": 6420 + }, + { + "epoch": 0.8947258412875357, + "grad_norm": 0.40677177906036377, + "learning_rate": 1.2597329566583372e-06, + "loss": 0.0769658088684082, + "step": 6421 + }, + { + "epoch": 0.8948651849787501, + "grad_norm": 0.5407921671867371, + "learning_rate": 1.2564422013326083e-06, + "loss": 0.10676002502441406, + "step": 6422 + }, + { + "epoch": 0.8950045286699645, + "grad_norm": 0.781747579574585, + "learning_rate": 1.2531556103974984e-06, + "loss": 0.0881643295288086, + "step": 6423 + }, + { + "epoch": 0.8951438723611789, + "grad_norm": 0.5875821113586426, + "learning_rate": 1.249873184583219e-06, + "loss": 0.10013866424560547, + "step": 6424 + }, + { + "epoch": 0.8952832160523932, + "grad_norm": 0.41099223494529724, + "learning_rate": 1.2465949246190422e-06, + "loss": 0.08477401733398438, + "step": 6425 + }, + { + "epoch": 0.8954225597436076, + "grad_norm": 0.44821012020111084, + "learning_rate": 1.2433208312333255e-06, + "loss": 0.07482528686523438, + "step": 6426 + }, + { + "epoch": 0.895561903434822, + "grad_norm": 0.6248829960823059, + "learning_rate": 1.2400509051535026e-06, + "loss": 0.10037422180175781, + "step": 6427 + }, + { + "epoch": 0.8957012471260364, + "grad_norm": 0.3791103661060333, + "learning_rate": 1.2367851471060654e-06, + "loss": 0.0788583755493164, + "step": 6428 + }, + { + "epoch": 0.8958405908172508, + "grad_norm": 0.5026759505271912, + "learning_rate": 1.2335235578165983e-06, + "loss": 0.08874320983886719, + "step": 6429 + }, + { + "epoch": 0.8959799345084651, + "grad_norm": 0.5596069097518921, + "learning_rate": 1.2302661380097547e-06, + "loss": 0.08631706237792969, + "step": 6430 + }, + { + "epoch": 0.8961192781996795, + "grad_norm": 0.4722602069377899, + "learning_rate": 1.227012888409249e-06, + "loss": 0.08994364738464355, + "step": 6431 + }, + { + "epoch": 0.8962586218908939, + "grad_norm": 0.6783243417739868, + "learning_rate": 1.2237638097378902e-06, + "loss": 0.09749794006347656, + "step": 6432 + }, + { + "epoch": 0.8963979655821083, + "grad_norm": 0.3757634460926056, + "learning_rate": 1.2205189027175402e-06, + "loss": 0.07761001586914062, + "step": 6433 + }, + { + "epoch": 0.8965373092733226, + "grad_norm": 0.6837706565856934, + "learning_rate": 1.2172781680691515e-06, + "loss": 0.09367942810058594, + "step": 6434 + }, + { + "epoch": 0.896676652964537, + "grad_norm": 0.5794640779495239, + "learning_rate": 1.2140416065127324e-06, + "loss": 0.10168647766113281, + "step": 6435 + }, + { + "epoch": 0.8968159966557514, + "grad_norm": 0.39136478304862976, + "learning_rate": 1.2108092187673791e-06, + "loss": 0.07783317565917969, + "step": 6436 + }, + { + "epoch": 0.8969553403469658, + "grad_norm": 0.4368220567703247, + "learning_rate": 1.2075810055512549e-06, + "loss": 0.08948326110839844, + "step": 6437 + }, + { + "epoch": 0.8970946840381802, + "grad_norm": 0.48430946469306946, + "learning_rate": 1.2043569675815924e-06, + "loss": 0.08983802795410156, + "step": 6438 + }, + { + "epoch": 0.8972340277293945, + "grad_norm": 0.5460370779037476, + "learning_rate": 1.201137105574699e-06, + "loss": 0.0887460708618164, + "step": 6439 + }, + { + "epoch": 0.8973733714206089, + "grad_norm": 0.6198356747627258, + "learning_rate": 1.1979214202459532e-06, + "loss": 0.10152339935302734, + "step": 6440 + }, + { + "epoch": 0.8975127151118233, + "grad_norm": 0.5421640872955322, + "learning_rate": 1.1947099123098126e-06, + "loss": 0.09560537338256836, + "step": 6441 + }, + { + "epoch": 0.8976520588030377, + "grad_norm": 0.6842681169509888, + "learning_rate": 1.1915025824797976e-06, + "loss": 0.10262680053710938, + "step": 6442 + }, + { + "epoch": 0.897791402494252, + "grad_norm": 0.47645917534828186, + "learning_rate": 1.1882994314685003e-06, + "loss": 0.08349990844726562, + "step": 6443 + }, + { + "epoch": 0.8979307461854664, + "grad_norm": 0.4418428838253021, + "learning_rate": 1.1851004599875915e-06, + "loss": 0.07695960998535156, + "step": 6444 + }, + { + "epoch": 0.8980700898766808, + "grad_norm": 0.47461268305778503, + "learning_rate": 1.1819056687478114e-06, + "loss": 0.08094596862792969, + "step": 6445 + }, + { + "epoch": 0.8982094335678952, + "grad_norm": 1.044643759727478, + "learning_rate": 1.1787150584589657e-06, + "loss": 0.09734702110290527, + "step": 6446 + }, + { + "epoch": 0.8983487772591096, + "grad_norm": 0.5594095587730408, + "learning_rate": 1.1755286298299339e-06, + "loss": 0.08805203437805176, + "step": 6447 + }, + { + "epoch": 0.8984881209503239, + "grad_norm": 0.3776874840259552, + "learning_rate": 1.1723463835686765e-06, + "loss": 0.07898330688476562, + "step": 6448 + }, + { + "epoch": 0.8986274646415383, + "grad_norm": 0.6132358312606812, + "learning_rate": 1.169168320382208e-06, + "loss": 0.08117485046386719, + "step": 6449 + }, + { + "epoch": 0.8987668083327527, + "grad_norm": 0.383466899394989, + "learning_rate": 1.165994440976621e-06, + "loss": 0.0841522216796875, + "step": 6450 + }, + { + "epoch": 0.8989061520239671, + "grad_norm": 0.3943922817707062, + "learning_rate": 1.162824746057083e-06, + "loss": 0.07530975341796875, + "step": 6451 + }, + { + "epoch": 0.8990454957151814, + "grad_norm": 0.5108675360679626, + "learning_rate": 1.159659236327828e-06, + "loss": 0.08630943298339844, + "step": 6452 + }, + { + "epoch": 0.8991848394063959, + "grad_norm": 0.3421303927898407, + "learning_rate": 1.1564979124921582e-06, + "loss": 0.07792282104492188, + "step": 6453 + }, + { + "epoch": 0.8993241830976103, + "grad_norm": 0.5256783366203308, + "learning_rate": 1.153340775252445e-06, + "loss": 0.10101890563964844, + "step": 6454 + }, + { + "epoch": 0.8994635267888247, + "grad_norm": 0.5435088872909546, + "learning_rate": 1.1501878253101362e-06, + "loss": 0.08436775207519531, + "step": 6455 + }, + { + "epoch": 0.8996028704800391, + "grad_norm": 0.4733598530292511, + "learning_rate": 1.1470390633657468e-06, + "loss": 0.09099006652832031, + "step": 6456 + }, + { + "epoch": 0.8997422141712534, + "grad_norm": 0.5212000012397766, + "learning_rate": 1.1438944901188532e-06, + "loss": 0.0834054946899414, + "step": 6457 + }, + { + "epoch": 0.8998815578624678, + "grad_norm": 0.40309590101242065, + "learning_rate": 1.1407541062681138e-06, + "loss": 0.07789039611816406, + "step": 6458 + }, + { + "epoch": 0.9000209015536822, + "grad_norm": 0.7016037106513977, + "learning_rate": 1.1376179125112508e-06, + "loss": 0.12248611450195312, + "step": 6459 + }, + { + "epoch": 0.9001602452448966, + "grad_norm": 0.6030941009521484, + "learning_rate": 1.1344859095450468e-06, + "loss": 0.09895515441894531, + "step": 6460 + }, + { + "epoch": 0.900299588936111, + "grad_norm": 0.4441756010055542, + "learning_rate": 1.1313580980653671e-06, + "loss": 0.09642410278320312, + "step": 6461 + }, + { + "epoch": 0.9004389326273253, + "grad_norm": 0.5343574285507202, + "learning_rate": 1.1282344787671429e-06, + "loss": 0.09718513488769531, + "step": 6462 + }, + { + "epoch": 0.9005782763185397, + "grad_norm": 0.4648703634738922, + "learning_rate": 1.1251150523443676e-06, + "loss": 0.07480144500732422, + "step": 6463 + }, + { + "epoch": 0.9007176200097541, + "grad_norm": 0.4142240285873413, + "learning_rate": 1.121999819490105e-06, + "loss": 0.08765983581542969, + "step": 6464 + }, + { + "epoch": 0.9008569637009685, + "grad_norm": 0.39899659156799316, + "learning_rate": 1.1188887808964878e-06, + "loss": 0.06779813766479492, + "step": 6465 + }, + { + "epoch": 0.9009963073921828, + "grad_norm": 0.6355984210968018, + "learning_rate": 1.1157819372547252e-06, + "loss": 0.09247779846191406, + "step": 6466 + }, + { + "epoch": 0.9011356510833972, + "grad_norm": 0.3311305344104767, + "learning_rate": 1.1126792892550786e-06, + "loss": 0.07645606994628906, + "step": 6467 + }, + { + "epoch": 0.9012749947746116, + "grad_norm": 0.5016315579414368, + "learning_rate": 1.10958083758689e-06, + "loss": 0.08672046661376953, + "step": 6468 + }, + { + "epoch": 0.901414338465826, + "grad_norm": 0.5514034628868103, + "learning_rate": 1.1064865829385662e-06, + "loss": 0.08151054382324219, + "step": 6469 + }, + { + "epoch": 0.9015536821570403, + "grad_norm": 0.4267237186431885, + "learning_rate": 1.1033965259975777e-06, + "loss": 0.08419609069824219, + "step": 6470 + }, + { + "epoch": 0.9016930258482547, + "grad_norm": 0.5240784287452698, + "learning_rate": 1.1003106674504592e-06, + "loss": 0.09228682518005371, + "step": 6471 + }, + { + "epoch": 0.9018323695394691, + "grad_norm": 0.6452212333679199, + "learning_rate": 1.0972290079828252e-06, + "loss": 0.0912017822265625, + "step": 6472 + }, + { + "epoch": 0.9019717132306835, + "grad_norm": 0.3793030381202698, + "learning_rate": 1.094151548279352e-06, + "loss": 0.0841522216796875, + "step": 6473 + }, + { + "epoch": 0.9021110569218979, + "grad_norm": 0.31713631749153137, + "learning_rate": 1.0910782890237747e-06, + "loss": 0.07161331176757812, + "step": 6474 + }, + { + "epoch": 0.9022504006131122, + "grad_norm": 0.6860933899879456, + "learning_rate": 1.088009230898901e-06, + "loss": 0.10318183898925781, + "step": 6475 + }, + { + "epoch": 0.9023897443043266, + "grad_norm": 0.6143903136253357, + "learning_rate": 1.0849443745866095e-06, + "loss": 0.08656501770019531, + "step": 6476 + }, + { + "epoch": 0.902529087995541, + "grad_norm": 0.4611068069934845, + "learning_rate": 1.0818837207678423e-06, + "loss": 0.07996368408203125, + "step": 6477 + }, + { + "epoch": 0.9026684316867554, + "grad_norm": 0.5913567543029785, + "learning_rate": 1.0788272701226e-06, + "loss": 0.0882863998413086, + "step": 6478 + }, + { + "epoch": 0.9028077753779697, + "grad_norm": 0.40460845828056335, + "learning_rate": 1.0757750233299658e-06, + "loss": 0.0894012451171875, + "step": 6479 + }, + { + "epoch": 0.9029471190691841, + "grad_norm": 0.366120845079422, + "learning_rate": 1.0727269810680685e-06, + "loss": 0.07786941528320312, + "step": 6480 + }, + { + "epoch": 0.9030864627603985, + "grad_norm": 0.4122113883495331, + "learning_rate": 1.0696831440141242e-06, + "loss": 0.07747077941894531, + "step": 6481 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.5315484404563904, + "learning_rate": 1.0666435128443942e-06, + "loss": 0.1012115478515625, + "step": 6482 + }, + { + "epoch": 0.9033651501428273, + "grad_norm": 0.5365834832191467, + "learning_rate": 1.0636080882342203e-06, + "loss": 0.07804298400878906, + "step": 6483 + }, + { + "epoch": 0.9035044938340416, + "grad_norm": 0.3930744528770447, + "learning_rate": 1.0605768708580078e-06, + "loss": 0.08449172973632812, + "step": 6484 + }, + { + "epoch": 0.903643837525256, + "grad_norm": 0.5637907981872559, + "learning_rate": 1.0575498613892199e-06, + "loss": 0.09557151794433594, + "step": 6485 + }, + { + "epoch": 0.9037831812164704, + "grad_norm": 0.586622953414917, + "learning_rate": 1.0545270605003855e-06, + "loss": 0.09897899627685547, + "step": 6486 + }, + { + "epoch": 0.9039225249076848, + "grad_norm": 0.5867285132408142, + "learning_rate": 1.0515084688631071e-06, + "loss": 0.09984016418457031, + "step": 6487 + }, + { + "epoch": 0.9040618685988991, + "grad_norm": 0.6026041507720947, + "learning_rate": 1.0484940871480464e-06, + "loss": 0.08056831359863281, + "step": 6488 + }, + { + "epoch": 0.9042012122901135, + "grad_norm": 0.45551860332489014, + "learning_rate": 1.0454839160249274e-06, + "loss": 0.08339881896972656, + "step": 6489 + }, + { + "epoch": 0.9043405559813279, + "grad_norm": 0.5722783207893372, + "learning_rate": 1.0424779561625465e-06, + "loss": 0.10648727416992188, + "step": 6490 + }, + { + "epoch": 0.9044798996725423, + "grad_norm": 0.48218291997909546, + "learning_rate": 1.0394762082287557e-06, + "loss": 0.09201622009277344, + "step": 6491 + }, + { + "epoch": 0.9046192433637567, + "grad_norm": 0.4056242108345032, + "learning_rate": 1.036478672890473e-06, + "loss": 0.071868896484375, + "step": 6492 + }, + { + "epoch": 0.904758587054971, + "grad_norm": 0.4452251195907593, + "learning_rate": 1.0334853508136854e-06, + "loss": 0.08147430419921875, + "step": 6493 + }, + { + "epoch": 0.9048979307461855, + "grad_norm": 0.5563366413116455, + "learning_rate": 1.030496242663439e-06, + "loss": 0.08441352844238281, + "step": 6494 + }, + { + "epoch": 0.9050372744373999, + "grad_norm": 0.32269006967544556, + "learning_rate": 1.0275113491038512e-06, + "loss": 0.07616043090820312, + "step": 6495 + }, + { + "epoch": 0.9051766181286143, + "grad_norm": 0.4313589930534363, + "learning_rate": 1.0245306707980873e-06, + "loss": 0.0862884521484375, + "step": 6496 + }, + { + "epoch": 0.9053159618198287, + "grad_norm": 0.409189909696579, + "learning_rate": 1.021554208408393e-06, + "loss": 0.07885360717773438, + "step": 6497 + }, + { + "epoch": 0.905455305511043, + "grad_norm": 0.502906858921051, + "learning_rate": 1.018581962596068e-06, + "loss": 0.094268798828125, + "step": 6498 + }, + { + "epoch": 0.9055946492022574, + "grad_norm": 0.6183108687400818, + "learning_rate": 1.0156139340214755e-06, + "loss": 0.09248316287994385, + "step": 6499 + }, + { + "epoch": 0.9057339928934718, + "grad_norm": 0.49334415793418884, + "learning_rate": 1.01265012334405e-06, + "loss": 0.08922004699707031, + "step": 6500 + }, + { + "epoch": 0.9058733365846862, + "grad_norm": 0.6824068427085876, + "learning_rate": 1.009690531222276e-06, + "loss": 0.11182785034179688, + "step": 6501 + }, + { + "epoch": 0.9060126802759005, + "grad_norm": 0.38399577140808105, + "learning_rate": 1.0067351583137119e-06, + "loss": 0.07391929626464844, + "step": 6502 + }, + { + "epoch": 0.9061520239671149, + "grad_norm": 0.39867305755615234, + "learning_rate": 1.0037840052749682e-06, + "loss": 0.07080841064453125, + "step": 6503 + }, + { + "epoch": 0.9062913676583293, + "grad_norm": 0.7181921005249023, + "learning_rate": 1.000837072761729e-06, + "loss": 0.08862018585205078, + "step": 6504 + }, + { + "epoch": 0.9064307113495437, + "grad_norm": 0.4832775592803955, + "learning_rate": 9.978943614287374e-07, + "loss": 0.07720279693603516, + "step": 6505 + }, + { + "epoch": 0.906570055040758, + "grad_norm": 0.49359485507011414, + "learning_rate": 9.949558719297924e-07, + "loss": 0.08584785461425781, + "step": 6506 + }, + { + "epoch": 0.9067093987319724, + "grad_norm": 0.4017866849899292, + "learning_rate": 9.920216049177566e-07, + "loss": 0.07062435150146484, + "step": 6507 + }, + { + "epoch": 0.9068487424231868, + "grad_norm": 0.3613189160823822, + "learning_rate": 9.890915610445617e-07, + "loss": 0.07014286518096924, + "step": 6508 + }, + { + "epoch": 0.9069880861144012, + "grad_norm": 0.5319538712501526, + "learning_rate": 9.861657409611958e-07, + "loss": 0.09761238098144531, + "step": 6509 + }, + { + "epoch": 0.9071274298056156, + "grad_norm": 0.5773212313652039, + "learning_rate": 9.832441453177099e-07, + "loss": 0.08554744720458984, + "step": 6510 + }, + { + "epoch": 0.9072667734968299, + "grad_norm": 0.35067641735076904, + "learning_rate": 9.803267747632162e-07, + "loss": 0.06833457946777344, + "step": 6511 + }, + { + "epoch": 0.9074061171880443, + "grad_norm": 0.37243130803108215, + "learning_rate": 9.774136299458825e-07, + "loss": 0.07568359375, + "step": 6512 + }, + { + "epoch": 0.9075454608792587, + "grad_norm": 0.3575122356414795, + "learning_rate": 9.745047115129513e-07, + "loss": 0.06916666030883789, + "step": 6513 + }, + { + "epoch": 0.9076848045704731, + "grad_norm": 0.37038612365722656, + "learning_rate": 9.716000201107102e-07, + "loss": 0.07843589782714844, + "step": 6514 + }, + { + "epoch": 0.9078241482616874, + "grad_norm": 0.4891580045223236, + "learning_rate": 9.686995563845204e-07, + "loss": 0.09379768371582031, + "step": 6515 + }, + { + "epoch": 0.9079634919529018, + "grad_norm": 0.4679016172885895, + "learning_rate": 9.658033209787998e-07, + "loss": 0.0887908935546875, + "step": 6516 + }, + { + "epoch": 0.9081028356441162, + "grad_norm": 0.3828592300415039, + "learning_rate": 9.629113145370228e-07, + "loss": 0.08316230773925781, + "step": 6517 + }, + { + "epoch": 0.9082421793353306, + "grad_norm": 0.5603492259979248, + "learning_rate": 9.60023537701724e-07, + "loss": 0.10734176635742188, + "step": 6518 + }, + { + "epoch": 0.908381523026545, + "grad_norm": 0.4070824086666107, + "learning_rate": 9.57139991114504e-07, + "loss": 0.07339668273925781, + "step": 6519 + }, + { + "epoch": 0.9085208667177593, + "grad_norm": 0.5390711426734924, + "learning_rate": 9.542606754160277e-07, + "loss": 0.08376789093017578, + "step": 6520 + }, + { + "epoch": 0.9086602104089737, + "grad_norm": 0.5579406023025513, + "learning_rate": 9.513855912460057e-07, + "loss": 0.08527946472167969, + "step": 6521 + }, + { + "epoch": 0.9087995541001881, + "grad_norm": 0.5047799348831177, + "learning_rate": 9.485147392432159e-07, + "loss": 0.08408927917480469, + "step": 6522 + }, + { + "epoch": 0.9089388977914025, + "grad_norm": 0.5802271962165833, + "learning_rate": 9.456481200454992e-07, + "loss": 0.08719015121459961, + "step": 6523 + }, + { + "epoch": 0.9090782414826168, + "grad_norm": 0.543469250202179, + "learning_rate": 9.427857342897528e-07, + "loss": 0.08871841430664062, + "step": 6524 + }, + { + "epoch": 0.9092175851738312, + "grad_norm": 0.4871247708797455, + "learning_rate": 9.399275826119325e-07, + "loss": 0.07925891876220703, + "step": 6525 + }, + { + "epoch": 0.9093569288650456, + "grad_norm": 0.4907892644405365, + "learning_rate": 9.370736656470525e-07, + "loss": 0.08393478393554688, + "step": 6526 + }, + { + "epoch": 0.90949627255626, + "grad_norm": 0.5979554057121277, + "learning_rate": 9.342239840291967e-07, + "loss": 0.07831335067749023, + "step": 6527 + }, + { + "epoch": 0.9096356162474744, + "grad_norm": 0.34017741680145264, + "learning_rate": 9.313785383914852e-07, + "loss": 0.06641340255737305, + "step": 6528 + }, + { + "epoch": 0.9097749599386887, + "grad_norm": 0.4323507845401764, + "learning_rate": 9.285373293661193e-07, + "loss": 0.08502006530761719, + "step": 6529 + }, + { + "epoch": 0.9099143036299031, + "grad_norm": 0.4210036098957062, + "learning_rate": 9.257003575843537e-07, + "loss": 0.09259796142578125, + "step": 6530 + }, + { + "epoch": 0.9100536473211175, + "grad_norm": 0.526805579662323, + "learning_rate": 9.228676236764911e-07, + "loss": 0.10302925109863281, + "step": 6531 + }, + { + "epoch": 0.9101929910123319, + "grad_norm": 0.38294661045074463, + "learning_rate": 9.200391282719079e-07, + "loss": 0.07758140563964844, + "step": 6532 + }, + { + "epoch": 0.9103323347035462, + "grad_norm": 0.42574480175971985, + "learning_rate": 9.172148719990237e-07, + "loss": 0.08785629272460938, + "step": 6533 + }, + { + "epoch": 0.9104716783947607, + "grad_norm": 0.45163431763648987, + "learning_rate": 9.143948554853299e-07, + "loss": 0.08731317520141602, + "step": 6534 + }, + { + "epoch": 0.9106110220859751, + "grad_norm": 0.7292478680610657, + "learning_rate": 9.115790793573653e-07, + "loss": 0.10149860382080078, + "step": 6535 + }, + { + "epoch": 0.9107503657771895, + "grad_norm": 0.527208685874939, + "learning_rate": 9.08767544240734e-07, + "loss": 0.10195350646972656, + "step": 6536 + }, + { + "epoch": 0.9108897094684039, + "grad_norm": 0.4287576973438263, + "learning_rate": 9.059602507600962e-07, + "loss": 0.08968353271484375, + "step": 6537 + }, + { + "epoch": 0.9110290531596182, + "grad_norm": 0.47948089241981506, + "learning_rate": 9.031571995391664e-07, + "loss": 0.07014179229736328, + "step": 6538 + }, + { + "epoch": 0.9111683968508326, + "grad_norm": 0.5637090802192688, + "learning_rate": 9.003583912007152e-07, + "loss": 0.09303092956542969, + "step": 6539 + }, + { + "epoch": 0.911307740542047, + "grad_norm": 0.43494492769241333, + "learning_rate": 8.975638263665787e-07, + "loss": 0.09068870544433594, + "step": 6540 + }, + { + "epoch": 0.9114470842332614, + "grad_norm": 0.7020281553268433, + "learning_rate": 8.947735056576468e-07, + "loss": 0.09417343139648438, + "step": 6541 + }, + { + "epoch": 0.9115864279244758, + "grad_norm": 0.5241010785102844, + "learning_rate": 8.919874296938569e-07, + "loss": 0.09886360168457031, + "step": 6542 + }, + { + "epoch": 0.9117257716156901, + "grad_norm": 0.4367097318172455, + "learning_rate": 8.892055990942228e-07, + "loss": 0.08441352844238281, + "step": 6543 + }, + { + "epoch": 0.9118651153069045, + "grad_norm": 0.3985019028186798, + "learning_rate": 8.864280144767923e-07, + "loss": 0.08358955383300781, + "step": 6544 + }, + { + "epoch": 0.9120044589981189, + "grad_norm": 0.3929474353790283, + "learning_rate": 8.836546764586895e-07, + "loss": 0.07632064819335938, + "step": 6545 + }, + { + "epoch": 0.9121438026893333, + "grad_norm": 1.0007405281066895, + "learning_rate": 8.808855856560838e-07, + "loss": 0.10637092590332031, + "step": 6546 + }, + { + "epoch": 0.9122831463805476, + "grad_norm": 0.3755791187286377, + "learning_rate": 8.781207426842031e-07, + "loss": 0.08441734313964844, + "step": 6547 + }, + { + "epoch": 0.912422490071762, + "grad_norm": 0.4387824237346649, + "learning_rate": 8.753601481573337e-07, + "loss": 0.09298896789550781, + "step": 6548 + }, + { + "epoch": 0.9125618337629764, + "grad_norm": 0.7849602699279785, + "learning_rate": 8.726038026888184e-07, + "loss": 0.09986114501953125, + "step": 6549 + }, + { + "epoch": 0.9127011774541908, + "grad_norm": 0.5194541215896606, + "learning_rate": 8.698517068910495e-07, + "loss": 0.07961845397949219, + "step": 6550 + }, + { + "epoch": 0.9128405211454051, + "grad_norm": 0.7375668287277222, + "learning_rate": 8.671038613754846e-07, + "loss": 0.10225105285644531, + "step": 6551 + }, + { + "epoch": 0.9129798648366195, + "grad_norm": 0.5516365766525269, + "learning_rate": 8.643602667526307e-07, + "loss": 0.08510971069335938, + "step": 6552 + }, + { + "epoch": 0.9131192085278339, + "grad_norm": 0.39602386951446533, + "learning_rate": 8.616209236320517e-07, + "loss": 0.07701969146728516, + "step": 6553 + }, + { + "epoch": 0.9132585522190483, + "grad_norm": 0.36713799834251404, + "learning_rate": 8.588858326223625e-07, + "loss": 0.07244491577148438, + "step": 6554 + }, + { + "epoch": 0.9133978959102627, + "grad_norm": 0.4489080011844635, + "learning_rate": 8.561549943312442e-07, + "loss": 0.09574699401855469, + "step": 6555 + }, + { + "epoch": 0.913537239601477, + "grad_norm": 0.5084143877029419, + "learning_rate": 8.534284093654288e-07, + "loss": 0.09010887145996094, + "step": 6556 + }, + { + "epoch": 0.9136765832926914, + "grad_norm": 0.3949383497238159, + "learning_rate": 8.507060783306919e-07, + "loss": 0.06914138793945312, + "step": 6557 + }, + { + "epoch": 0.9138159269839058, + "grad_norm": 0.34043726325035095, + "learning_rate": 8.479880018318831e-07, + "loss": 0.06420040130615234, + "step": 6558 + }, + { + "epoch": 0.9139552706751202, + "grad_norm": 0.6164972186088562, + "learning_rate": 8.452741804728904e-07, + "loss": 0.10011863708496094, + "step": 6559 + }, + { + "epoch": 0.9140946143663345, + "grad_norm": 0.4840853214263916, + "learning_rate": 8.425646148566624e-07, + "loss": 0.08787918090820312, + "step": 6560 + }, + { + "epoch": 0.9142339580575489, + "grad_norm": 0.42901116609573364, + "learning_rate": 8.398593055852067e-07, + "loss": 0.09472274780273438, + "step": 6561 + }, + { + "epoch": 0.9143733017487633, + "grad_norm": 0.42655035853385925, + "learning_rate": 8.371582532595823e-07, + "loss": 0.07423123717308044, + "step": 6562 + }, + { + "epoch": 0.9145126454399777, + "grad_norm": 0.3919852674007416, + "learning_rate": 8.34461458479896e-07, + "loss": 0.07666397094726562, + "step": 6563 + }, + { + "epoch": 0.9146519891311921, + "grad_norm": 0.45005932450294495, + "learning_rate": 8.317689218453196e-07, + "loss": 0.07568168640136719, + "step": 6564 + }, + { + "epoch": 0.9147913328224064, + "grad_norm": 0.40929025411605835, + "learning_rate": 8.290806439540677e-07, + "loss": 0.07333755493164062, + "step": 6565 + }, + { + "epoch": 0.9149306765136208, + "grad_norm": 0.3433808386325836, + "learning_rate": 8.263966254034206e-07, + "loss": 0.07049369812011719, + "step": 6566 + }, + { + "epoch": 0.9150700202048352, + "grad_norm": 0.4902956485748291, + "learning_rate": 8.237168667897011e-07, + "loss": 0.08889961242675781, + "step": 6567 + }, + { + "epoch": 0.9152093638960496, + "grad_norm": 0.3707128167152405, + "learning_rate": 8.210413687082885e-07, + "loss": 0.07912826538085938, + "step": 6568 + }, + { + "epoch": 0.9153487075872639, + "grad_norm": 0.4784780740737915, + "learning_rate": 8.183701317536253e-07, + "loss": 0.07708168029785156, + "step": 6569 + }, + { + "epoch": 0.9154880512784783, + "grad_norm": 0.4898829460144043, + "learning_rate": 8.157031565191941e-07, + "loss": 0.07291603088378906, + "step": 6570 + }, + { + "epoch": 0.9156273949696927, + "grad_norm": 0.6272456645965576, + "learning_rate": 8.130404435975348e-07, + "loss": 0.09256649017333984, + "step": 6571 + }, + { + "epoch": 0.9157667386609071, + "grad_norm": 0.4360508322715759, + "learning_rate": 8.103819935802426e-07, + "loss": 0.0854644775390625, + "step": 6572 + }, + { + "epoch": 0.9159060823521215, + "grad_norm": 0.562339723110199, + "learning_rate": 8.077278070579675e-07, + "loss": 0.10459709167480469, + "step": 6573 + }, + { + "epoch": 0.9160454260433359, + "grad_norm": 0.4274658262729645, + "learning_rate": 8.050778846204066e-07, + "loss": 0.09029579162597656, + "step": 6574 + }, + { + "epoch": 0.9161847697345503, + "grad_norm": 0.7552006840705872, + "learning_rate": 8.024322268563112e-07, + "loss": 0.12897491455078125, + "step": 6575 + }, + { + "epoch": 0.9163241134257647, + "grad_norm": 0.3962680995464325, + "learning_rate": 7.997908343534844e-07, + "loss": 0.07331085205078125, + "step": 6576 + }, + { + "epoch": 0.9164634571169791, + "grad_norm": 0.7455151081085205, + "learning_rate": 7.971537076987901e-07, + "loss": 0.08852386474609375, + "step": 6577 + }, + { + "epoch": 0.9166028008081935, + "grad_norm": 0.5858816504478455, + "learning_rate": 7.945208474781307e-07, + "loss": 0.07168006896972656, + "step": 6578 + }, + { + "epoch": 0.9167421444994078, + "grad_norm": 0.7991291880607605, + "learning_rate": 7.918922542764717e-07, + "loss": 0.10230064392089844, + "step": 6579 + }, + { + "epoch": 0.9168814881906222, + "grad_norm": 0.5003995895385742, + "learning_rate": 7.89267928677826e-07, + "loss": 0.08494949340820312, + "step": 6580 + }, + { + "epoch": 0.9170208318818366, + "grad_norm": 0.5383285880088806, + "learning_rate": 7.866478712652581e-07, + "loss": 0.09811019897460938, + "step": 6581 + }, + { + "epoch": 0.917160175573051, + "grad_norm": 0.5786818265914917, + "learning_rate": 7.840320826208825e-07, + "loss": 0.09610939025878906, + "step": 6582 + }, + { + "epoch": 0.9172995192642653, + "grad_norm": 0.3583344519138336, + "learning_rate": 7.81420563325872e-07, + "loss": 0.06829357147216797, + "step": 6583 + }, + { + "epoch": 0.9174388629554797, + "grad_norm": 0.5871196985244751, + "learning_rate": 7.788133139604448e-07, + "loss": 0.0903787612915039, + "step": 6584 + }, + { + "epoch": 0.9175782066466941, + "grad_norm": 0.3632908761501312, + "learning_rate": 7.76210335103873e-07, + "loss": 0.07554054260253906, + "step": 6585 + }, + { + "epoch": 0.9177175503379085, + "grad_norm": 0.4300605058670044, + "learning_rate": 7.736116273344763e-07, + "loss": 0.07187080383300781, + "step": 6586 + }, + { + "epoch": 0.9178568940291228, + "grad_norm": 0.4897874891757965, + "learning_rate": 7.710171912296305e-07, + "loss": 0.08119010925292969, + "step": 6587 + }, + { + "epoch": 0.9179962377203372, + "grad_norm": 0.522615909576416, + "learning_rate": 7.684270273657612e-07, + "loss": 0.09082603454589844, + "step": 6588 + }, + { + "epoch": 0.9181355814115516, + "grad_norm": 0.6944094896316528, + "learning_rate": 7.658411363183393e-07, + "loss": 0.1133413314819336, + "step": 6589 + }, + { + "epoch": 0.918274925102766, + "grad_norm": 0.5398294925689697, + "learning_rate": 7.632595186618963e-07, + "loss": 0.08392715454101562, + "step": 6590 + }, + { + "epoch": 0.9184142687939804, + "grad_norm": 0.5039502382278442, + "learning_rate": 7.606821749700045e-07, + "loss": 0.09145355224609375, + "step": 6591 + }, + { + "epoch": 0.9185536124851947, + "grad_norm": 0.33921775221824646, + "learning_rate": 7.581091058152923e-07, + "loss": 0.07109880447387695, + "step": 6592 + }, + { + "epoch": 0.9186929561764091, + "grad_norm": 0.462317556142807, + "learning_rate": 7.55540311769436e-07, + "loss": 0.08951759338378906, + "step": 6593 + }, + { + "epoch": 0.9188322998676235, + "grad_norm": 0.36292555928230286, + "learning_rate": 7.529757934031634e-07, + "loss": 0.07836723327636719, + "step": 6594 + }, + { + "epoch": 0.9189716435588379, + "grad_norm": 0.4138258099555969, + "learning_rate": 7.504155512862587e-07, + "loss": 0.0862722396850586, + "step": 6595 + }, + { + "epoch": 0.9191109872500522, + "grad_norm": 0.4200185537338257, + "learning_rate": 7.478595859875381e-07, + "loss": 0.07413768768310547, + "step": 6596 + }, + { + "epoch": 0.9192503309412666, + "grad_norm": 0.43482866883277893, + "learning_rate": 7.453078980748829e-07, + "loss": 0.09173011779785156, + "step": 6597 + }, + { + "epoch": 0.919389674632481, + "grad_norm": 0.4838433563709259, + "learning_rate": 7.427604881152239e-07, + "loss": 0.09014511108398438, + "step": 6598 + }, + { + "epoch": 0.9195290183236954, + "grad_norm": 0.5129477977752686, + "learning_rate": 7.402173566745308e-07, + "loss": 0.10297012329101562, + "step": 6599 + }, + { + "epoch": 0.9196683620149098, + "grad_norm": 0.3329744040966034, + "learning_rate": 7.376785043178314e-07, + "loss": 0.06363964080810547, + "step": 6600 + }, + { + "epoch": 0.9198077057061241, + "grad_norm": 0.3785676658153534, + "learning_rate": 7.35143931609208e-07, + "loss": 0.07735633850097656, + "step": 6601 + }, + { + "epoch": 0.9199470493973385, + "grad_norm": 0.49611395597457886, + "learning_rate": 7.326136391117766e-07, + "loss": 0.08708572387695312, + "step": 6602 + }, + { + "epoch": 0.9200863930885529, + "grad_norm": 0.5451335310935974, + "learning_rate": 7.3008762738771e-07, + "loss": 0.09027099609375, + "step": 6603 + }, + { + "epoch": 0.9202257367797673, + "grad_norm": 0.3413240611553192, + "learning_rate": 7.275658969982324e-07, + "loss": 0.07712078094482422, + "step": 6604 + }, + { + "epoch": 0.9203650804709816, + "grad_norm": 0.45055973529815674, + "learning_rate": 7.250484485036202e-07, + "loss": 0.08214330673217773, + "step": 6605 + }, + { + "epoch": 0.920504424162196, + "grad_norm": 0.31858131289482117, + "learning_rate": 7.225352824631859e-07, + "loss": 0.0790109634399414, + "step": 6606 + }, + { + "epoch": 0.9206437678534104, + "grad_norm": 0.43797215819358826, + "learning_rate": 7.200263994352985e-07, + "loss": 0.09473800659179688, + "step": 6607 + }, + { + "epoch": 0.9207831115446248, + "grad_norm": 0.3412601947784424, + "learning_rate": 7.175217999773765e-07, + "loss": 0.07590484619140625, + "step": 6608 + }, + { + "epoch": 0.9209224552358392, + "grad_norm": 0.5793514251708984, + "learning_rate": 7.150214846458859e-07, + "loss": 0.08974647521972656, + "step": 6609 + }, + { + "epoch": 0.9210617989270535, + "grad_norm": 0.6005751490592957, + "learning_rate": 7.125254539963356e-07, + "loss": 0.11059999465942383, + "step": 6610 + }, + { + "epoch": 0.9212011426182679, + "grad_norm": 0.43312883377075195, + "learning_rate": 7.100337085832909e-07, + "loss": 0.06834888458251953, + "step": 6611 + }, + { + "epoch": 0.9213404863094823, + "grad_norm": 0.43880197405815125, + "learning_rate": 7.075462489603557e-07, + "loss": 0.07840967178344727, + "step": 6612 + }, + { + "epoch": 0.9214798300006967, + "grad_norm": 0.4398439824581146, + "learning_rate": 7.050630756801924e-07, + "loss": 0.08313179016113281, + "step": 6613 + }, + { + "epoch": 0.9216191736919112, + "grad_norm": 0.6521060466766357, + "learning_rate": 7.025841892945018e-07, + "loss": 0.10219192504882812, + "step": 6614 + }, + { + "epoch": 0.9217585173831255, + "grad_norm": 0.4898424446582794, + "learning_rate": 7.001095903540345e-07, + "loss": 0.06756401062011719, + "step": 6615 + }, + { + "epoch": 0.9218978610743399, + "grad_norm": 0.6703525185585022, + "learning_rate": 6.976392794085973e-07, + "loss": 0.09815406799316406, + "step": 6616 + }, + { + "epoch": 0.9220372047655543, + "grad_norm": 0.5865079760551453, + "learning_rate": 6.951732570070291e-07, + "loss": 0.0904245376586914, + "step": 6617 + }, + { + "epoch": 0.9221765484567687, + "grad_norm": 0.6228212714195251, + "learning_rate": 6.92711523697227e-07, + "loss": 0.10821723937988281, + "step": 6618 + }, + { + "epoch": 0.922315892147983, + "grad_norm": 0.6868049502372742, + "learning_rate": 6.902540800261292e-07, + "loss": 0.10289573669433594, + "step": 6619 + }, + { + "epoch": 0.9224552358391974, + "grad_norm": 0.43380123376846313, + "learning_rate": 6.878009265397301e-07, + "loss": 0.080047607421875, + "step": 6620 + }, + { + "epoch": 0.9225945795304118, + "grad_norm": 0.6497091054916382, + "learning_rate": 6.853520637830557e-07, + "loss": 0.10596656799316406, + "step": 6621 + }, + { + "epoch": 0.9227339232216262, + "grad_norm": 0.580585241317749, + "learning_rate": 6.829074923001955e-07, + "loss": 0.09992218017578125, + "step": 6622 + }, + { + "epoch": 0.9228732669128406, + "grad_norm": 0.4318520426750183, + "learning_rate": 6.804672126342704e-07, + "loss": 0.09050559997558594, + "step": 6623 + }, + { + "epoch": 0.9230126106040549, + "grad_norm": 0.38267046213150024, + "learning_rate": 6.780312253274624e-07, + "loss": 0.07812309265136719, + "step": 6624 + }, + { + "epoch": 0.9231519542952693, + "grad_norm": 0.4556185305118561, + "learning_rate": 6.755995309209851e-07, + "loss": 0.08832740783691406, + "step": 6625 + }, + { + "epoch": 0.9232912979864837, + "grad_norm": 0.4381963908672333, + "learning_rate": 6.731721299551064e-07, + "loss": 0.074920654296875, + "step": 6626 + }, + { + "epoch": 0.9234306416776981, + "grad_norm": 0.48914414644241333, + "learning_rate": 6.707490229691483e-07, + "loss": 0.07126522064208984, + "step": 6627 + }, + { + "epoch": 0.9235699853689124, + "grad_norm": 0.5399072170257568, + "learning_rate": 6.683302105014577e-07, + "loss": 0.08540534973144531, + "step": 6628 + }, + { + "epoch": 0.9237093290601268, + "grad_norm": 0.425044983625412, + "learning_rate": 6.65915693089445e-07, + "loss": 0.0877828598022461, + "step": 6629 + }, + { + "epoch": 0.9238486727513412, + "grad_norm": 0.34386464953422546, + "learning_rate": 6.635054712695632e-07, + "loss": 0.06816291809082031, + "step": 6630 + }, + { + "epoch": 0.9239880164425556, + "grad_norm": 0.46013981103897095, + "learning_rate": 6.610995455773017e-07, + "loss": 0.09243011474609375, + "step": 6631 + }, + { + "epoch": 0.92412736013377, + "grad_norm": 0.49861785769462585, + "learning_rate": 6.586979165472107e-07, + "loss": 0.08870887756347656, + "step": 6632 + }, + { + "epoch": 0.9242667038249843, + "grad_norm": 0.5354626774787903, + "learning_rate": 6.563005847128701e-07, + "loss": 0.09901237487792969, + "step": 6633 + }, + { + "epoch": 0.9244060475161987, + "grad_norm": 0.4065479040145874, + "learning_rate": 6.539075506069181e-07, + "loss": 0.07964420318603516, + "step": 6634 + }, + { + "epoch": 0.9245453912074131, + "grad_norm": 0.5890137553215027, + "learning_rate": 6.515188147610274e-07, + "loss": 0.09647464752197266, + "step": 6635 + }, + { + "epoch": 0.9246847348986275, + "grad_norm": 0.5694355368614197, + "learning_rate": 6.491343777059245e-07, + "loss": 0.09836578369140625, + "step": 6636 + }, + { + "epoch": 0.9248240785898418, + "grad_norm": 0.4388856887817383, + "learning_rate": 6.467542399713744e-07, + "loss": 0.08289337158203125, + "step": 6637 + }, + { + "epoch": 0.9249634222810562, + "grad_norm": 0.5295605063438416, + "learning_rate": 6.443784020861921e-07, + "loss": 0.08414459228515625, + "step": 6638 + }, + { + "epoch": 0.9251027659722706, + "grad_norm": 0.558428168296814, + "learning_rate": 6.420068645782285e-07, + "loss": 0.09575176239013672, + "step": 6639 + }, + { + "epoch": 0.925242109663485, + "grad_norm": 0.47260648012161255, + "learning_rate": 6.396396279743911e-07, + "loss": 0.09474658966064453, + "step": 6640 + }, + { + "epoch": 0.9253814533546993, + "grad_norm": 0.5914565920829773, + "learning_rate": 6.372766928006236e-07, + "loss": 0.07888031005859375, + "step": 6641 + }, + { + "epoch": 0.9255207970459137, + "grad_norm": 0.39078742265701294, + "learning_rate": 6.349180595819171e-07, + "loss": 0.08607673645019531, + "step": 6642 + }, + { + "epoch": 0.9256601407371281, + "grad_norm": 0.6260488629341125, + "learning_rate": 6.325637288423059e-07, + "loss": 0.08702278137207031, + "step": 6643 + }, + { + "epoch": 0.9257994844283425, + "grad_norm": 0.35773929953575134, + "learning_rate": 6.302137011048648e-07, + "loss": 0.076518714427948, + "step": 6644 + }, + { + "epoch": 0.9259388281195569, + "grad_norm": 0.43563538789749146, + "learning_rate": 6.278679768917229e-07, + "loss": 0.07980155944824219, + "step": 6645 + }, + { + "epoch": 0.9260781718107712, + "grad_norm": 0.3694564700126648, + "learning_rate": 6.25526556724041e-07, + "loss": 0.08545494079589844, + "step": 6646 + }, + { + "epoch": 0.9262175155019856, + "grad_norm": 0.47958648204803467, + "learning_rate": 6.231894411220319e-07, + "loss": 0.08474254608154297, + "step": 6647 + }, + { + "epoch": 0.9263568591932, + "grad_norm": 0.5308637619018555, + "learning_rate": 6.208566306049513e-07, + "loss": 0.0970163345336914, + "step": 6648 + }, + { + "epoch": 0.9264962028844144, + "grad_norm": 0.48583003878593445, + "learning_rate": 6.185281256910936e-07, + "loss": 0.09014892578125, + "step": 6649 + }, + { + "epoch": 0.9266355465756287, + "grad_norm": 0.3105502724647522, + "learning_rate": 6.162039268977981e-07, + "loss": 0.07374382019042969, + "step": 6650 + }, + { + "epoch": 0.9267748902668431, + "grad_norm": 0.6345722079277039, + "learning_rate": 6.138840347414498e-07, + "loss": 0.0948333740234375, + "step": 6651 + }, + { + "epoch": 0.9269142339580575, + "grad_norm": 0.48984068632125854, + "learning_rate": 6.115684497374786e-07, + "loss": 0.08294296264648438, + "step": 6652 + }, + { + "epoch": 0.9270535776492719, + "grad_norm": 0.5081027746200562, + "learning_rate": 6.092571724003527e-07, + "loss": 0.08483123779296875, + "step": 6653 + }, + { + "epoch": 0.9271929213404864, + "grad_norm": 0.4513362944126129, + "learning_rate": 6.069502032435814e-07, + "loss": 0.08945178985595703, + "step": 6654 + }, + { + "epoch": 0.9273322650317007, + "grad_norm": 0.39781633019447327, + "learning_rate": 6.04647542779726e-07, + "loss": 0.07929325103759766, + "step": 6655 + }, + { + "epoch": 0.9274716087229151, + "grad_norm": 0.4966123402118683, + "learning_rate": 6.023491915203839e-07, + "loss": 0.0943441390991211, + "step": 6656 + }, + { + "epoch": 0.9276109524141295, + "grad_norm": 0.5207604765892029, + "learning_rate": 6.000551499761953e-07, + "loss": 0.08665847778320312, + "step": 6657 + }, + { + "epoch": 0.9277502961053439, + "grad_norm": 0.5674774050712585, + "learning_rate": 5.977654186568438e-07, + "loss": 0.12056922912597656, + "step": 6658 + }, + { + "epoch": 0.9278896397965583, + "grad_norm": 0.4182356297969818, + "learning_rate": 5.954799980710579e-07, + "loss": 0.07442152500152588, + "step": 6659 + }, + { + "epoch": 0.9280289834877726, + "grad_norm": 0.47476640343666077, + "learning_rate": 5.931988887266005e-07, + "loss": 0.07684993743896484, + "step": 6660 + }, + { + "epoch": 0.928168327178987, + "grad_norm": 0.37362852692604065, + "learning_rate": 5.90922091130286e-07, + "loss": 0.0788259506225586, + "step": 6661 + }, + { + "epoch": 0.9283076708702014, + "grad_norm": 0.558434009552002, + "learning_rate": 5.886496057879676e-07, + "loss": 0.09956550598144531, + "step": 6662 + }, + { + "epoch": 0.9284470145614158, + "grad_norm": 0.45953044295310974, + "learning_rate": 5.863814332045347e-07, + "loss": 0.08555030822753906, + "step": 6663 + }, + { + "epoch": 0.9285863582526301, + "grad_norm": 0.3436571955680847, + "learning_rate": 5.841175738839311e-07, + "loss": 0.08179283142089844, + "step": 6664 + }, + { + "epoch": 0.9287257019438445, + "grad_norm": 0.4074748456478119, + "learning_rate": 5.818580283291276e-07, + "loss": 0.09112358093261719, + "step": 6665 + }, + { + "epoch": 0.9288650456350589, + "grad_norm": 0.6078681349754333, + "learning_rate": 5.796027970421492e-07, + "loss": 0.09537696838378906, + "step": 6666 + }, + { + "epoch": 0.9290043893262733, + "grad_norm": 0.3612077534198761, + "learning_rate": 5.773518805240508e-07, + "loss": 0.0844879150390625, + "step": 6667 + }, + { + "epoch": 0.9291437330174876, + "grad_norm": 0.4283103048801422, + "learning_rate": 5.75105279274939e-07, + "loss": 0.07426261901855469, + "step": 6668 + }, + { + "epoch": 0.929283076708702, + "grad_norm": 0.36559849977493286, + "learning_rate": 5.728629937939568e-07, + "loss": 0.07793807983398438, + "step": 6669 + }, + { + "epoch": 0.9294224203999164, + "grad_norm": 0.36675459146499634, + "learning_rate": 5.706250245792899e-07, + "loss": 0.0653390884399414, + "step": 6670 + }, + { + "epoch": 0.9295617640911308, + "grad_norm": 0.5858873128890991, + "learning_rate": 5.683913721281586e-07, + "loss": 0.08613777160644531, + "step": 6671 + }, + { + "epoch": 0.9297011077823452, + "grad_norm": 0.4954722821712494, + "learning_rate": 5.661620369368348e-07, + "loss": 0.09051704406738281, + "step": 6672 + }, + { + "epoch": 0.9298404514735595, + "grad_norm": 0.47059109807014465, + "learning_rate": 5.639370195006266e-07, + "loss": 0.08870697021484375, + "step": 6673 + }, + { + "epoch": 0.9299797951647739, + "grad_norm": 0.7105143070220947, + "learning_rate": 5.617163203138765e-07, + "loss": 0.10810136795043945, + "step": 6674 + }, + { + "epoch": 0.9301191388559883, + "grad_norm": 0.5401536226272583, + "learning_rate": 5.594999398699785e-07, + "loss": 0.07160186767578125, + "step": 6675 + }, + { + "epoch": 0.9302584825472027, + "grad_norm": 0.3743908405303955, + "learning_rate": 5.572878786613589e-07, + "loss": 0.08102226257324219, + "step": 6676 + }, + { + "epoch": 0.930397826238417, + "grad_norm": 0.5879694223403931, + "learning_rate": 5.55080137179489e-07, + "loss": 0.10243034362792969, + "step": 6677 + }, + { + "epoch": 0.9305371699296314, + "grad_norm": 0.5900450944900513, + "learning_rate": 5.528767159148762e-07, + "loss": 0.08101844787597656, + "step": 6678 + }, + { + "epoch": 0.9306765136208458, + "grad_norm": 0.4640294015407562, + "learning_rate": 5.506776153570714e-07, + "loss": 0.0889739990234375, + "step": 6679 + }, + { + "epoch": 0.9308158573120602, + "grad_norm": 0.41161391139030457, + "learning_rate": 5.484828359946682e-07, + "loss": 0.0834503173828125, + "step": 6680 + }, + { + "epoch": 0.9309552010032746, + "grad_norm": 0.6214914917945862, + "learning_rate": 5.462923783152918e-07, + "loss": 0.07645225524902344, + "step": 6681 + }, + { + "epoch": 0.9310945446944889, + "grad_norm": 0.37648242712020874, + "learning_rate": 5.44106242805611e-07, + "loss": 0.07800006866455078, + "step": 6682 + }, + { + "epoch": 0.9312338883857033, + "grad_norm": 0.39686375856399536, + "learning_rate": 5.419244299513371e-07, + "loss": 0.07050132751464844, + "step": 6683 + }, + { + "epoch": 0.9313732320769177, + "grad_norm": 0.28247544169425964, + "learning_rate": 5.397469402372224e-07, + "loss": 0.06866645812988281, + "step": 6684 + }, + { + "epoch": 0.9315125757681321, + "grad_norm": 0.6762100458145142, + "learning_rate": 5.375737741470511e-07, + "loss": 0.09849166870117188, + "step": 6685 + }, + { + "epoch": 0.9316519194593464, + "grad_norm": 0.3319464325904846, + "learning_rate": 5.354049321636523e-07, + "loss": 0.07520484924316406, + "step": 6686 + }, + { + "epoch": 0.9317912631505608, + "grad_norm": 0.5715542435646057, + "learning_rate": 5.332404147688919e-07, + "loss": 0.10098648071289062, + "step": 6687 + }, + { + "epoch": 0.9319306068417752, + "grad_norm": 0.5928129553794861, + "learning_rate": 5.310802224436806e-07, + "loss": 0.08539772033691406, + "step": 6688 + }, + { + "epoch": 0.9320699505329896, + "grad_norm": 0.5034691691398621, + "learning_rate": 5.28924355667959e-07, + "loss": 0.08638381958007812, + "step": 6689 + }, + { + "epoch": 0.932209294224204, + "grad_norm": 0.5191153287887573, + "learning_rate": 5.267728149207152e-07, + "loss": 0.08760547637939453, + "step": 6690 + }, + { + "epoch": 0.9323486379154183, + "grad_norm": 0.47790125012397766, + "learning_rate": 5.246256006799689e-07, + "loss": 0.08920478820800781, + "step": 6691 + }, + { + "epoch": 0.9324879816066327, + "grad_norm": 0.4753800928592682, + "learning_rate": 5.22482713422785e-07, + "loss": 0.08012866973876953, + "step": 6692 + }, + { + "epoch": 0.9326273252978471, + "grad_norm": 0.7255792617797852, + "learning_rate": 5.203441536252607e-07, + "loss": 0.09952545166015625, + "step": 6693 + }, + { + "epoch": 0.9327666689890615, + "grad_norm": 0.5533545613288879, + "learning_rate": 5.182099217625381e-07, + "loss": 0.09373283386230469, + "step": 6694 + }, + { + "epoch": 0.932906012680276, + "grad_norm": 0.4349111318588257, + "learning_rate": 5.160800183087955e-07, + "loss": 0.07940673828125, + "step": 6695 + }, + { + "epoch": 0.9330453563714903, + "grad_norm": 0.4110070765018463, + "learning_rate": 5.139544437372457e-07, + "loss": 0.07968711853027344, + "step": 6696 + }, + { + "epoch": 0.9331847000627047, + "grad_norm": 0.4486566185951233, + "learning_rate": 5.118331985201441e-07, + "loss": 0.08545494079589844, + "step": 6697 + }, + { + "epoch": 0.9333240437539191, + "grad_norm": 0.3988681733608246, + "learning_rate": 5.097162831287872e-07, + "loss": 0.0806577205657959, + "step": 6698 + }, + { + "epoch": 0.9334633874451335, + "grad_norm": 0.7662954926490784, + "learning_rate": 5.076036980334964e-07, + "loss": 0.09683561325073242, + "step": 6699 + }, + { + "epoch": 0.9336027311363478, + "grad_norm": 0.4660816788673401, + "learning_rate": 5.054954437036475e-07, + "loss": 0.09003448486328125, + "step": 6700 + }, + { + "epoch": 0.9337420748275622, + "grad_norm": 0.43399062752723694, + "learning_rate": 5.033915206076456e-07, + "loss": 0.08832740783691406, + "step": 6701 + }, + { + "epoch": 0.9338814185187766, + "grad_norm": 0.37649354338645935, + "learning_rate": 5.012919292129303e-07, + "loss": 0.07748031616210938, + "step": 6702 + }, + { + "epoch": 0.934020762209991, + "grad_norm": 0.4908828139305115, + "learning_rate": 4.991966699859863e-07, + "loss": 0.09891128540039062, + "step": 6703 + }, + { + "epoch": 0.9341601059012054, + "grad_norm": 0.36439937353134155, + "learning_rate": 4.971057433923298e-07, + "loss": 0.08021068572998047, + "step": 6704 + }, + { + "epoch": 0.9342994495924197, + "grad_norm": 0.3672836720943451, + "learning_rate": 4.950191498965207e-07, + "loss": 0.08430290222167969, + "step": 6705 + }, + { + "epoch": 0.9344387932836341, + "grad_norm": 0.3653005361557007, + "learning_rate": 4.929368899621479e-07, + "loss": 0.07901501655578613, + "step": 6706 + }, + { + "epoch": 0.9345781369748485, + "grad_norm": 0.5384196043014526, + "learning_rate": 4.908589640518458e-07, + "loss": 0.0825948715209961, + "step": 6707 + }, + { + "epoch": 0.9347174806660629, + "grad_norm": 0.5665711760520935, + "learning_rate": 4.887853726272785e-07, + "loss": 0.11186695098876953, + "step": 6708 + }, + { + "epoch": 0.9348568243572772, + "grad_norm": 0.4204542934894562, + "learning_rate": 4.867161161491551e-07, + "loss": 0.07552814483642578, + "step": 6709 + }, + { + "epoch": 0.9349961680484916, + "grad_norm": 0.30247625708580017, + "learning_rate": 4.846511950772148e-07, + "loss": 0.07412147521972656, + "step": 6710 + }, + { + "epoch": 0.935135511739706, + "grad_norm": 0.5772008299827576, + "learning_rate": 4.825906098702348e-07, + "loss": 0.09107208251953125, + "step": 6711 + }, + { + "epoch": 0.9352748554309204, + "grad_norm": 0.5945577621459961, + "learning_rate": 4.805343609860314e-07, + "loss": 0.10120391845703125, + "step": 6712 + }, + { + "epoch": 0.9354141991221347, + "grad_norm": 0.5481851696968079, + "learning_rate": 4.784824488814588e-07, + "loss": 0.08399581909179688, + "step": 6713 + }, + { + "epoch": 0.9355535428133491, + "grad_norm": 0.4031440019607544, + "learning_rate": 4.7643487401239917e-07, + "loss": 0.07853412628173828, + "step": 6714 + }, + { + "epoch": 0.9356928865045635, + "grad_norm": 0.6310160160064697, + "learning_rate": 4.7439163683377975e-07, + "loss": 0.10025596618652344, + "step": 6715 + }, + { + "epoch": 0.9358322301957779, + "grad_norm": 0.49422040581703186, + "learning_rate": 4.723527377995618e-07, + "loss": 0.103607177734375, + "step": 6716 + }, + { + "epoch": 0.9359715738869923, + "grad_norm": 0.7435978651046753, + "learning_rate": 4.7031817736274297e-07, + "loss": 0.09535789489746094, + "step": 6717 + }, + { + "epoch": 0.9361109175782066, + "grad_norm": 0.3923938572406769, + "learning_rate": 4.68287955975355e-07, + "loss": 0.0778350830078125, + "step": 6718 + }, + { + "epoch": 0.936250261269421, + "grad_norm": 0.38346895575523376, + "learning_rate": 4.662620740884638e-07, + "loss": 0.0783529281616211, + "step": 6719 + }, + { + "epoch": 0.9363896049606354, + "grad_norm": 0.45232048630714417, + "learning_rate": 4.642405321521803e-07, + "loss": 0.08610916137695312, + "step": 6720 + }, + { + "epoch": 0.9365289486518498, + "grad_norm": 0.4723609387874603, + "learning_rate": 4.622233306156387e-07, + "loss": 0.08774948120117188, + "step": 6721 + }, + { + "epoch": 0.9366682923430641, + "grad_norm": 0.6622180938720703, + "learning_rate": 4.6021046992702046e-07, + "loss": 0.07843971252441406, + "step": 6722 + }, + { + "epoch": 0.9368076360342785, + "grad_norm": 0.4040941894054413, + "learning_rate": 4.5820195053353445e-07, + "loss": 0.07758617401123047, + "step": 6723 + }, + { + "epoch": 0.9369469797254929, + "grad_norm": 0.45229530334472656, + "learning_rate": 4.561977728814282e-07, + "loss": 0.06499338150024414, + "step": 6724 + }, + { + "epoch": 0.9370863234167073, + "grad_norm": 0.6262058019638062, + "learning_rate": 4.541979374159833e-07, + "loss": 0.10933351516723633, + "step": 6725 + }, + { + "epoch": 0.9372256671079217, + "grad_norm": 0.5106614232063293, + "learning_rate": 4.522024445815176e-07, + "loss": 0.07543659210205078, + "step": 6726 + }, + { + "epoch": 0.937365010799136, + "grad_norm": 0.3235766291618347, + "learning_rate": 4.502112948213899e-07, + "loss": 0.07490253448486328, + "step": 6727 + }, + { + "epoch": 0.9375043544903504, + "grad_norm": 0.523576021194458, + "learning_rate": 4.482244885779774e-07, + "loss": 0.0977325439453125, + "step": 6728 + }, + { + "epoch": 0.9376436981815648, + "grad_norm": 0.48409518599510193, + "learning_rate": 4.462420262927114e-07, + "loss": 0.08582496643066406, + "step": 6729 + }, + { + "epoch": 0.9377830418727792, + "grad_norm": 0.3328580856323242, + "learning_rate": 4.442639084060463e-07, + "loss": 0.059914588928222656, + "step": 6730 + }, + { + "epoch": 0.9379223855639935, + "grad_norm": 0.44545766711235046, + "learning_rate": 4.422901353574771e-07, + "loss": 0.0837554931640625, + "step": 6731 + }, + { + "epoch": 0.9380617292552079, + "grad_norm": 0.4862435460090637, + "learning_rate": 4.403207075855265e-07, + "loss": 0.08935070037841797, + "step": 6732 + }, + { + "epoch": 0.9382010729464223, + "grad_norm": 0.4156142771244049, + "learning_rate": 4.3835562552776434e-07, + "loss": 0.087890625, + "step": 6733 + }, + { + "epoch": 0.9383404166376367, + "grad_norm": 0.6418786644935608, + "learning_rate": 4.3639488962077923e-07, + "loss": 0.10782051086425781, + "step": 6734 + }, + { + "epoch": 0.9384797603288512, + "grad_norm": 0.4526478052139282, + "learning_rate": 4.3443850030020497e-07, + "loss": 0.07873249053955078, + "step": 6735 + }, + { + "epoch": 0.9386191040200655, + "grad_norm": 0.5929837226867676, + "learning_rate": 4.324864580007071e-07, + "loss": 0.09885692596435547, + "step": 6736 + }, + { + "epoch": 0.9387584477112799, + "grad_norm": 0.5520939230918884, + "learning_rate": 4.305387631559854e-07, + "loss": 0.09114456176757812, + "step": 6737 + }, + { + "epoch": 0.9388977914024943, + "grad_norm": 0.4344848692417145, + "learning_rate": 4.2859541619877154e-07, + "loss": 0.08769559860229492, + "step": 6738 + }, + { + "epoch": 0.9390371350937087, + "grad_norm": 0.4267640709877014, + "learning_rate": 4.2665641756083344e-07, + "loss": 0.09342002868652344, + "step": 6739 + }, + { + "epoch": 0.939176478784923, + "grad_norm": 0.5730140209197998, + "learning_rate": 4.247217676729709e-07, + "loss": 0.10276031494140625, + "step": 6740 + }, + { + "epoch": 0.9393158224761374, + "grad_norm": 0.3715185225009918, + "learning_rate": 4.227914669650224e-07, + "loss": 0.06966400146484375, + "step": 6741 + }, + { + "epoch": 0.9394551661673518, + "grad_norm": 0.7259917855262756, + "learning_rate": 4.2086551586585144e-07, + "loss": 0.11578750610351562, + "step": 6742 + }, + { + "epoch": 0.9395945098585662, + "grad_norm": 0.8291719555854797, + "learning_rate": 4.1894391480336694e-07, + "loss": 0.09050273895263672, + "step": 6743 + }, + { + "epoch": 0.9397338535497806, + "grad_norm": 0.2945404350757599, + "learning_rate": 4.1702666420450064e-07, + "loss": 0.061679840087890625, + "step": 6744 + }, + { + "epoch": 0.9398731972409949, + "grad_norm": 0.5870511531829834, + "learning_rate": 4.15113764495223e-07, + "loss": 0.07791328430175781, + "step": 6745 + }, + { + "epoch": 0.9400125409322093, + "grad_norm": 0.40850329399108887, + "learning_rate": 4.1320521610053624e-07, + "loss": 0.0822458267211914, + "step": 6746 + }, + { + "epoch": 0.9401518846234237, + "grad_norm": 0.6894055604934692, + "learning_rate": 4.113010194444744e-07, + "loss": 0.105621337890625, + "step": 6747 + }, + { + "epoch": 0.9402912283146381, + "grad_norm": 0.38659751415252686, + "learning_rate": 4.094011749501103e-07, + "loss": 0.07713031768798828, + "step": 6748 + }, + { + "epoch": 0.9404305720058524, + "grad_norm": 0.3728220760822296, + "learning_rate": 4.075056830395441e-07, + "loss": 0.07012748718261719, + "step": 6749 + }, + { + "epoch": 0.9405699156970668, + "grad_norm": 0.4064474105834961, + "learning_rate": 4.056145441339099e-07, + "loss": 0.08251094818115234, + "step": 6750 + }, + { + "epoch": 0.9407092593882812, + "grad_norm": 0.5098365545272827, + "learning_rate": 4.037277586533761e-07, + "loss": 0.08020293712615967, + "step": 6751 + }, + { + "epoch": 0.9408486030794956, + "grad_norm": 0.5023030042648315, + "learning_rate": 4.018453270171474e-07, + "loss": 0.07629776000976562, + "step": 6752 + }, + { + "epoch": 0.94098794677071, + "grad_norm": 0.28868600726127625, + "learning_rate": 3.9996724964344924e-07, + "loss": 0.06585979461669922, + "step": 6753 + }, + { + "epoch": 0.9411272904619243, + "grad_norm": 0.7772579789161682, + "learning_rate": 3.980935269495545e-07, + "loss": 0.1047874391078949, + "step": 6754 + }, + { + "epoch": 0.9412666341531387, + "grad_norm": 0.5793885588645935, + "learning_rate": 3.9622415935175683e-07, + "loss": 0.09769248962402344, + "step": 6755 + }, + { + "epoch": 0.9414059778443531, + "grad_norm": 0.4198811948299408, + "learning_rate": 3.943591472653929e-07, + "loss": 0.07009410858154297, + "step": 6756 + }, + { + "epoch": 0.9415453215355675, + "grad_norm": 0.3661085069179535, + "learning_rate": 3.92498491104818e-07, + "loss": 0.07900619506835938, + "step": 6757 + }, + { + "epoch": 0.9416846652267818, + "grad_norm": 0.49461203813552856, + "learning_rate": 3.906421912834324e-07, + "loss": 0.08791828155517578, + "step": 6758 + }, + { + "epoch": 0.9418240089179962, + "grad_norm": 0.5899751782417297, + "learning_rate": 3.887902482136663e-07, + "loss": 0.09708595275878906, + "step": 6759 + }, + { + "epoch": 0.9419633526092106, + "grad_norm": 0.4670283794403076, + "learning_rate": 3.8694266230697053e-07, + "loss": 0.08649635314941406, + "step": 6760 + }, + { + "epoch": 0.942102696300425, + "grad_norm": 0.32244089245796204, + "learning_rate": 3.850994339738434e-07, + "loss": 0.08074378967285156, + "step": 6761 + }, + { + "epoch": 0.9422420399916394, + "grad_norm": 0.49538299441337585, + "learning_rate": 3.8326056362380846e-07, + "loss": 0.0758504867553711, + "step": 6762 + }, + { + "epoch": 0.9423813836828537, + "grad_norm": 0.5250726342201233, + "learning_rate": 3.814260516654145e-07, + "loss": 0.08752822875976562, + "step": 6763 + }, + { + "epoch": 0.9425207273740681, + "grad_norm": 0.41617584228515625, + "learning_rate": 3.795958985062553e-07, + "loss": 0.08263778686523438, + "step": 6764 + }, + { + "epoch": 0.9426600710652825, + "grad_norm": 0.4242914319038391, + "learning_rate": 3.777701045529436e-07, + "loss": 0.08100700378417969, + "step": 6765 + }, + { + "epoch": 0.9427994147564969, + "grad_norm": 0.36346012353897095, + "learning_rate": 3.759486702111348e-07, + "loss": 0.07995796203613281, + "step": 6766 + }, + { + "epoch": 0.9429387584477112, + "grad_norm": 0.3506278395652771, + "learning_rate": 3.7413159588550295e-07, + "loss": 0.07800483703613281, + "step": 6767 + }, + { + "epoch": 0.9430781021389256, + "grad_norm": 0.4535588026046753, + "learning_rate": 3.723188819797652e-07, + "loss": 0.08218193054199219, + "step": 6768 + }, + { + "epoch": 0.94321744583014, + "grad_norm": 0.5465250611305237, + "learning_rate": 3.7051052889666596e-07, + "loss": 0.08878326416015625, + "step": 6769 + }, + { + "epoch": 0.9433567895213544, + "grad_norm": 0.2768731415271759, + "learning_rate": 3.6870653703797943e-07, + "loss": 0.069000244140625, + "step": 6770 + }, + { + "epoch": 0.9434961332125688, + "grad_norm": 0.37856653332710266, + "learning_rate": 3.6690690680450723e-07, + "loss": 0.07645606994628906, + "step": 6771 + }, + { + "epoch": 0.9436354769037831, + "grad_norm": 0.4287826716899872, + "learning_rate": 3.6511163859608957e-07, + "loss": 0.0843057632446289, + "step": 6772 + }, + { + "epoch": 0.9437748205949975, + "grad_norm": 0.4334360361099243, + "learning_rate": 3.6332073281159394e-07, + "loss": 0.06563568115234375, + "step": 6773 + }, + { + "epoch": 0.9439141642862119, + "grad_norm": 0.5297086834907532, + "learning_rate": 3.6153418984891996e-07, + "loss": 0.08570671081542969, + "step": 6774 + }, + { + "epoch": 0.9440535079774264, + "grad_norm": 0.44585633277893066, + "learning_rate": 3.597520101049945e-07, + "loss": 0.08084678649902344, + "step": 6775 + }, + { + "epoch": 0.9441928516686408, + "grad_norm": 0.3599169850349426, + "learning_rate": 3.579741939757764e-07, + "loss": 0.07885074615478516, + "step": 6776 + }, + { + "epoch": 0.9443321953598551, + "grad_norm": 0.4507899880409241, + "learning_rate": 3.5620074185626075e-07, + "loss": 0.08155488967895508, + "step": 6777 + }, + { + "epoch": 0.9444715390510695, + "grad_norm": 0.5324188470840454, + "learning_rate": 3.544316541404613e-07, + "loss": 0.08903694152832031, + "step": 6778 + }, + { + "epoch": 0.9446108827422839, + "grad_norm": 0.479341596364975, + "learning_rate": 3.526669312214326e-07, + "loss": 0.07930850982666016, + "step": 6779 + }, + { + "epoch": 0.9447502264334983, + "grad_norm": 0.3718048632144928, + "learning_rate": 3.5090657349125647e-07, + "loss": 0.08305168151855469, + "step": 6780 + }, + { + "epoch": 0.9448895701247126, + "grad_norm": 0.6190166473388672, + "learning_rate": 3.491505813410445e-07, + "loss": 0.08555316925048828, + "step": 6781 + }, + { + "epoch": 0.945028913815927, + "grad_norm": 0.725673496723175, + "learning_rate": 3.473989551609358e-07, + "loss": 0.10007286071777344, + "step": 6782 + }, + { + "epoch": 0.9451682575071414, + "grad_norm": 0.36516135931015015, + "learning_rate": 3.4565169534010123e-07, + "loss": 0.07836627960205078, + "step": 6783 + }, + { + "epoch": 0.9453076011983558, + "grad_norm": 0.41540923714637756, + "learning_rate": 3.439088022667458e-07, + "loss": 0.08295536041259766, + "step": 6784 + }, + { + "epoch": 0.9454469448895702, + "grad_norm": 0.5263097286224365, + "learning_rate": 3.421702763280976e-07, + "loss": 0.073028564453125, + "step": 6785 + }, + { + "epoch": 0.9455862885807845, + "grad_norm": 0.41381219029426575, + "learning_rate": 3.4043611791041874e-07, + "loss": 0.0842132568359375, + "step": 6786 + }, + { + "epoch": 0.9457256322719989, + "grad_norm": 0.326447993516922, + "learning_rate": 3.387063273989966e-07, + "loss": 0.06621360778808594, + "step": 6787 + }, + { + "epoch": 0.9458649759632133, + "grad_norm": 0.3467754125595093, + "learning_rate": 3.3698090517815696e-07, + "loss": 0.07483863830566406, + "step": 6788 + }, + { + "epoch": 0.9460043196544277, + "grad_norm": 0.6391805410385132, + "learning_rate": 3.352598516312422e-07, + "loss": 0.11546707153320312, + "step": 6789 + }, + { + "epoch": 0.946143663345642, + "grad_norm": 0.42957955598831177, + "learning_rate": 3.3354316714063527e-07, + "loss": 0.07722663879394531, + "step": 6790 + }, + { + "epoch": 0.9462830070368564, + "grad_norm": 0.3688940107822418, + "learning_rate": 3.318308520877489e-07, + "loss": 0.0740518569946289, + "step": 6791 + }, + { + "epoch": 0.9464223507280708, + "grad_norm": 0.5217251181602478, + "learning_rate": 3.301229068530098e-07, + "loss": 0.09797477722167969, + "step": 6792 + }, + { + "epoch": 0.9465616944192852, + "grad_norm": 0.5509326457977295, + "learning_rate": 3.2841933181589234e-07, + "loss": 0.07828712463378906, + "step": 6793 + }, + { + "epoch": 0.9467010381104995, + "grad_norm": 0.4154675304889679, + "learning_rate": 3.26720127354887e-07, + "loss": 0.07313680648803711, + "step": 6794 + }, + { + "epoch": 0.9468403818017139, + "grad_norm": 0.2965901792049408, + "learning_rate": 3.250252938475229e-07, + "loss": 0.06716728210449219, + "step": 6795 + }, + { + "epoch": 0.9469797254929283, + "grad_norm": 0.4093542695045471, + "learning_rate": 3.2333483167035217e-07, + "loss": 0.0847320556640625, + "step": 6796 + }, + { + "epoch": 0.9471190691841427, + "grad_norm": 0.49165111780166626, + "learning_rate": 3.216487411989544e-07, + "loss": 0.09358072280883789, + "step": 6797 + }, + { + "epoch": 0.9472584128753571, + "grad_norm": 0.42895904183387756, + "learning_rate": 3.19967022807941e-07, + "loss": 0.08227348327636719, + "step": 6798 + }, + { + "epoch": 0.9473977565665714, + "grad_norm": 0.5844333171844482, + "learning_rate": 3.182896768709531e-07, + "loss": 0.085601806640625, + "step": 6799 + }, + { + "epoch": 0.9475371002577858, + "grad_norm": 0.38072389364242554, + "learning_rate": 3.166167037606571e-07, + "loss": 0.08655929565429688, + "step": 6800 + }, + { + "epoch": 0.9476764439490002, + "grad_norm": 1.0217102766036987, + "learning_rate": 3.1494810384875343e-07, + "loss": 0.11993980407714844, + "step": 6801 + }, + { + "epoch": 0.9478157876402146, + "grad_norm": 0.42535099387168884, + "learning_rate": 3.132838775059632e-07, + "loss": 0.08617591857910156, + "step": 6802 + }, + { + "epoch": 0.947955131331429, + "grad_norm": 0.5669595003128052, + "learning_rate": 3.116240251020375e-07, + "loss": 0.10284805297851562, + "step": 6803 + }, + { + "epoch": 0.9480944750226433, + "grad_norm": 0.4191311001777649, + "learning_rate": 3.0996854700575896e-07, + "loss": 0.08337879180908203, + "step": 6804 + }, + { + "epoch": 0.9482338187138577, + "grad_norm": 0.5259641408920288, + "learning_rate": 3.083174435849423e-07, + "loss": 0.08616828918457031, + "step": 6805 + }, + { + "epoch": 0.9483731624050721, + "grad_norm": 0.42437365651130676, + "learning_rate": 3.0667071520641857e-07, + "loss": 0.0898580551147461, + "step": 6806 + }, + { + "epoch": 0.9485125060962865, + "grad_norm": 0.5510688424110413, + "learning_rate": 3.05028362236055e-07, + "loss": 0.08397102355957031, + "step": 6807 + }, + { + "epoch": 0.9486518497875008, + "grad_norm": 0.3313187062740326, + "learning_rate": 3.033903850387465e-07, + "loss": 0.071136474609375, + "step": 6808 + }, + { + "epoch": 0.9487911934787152, + "grad_norm": 0.3933502733707428, + "learning_rate": 3.01756783978413e-07, + "loss": 0.07514286041259766, + "step": 6809 + }, + { + "epoch": 0.9489305371699296, + "grad_norm": 0.40652602910995483, + "learning_rate": 3.001275594180042e-07, + "loss": 0.07753372192382812, + "step": 6810 + }, + { + "epoch": 0.949069880861144, + "grad_norm": 0.5688160061836243, + "learning_rate": 2.9850271171949495e-07, + "loss": 0.09051322937011719, + "step": 6811 + }, + { + "epoch": 0.9492092245523583, + "grad_norm": 0.46016547083854675, + "learning_rate": 2.968822412438921e-07, + "loss": 0.08639240264892578, + "step": 6812 + }, + { + "epoch": 0.9493485682435727, + "grad_norm": 0.5202890634536743, + "learning_rate": 2.9526614835122314e-07, + "loss": 0.08852005004882812, + "step": 6813 + }, + { + "epoch": 0.9494879119347871, + "grad_norm": 0.3840879797935486, + "learning_rate": 2.936544334005498e-07, + "loss": 0.05744743347167969, + "step": 6814 + }, + { + "epoch": 0.9496272556260016, + "grad_norm": 0.42329081892967224, + "learning_rate": 2.920470967499589e-07, + "loss": 0.09808731079101562, + "step": 6815 + }, + { + "epoch": 0.949766599317216, + "grad_norm": 0.40507838129997253, + "learning_rate": 2.9044413875656266e-07, + "loss": 0.08400344848632812, + "step": 6816 + }, + { + "epoch": 0.9499059430084303, + "grad_norm": 0.5086265206336975, + "learning_rate": 2.8884555977650277e-07, + "loss": 0.08617019653320312, + "step": 6817 + }, + { + "epoch": 0.9500452866996447, + "grad_norm": 0.4204801619052887, + "learning_rate": 2.8725136016494404e-07, + "loss": 0.08391284942626953, + "step": 6818 + }, + { + "epoch": 0.9501846303908591, + "grad_norm": 0.4905129075050354, + "learning_rate": 2.856615402760832e-07, + "loss": 0.08357620239257812, + "step": 6819 + }, + { + "epoch": 0.9503239740820735, + "grad_norm": 0.5613455176353455, + "learning_rate": 2.8407610046314425e-07, + "loss": 0.0921783447265625, + "step": 6820 + }, + { + "epoch": 0.9504633177732879, + "grad_norm": 0.44276782870292664, + "learning_rate": 2.824950410783722e-07, + "loss": 0.08233833312988281, + "step": 6821 + }, + { + "epoch": 0.9506026614645022, + "grad_norm": 0.49704214930534363, + "learning_rate": 2.8091836247304603e-07, + "loss": 0.09424495697021484, + "step": 6822 + }, + { + "epoch": 0.9507420051557166, + "grad_norm": 0.3618873655796051, + "learning_rate": 2.7934606499746106e-07, + "loss": 0.07863140106201172, + "step": 6823 + }, + { + "epoch": 0.950881348846931, + "grad_norm": 0.49598756432533264, + "learning_rate": 2.7777814900095344e-07, + "loss": 0.08548736572265625, + "step": 6824 + }, + { + "epoch": 0.9510206925381454, + "grad_norm": 0.388859361410141, + "learning_rate": 2.7621461483187563e-07, + "loss": 0.08004093170166016, + "step": 6825 + }, + { + "epoch": 0.9511600362293597, + "grad_norm": 0.4974066913127899, + "learning_rate": 2.7465546283760526e-07, + "loss": 0.08779430389404297, + "step": 6826 + }, + { + "epoch": 0.9512993799205741, + "grad_norm": 0.41877707839012146, + "learning_rate": 2.731006933645586e-07, + "loss": 0.08432579040527344, + "step": 6827 + }, + { + "epoch": 0.9514387236117885, + "grad_norm": 0.4233865439891815, + "learning_rate": 2.7155030675816153e-07, + "loss": 0.07572650909423828, + "step": 6828 + }, + { + "epoch": 0.9515780673030029, + "grad_norm": 0.3279949128627777, + "learning_rate": 2.7000430336287855e-07, + "loss": 0.07067012786865234, + "step": 6829 + }, + { + "epoch": 0.9517174109942172, + "grad_norm": 0.7296552658081055, + "learning_rate": 2.684626835221971e-07, + "loss": 0.10951805114746094, + "step": 6830 + }, + { + "epoch": 0.9518567546854316, + "grad_norm": 0.8776970505714417, + "learning_rate": 2.669254475786276e-07, + "loss": 0.10978126525878906, + "step": 6831 + }, + { + "epoch": 0.951996098376646, + "grad_norm": 0.3715384602546692, + "learning_rate": 2.6539259587371026e-07, + "loss": 0.08452033996582031, + "step": 6832 + }, + { + "epoch": 0.9521354420678604, + "grad_norm": 0.6402113437652588, + "learning_rate": 2.638641287480104e-07, + "loss": 0.1027679443359375, + "step": 6833 + }, + { + "epoch": 0.9522747857590748, + "grad_norm": 0.509029746055603, + "learning_rate": 2.6234004654111854e-07, + "loss": 0.09473037719726562, + "step": 6834 + }, + { + "epoch": 0.9524141294502891, + "grad_norm": 0.6638472676277161, + "learning_rate": 2.6082034959164613e-07, + "loss": 0.10806846618652344, + "step": 6835 + }, + { + "epoch": 0.9525534731415035, + "grad_norm": 0.419281005859375, + "learning_rate": 2.5930503823724086e-07, + "loss": 0.08102989196777344, + "step": 6836 + }, + { + "epoch": 0.9526928168327179, + "grad_norm": 0.6731868386268616, + "learning_rate": 2.577941128145689e-07, + "loss": 0.10998773574829102, + "step": 6837 + }, + { + "epoch": 0.9528321605239323, + "grad_norm": 0.6205528378486633, + "learning_rate": 2.562875736593218e-07, + "loss": 0.09706687927246094, + "step": 6838 + }, + { + "epoch": 0.9529715042151466, + "grad_norm": 0.4277251660823822, + "learning_rate": 2.547854211062206e-07, + "loss": 0.07789421081542969, + "step": 6839 + }, + { + "epoch": 0.953110847906361, + "grad_norm": 0.4737596809864044, + "learning_rate": 2.532876554890051e-07, + "loss": 0.0849142074584961, + "step": 6840 + }, + { + "epoch": 0.9532501915975754, + "grad_norm": 0.4627349078655243, + "learning_rate": 2.5179427714045136e-07, + "loss": 0.08182334899902344, + "step": 6841 + }, + { + "epoch": 0.9533895352887898, + "grad_norm": 0.47078341245651245, + "learning_rate": 2.5030528639234717e-07, + "loss": 0.07705307006835938, + "step": 6842 + }, + { + "epoch": 0.9535288789800042, + "grad_norm": 0.39247024059295654, + "learning_rate": 2.488206835755147e-07, + "loss": 0.07477474212646484, + "step": 6843 + }, + { + "epoch": 0.9536682226712185, + "grad_norm": 0.5337232351303101, + "learning_rate": 2.4734046901980114e-07, + "loss": 0.09307479858398438, + "step": 6844 + }, + { + "epoch": 0.9538075663624329, + "grad_norm": 0.5427411198616028, + "learning_rate": 2.4586464305407454e-07, + "loss": 0.10519790649414062, + "step": 6845 + }, + { + "epoch": 0.9539469100536473, + "grad_norm": 0.69333416223526, + "learning_rate": 2.443932060062282e-07, + "loss": 0.10502243041992188, + "step": 6846 + }, + { + "epoch": 0.9540862537448617, + "grad_norm": 0.40654683113098145, + "learning_rate": 2.429261582031828e-07, + "loss": 0.08683013916015625, + "step": 6847 + }, + { + "epoch": 0.954225597436076, + "grad_norm": 0.4452696144580841, + "learning_rate": 2.4146349997088646e-07, + "loss": 0.08081817626953125, + "step": 6848 + }, + { + "epoch": 0.9543649411272904, + "grad_norm": 0.6183174848556519, + "learning_rate": 2.400052316343038e-07, + "loss": 0.10132598876953125, + "step": 6849 + }, + { + "epoch": 0.9545042848185048, + "grad_norm": 0.5380740165710449, + "learning_rate": 2.385513535174289e-07, + "loss": 0.08966636657714844, + "step": 6850 + }, + { + "epoch": 0.9546436285097192, + "grad_norm": 0.38383468985557556, + "learning_rate": 2.3710186594328333e-07, + "loss": 0.08449363708496094, + "step": 6851 + }, + { + "epoch": 0.9547829722009336, + "grad_norm": 0.4345821738243103, + "learning_rate": 2.3565676923390734e-07, + "loss": 0.07533454895019531, + "step": 6852 + }, + { + "epoch": 0.9549223158921479, + "grad_norm": 0.40064162015914917, + "learning_rate": 2.3421606371037075e-07, + "loss": 0.07763671875, + "step": 6853 + }, + { + "epoch": 0.9550616595833623, + "grad_norm": 0.5111579298973083, + "learning_rate": 2.3277974969276417e-07, + "loss": 0.08825302124023438, + "step": 6854 + }, + { + "epoch": 0.9552010032745768, + "grad_norm": 0.5797995924949646, + "learning_rate": 2.3134782750020347e-07, + "loss": 0.10132408142089844, + "step": 6855 + }, + { + "epoch": 0.9553403469657912, + "grad_norm": 0.31440216302871704, + "learning_rate": 2.2992029745082966e-07, + "loss": 0.07669925689697266, + "step": 6856 + }, + { + "epoch": 0.9554796906570056, + "grad_norm": 0.3725418448448181, + "learning_rate": 2.2849715986180688e-07, + "loss": 0.08535575866699219, + "step": 6857 + }, + { + "epoch": 0.9556190343482199, + "grad_norm": 0.616052508354187, + "learning_rate": 2.2707841504932438e-07, + "loss": 0.1172952651977539, + "step": 6858 + }, + { + "epoch": 0.9557583780394343, + "grad_norm": 0.5408133864402771, + "learning_rate": 2.2566406332859449e-07, + "loss": 0.11156654357910156, + "step": 6859 + }, + { + "epoch": 0.9558977217306487, + "grad_norm": 0.5346489548683167, + "learning_rate": 2.242541050138547e-07, + "loss": 0.08661270141601562, + "step": 6860 + }, + { + "epoch": 0.9560370654218631, + "grad_norm": 0.676574170589447, + "learning_rate": 2.2284854041836335e-07, + "loss": 0.11248588562011719, + "step": 6861 + }, + { + "epoch": 0.9561764091130774, + "grad_norm": 0.441691130399704, + "learning_rate": 2.214473698544084e-07, + "loss": 0.08644866943359375, + "step": 6862 + }, + { + "epoch": 0.9563157528042918, + "grad_norm": 0.40471798181533813, + "learning_rate": 2.2005059363329196e-07, + "loss": 0.0776824951171875, + "step": 6863 + }, + { + "epoch": 0.9564550964955062, + "grad_norm": 0.5558879971504211, + "learning_rate": 2.1865821206535243e-07, + "loss": 0.09721755981445312, + "step": 6864 + }, + { + "epoch": 0.9565944401867206, + "grad_norm": 0.5292299389839172, + "learning_rate": 2.1727022545994237e-07, + "loss": 0.09392738342285156, + "step": 6865 + }, + { + "epoch": 0.956733783877935, + "grad_norm": 0.6131840348243713, + "learning_rate": 2.1588663412544174e-07, + "loss": 0.09461212158203125, + "step": 6866 + }, + { + "epoch": 0.9568731275691493, + "grad_norm": 0.6178871989250183, + "learning_rate": 2.1450743836925136e-07, + "loss": 0.10685920715332031, + "step": 6867 + }, + { + "epoch": 0.9570124712603637, + "grad_norm": 0.40394875407218933, + "learning_rate": 2.1313263849779498e-07, + "loss": 0.06700897216796875, + "step": 6868 + }, + { + "epoch": 0.9571518149515781, + "grad_norm": 0.4177640378475189, + "learning_rate": 2.1176223481652824e-07, + "loss": 0.08954620361328125, + "step": 6869 + }, + { + "epoch": 0.9572911586427925, + "grad_norm": 0.6595035195350647, + "learning_rate": 2.1039622762991874e-07, + "loss": 0.09499359130859375, + "step": 6870 + }, + { + "epoch": 0.9574305023340068, + "grad_norm": 0.39217373728752136, + "learning_rate": 2.0903461724146146e-07, + "loss": 0.07741594314575195, + "step": 6871 + }, + { + "epoch": 0.9575698460252212, + "grad_norm": 0.3349374532699585, + "learning_rate": 2.0767740395367886e-07, + "loss": 0.07248592376708984, + "step": 6872 + }, + { + "epoch": 0.9577091897164356, + "grad_norm": 0.7855847477912903, + "learning_rate": 2.0632458806810974e-07, + "loss": 0.11083030700683594, + "step": 6873 + }, + { + "epoch": 0.95784853340765, + "grad_norm": 0.49299201369285583, + "learning_rate": 2.0497616988532032e-07, + "loss": 0.08922576904296875, + "step": 6874 + }, + { + "epoch": 0.9579878770988643, + "grad_norm": 0.4442163109779358, + "learning_rate": 2.0363214970489763e-07, + "loss": 0.07723236083984375, + "step": 6875 + }, + { + "epoch": 0.9581272207900787, + "grad_norm": 0.4708702862262726, + "learning_rate": 2.0229252782545171e-07, + "loss": 0.08049440383911133, + "step": 6876 + }, + { + "epoch": 0.9582665644812931, + "grad_norm": 0.5504225492477417, + "learning_rate": 2.0095730454461781e-07, + "loss": 0.09671401977539062, + "step": 6877 + }, + { + "epoch": 0.9584059081725075, + "grad_norm": 0.5158814787864685, + "learning_rate": 1.9962648015904972e-07, + "loss": 0.10125064849853516, + "step": 6878 + }, + { + "epoch": 0.9585452518637219, + "grad_norm": 0.6416032314300537, + "learning_rate": 1.9830005496442873e-07, + "loss": 0.10762882232666016, + "step": 6879 + }, + { + "epoch": 0.9586845955549362, + "grad_norm": 0.5166247487068176, + "learning_rate": 1.969780292554546e-07, + "loss": 0.09470784664154053, + "step": 6880 + }, + { + "epoch": 0.9588239392461506, + "grad_norm": 0.44706493616104126, + "learning_rate": 1.9566040332585246e-07, + "loss": 0.08994102478027344, + "step": 6881 + }, + { + "epoch": 0.958963282937365, + "grad_norm": 0.38526666164398193, + "learning_rate": 1.9434717746836805e-07, + "loss": 0.0789327621459961, + "step": 6882 + }, + { + "epoch": 0.9591026266285794, + "grad_norm": 0.5662649273872375, + "learning_rate": 1.9303835197476804e-07, + "loss": 0.09167671203613281, + "step": 6883 + }, + { + "epoch": 0.9592419703197937, + "grad_norm": 0.4626424014568329, + "learning_rate": 1.917339271358465e-07, + "loss": 0.08877944946289062, + "step": 6884 + }, + { + "epoch": 0.9593813140110081, + "grad_norm": 0.3858412802219391, + "learning_rate": 1.9043390324141597e-07, + "loss": 0.07522201538085938, + "step": 6885 + }, + { + "epoch": 0.9595206577022225, + "grad_norm": 0.3883361518383026, + "learning_rate": 1.8913828058031436e-07, + "loss": 0.08151626586914062, + "step": 6886 + }, + { + "epoch": 0.9596600013934369, + "grad_norm": 0.49849340319633484, + "learning_rate": 1.878470594403936e-07, + "loss": 0.08405494689941406, + "step": 6887 + }, + { + "epoch": 0.9597993450846513, + "grad_norm": 0.45101454854011536, + "learning_rate": 1.865602401085398e-07, + "loss": 0.07732963562011719, + "step": 6888 + }, + { + "epoch": 0.9599386887758656, + "grad_norm": 0.4675615727901459, + "learning_rate": 1.852778228706509e-07, + "loss": 0.089141845703125, + "step": 6889 + }, + { + "epoch": 0.96007803246708, + "grad_norm": 0.5266090631484985, + "learning_rate": 1.8399980801165006e-07, + "loss": 0.07913684844970703, + "step": 6890 + }, + { + "epoch": 0.9602173761582944, + "grad_norm": 0.38674846291542053, + "learning_rate": 1.8272619581549022e-07, + "loss": 0.07574081420898438, + "step": 6891 + }, + { + "epoch": 0.9603567198495088, + "grad_norm": 0.6489673256874084, + "learning_rate": 1.8145698656512943e-07, + "loss": 0.09135150909423828, + "step": 6892 + }, + { + "epoch": 0.9604960635407231, + "grad_norm": 0.38251277804374695, + "learning_rate": 1.8019218054256216e-07, + "loss": 0.07499122619628906, + "step": 6893 + }, + { + "epoch": 0.9606354072319375, + "grad_norm": 0.3791608214378357, + "learning_rate": 1.7893177802879692e-07, + "loss": 0.07827186584472656, + "step": 6894 + }, + { + "epoch": 0.9607747509231519, + "grad_norm": 0.3802769184112549, + "learning_rate": 1.776757793038697e-07, + "loss": 0.0753164291381836, + "step": 6895 + }, + { + "epoch": 0.9609140946143664, + "grad_norm": 0.5524400472640991, + "learning_rate": 1.7642418464683287e-07, + "loss": 0.0873270034790039, + "step": 6896 + }, + { + "epoch": 0.9610534383055808, + "grad_norm": 0.42223408818244934, + "learning_rate": 1.7517699433576173e-07, + "loss": 0.07997703552246094, + "step": 6897 + }, + { + "epoch": 0.9611927819967951, + "grad_norm": 0.40280357003211975, + "learning_rate": 1.7393420864775467e-07, + "loss": 0.06628894805908203, + "step": 6898 + }, + { + "epoch": 0.9613321256880095, + "grad_norm": 0.40567484498023987, + "learning_rate": 1.7269582785892858e-07, + "loss": 0.08608627319335938, + "step": 6899 + }, + { + "epoch": 0.9614714693792239, + "grad_norm": 0.3702489733695984, + "learning_rate": 1.7146185224442557e-07, + "loss": 0.07019519805908203, + "step": 6900 + }, + { + "epoch": 0.9616108130704383, + "grad_norm": 0.4795719385147095, + "learning_rate": 1.7023228207840637e-07, + "loss": 0.0858612060546875, + "step": 6901 + }, + { + "epoch": 0.9617501567616527, + "grad_norm": 0.4540165960788727, + "learning_rate": 1.6900711763405242e-07, + "loss": 0.08951950073242188, + "step": 6902 + }, + { + "epoch": 0.961889500452867, + "grad_norm": 0.6330496668815613, + "learning_rate": 1.67786359183566e-07, + "loss": 0.10473442077636719, + "step": 6903 + }, + { + "epoch": 0.9620288441440814, + "grad_norm": 0.4481245279312134, + "learning_rate": 1.665700069981746e-07, + "loss": 0.08587646484375, + "step": 6904 + }, + { + "epoch": 0.9621681878352958, + "grad_norm": 0.6433982253074646, + "learning_rate": 1.6535806134812427e-07, + "loss": 0.08419990539550781, + "step": 6905 + }, + { + "epoch": 0.9623075315265102, + "grad_norm": 0.37800225615501404, + "learning_rate": 1.6415052250267738e-07, + "loss": 0.07798004150390625, + "step": 6906 + }, + { + "epoch": 0.9624468752177245, + "grad_norm": 0.512642502784729, + "learning_rate": 1.62947390730126e-07, + "loss": 0.0829305648803711, + "step": 6907 + }, + { + "epoch": 0.9625862189089389, + "grad_norm": 0.4296231269836426, + "learning_rate": 1.617486662977763e-07, + "loss": 0.08379936218261719, + "step": 6908 + }, + { + "epoch": 0.9627255626001533, + "grad_norm": 0.5194479823112488, + "learning_rate": 1.6055434947195746e-07, + "loss": 0.08938407897949219, + "step": 6909 + }, + { + "epoch": 0.9628649062913677, + "grad_norm": 0.3113015592098236, + "learning_rate": 1.5936444051801947e-07, + "loss": 0.07796859741210938, + "step": 6910 + }, + { + "epoch": 0.963004249982582, + "grad_norm": 0.5361243486404419, + "learning_rate": 1.5817893970033305e-07, + "loss": 0.09636688232421875, + "step": 6911 + }, + { + "epoch": 0.9631435936737964, + "grad_norm": 0.44262924790382385, + "learning_rate": 1.5699784728229196e-07, + "loss": 0.07544898986816406, + "step": 6912 + }, + { + "epoch": 0.9632829373650108, + "grad_norm": 0.5289692878723145, + "learning_rate": 1.5582116352630626e-07, + "loss": 0.0879068374633789, + "step": 6913 + }, + { + "epoch": 0.9634222810562252, + "grad_norm": 0.4309063255786896, + "learning_rate": 1.5464888869380468e-07, + "loss": 0.09223365783691406, + "step": 6914 + }, + { + "epoch": 0.9635616247474396, + "grad_norm": 0.6686912775039673, + "learning_rate": 1.5348102304524548e-07, + "loss": 0.11322021484375, + "step": 6915 + }, + { + "epoch": 0.9637009684386539, + "grad_norm": 0.5471568703651428, + "learning_rate": 1.523175668400989e-07, + "loss": 0.08207893371582031, + "step": 6916 + }, + { + "epoch": 0.9638403121298683, + "grad_norm": 0.3437763452529907, + "learning_rate": 1.511585203368582e-07, + "loss": 0.08280372619628906, + "step": 6917 + }, + { + "epoch": 0.9639796558210827, + "grad_norm": 0.3699339032173157, + "learning_rate": 1.5000388379303732e-07, + "loss": 0.08369159698486328, + "step": 6918 + }, + { + "epoch": 0.9641189995122971, + "grad_norm": 0.4723954200744629, + "learning_rate": 1.4885365746516889e-07, + "loss": 0.07581329345703125, + "step": 6919 + }, + { + "epoch": 0.9642583432035114, + "grad_norm": 0.5098837018013, + "learning_rate": 1.477078416088107e-07, + "loss": 0.07993316650390625, + "step": 6920 + }, + { + "epoch": 0.9643976868947258, + "grad_norm": 0.84745192527771, + "learning_rate": 1.465664364785324e-07, + "loss": 0.09989070892333984, + "step": 6921 + }, + { + "epoch": 0.9645370305859402, + "grad_norm": 0.581802487373352, + "learning_rate": 1.454294423279312e-07, + "loss": 0.09569549560546875, + "step": 6922 + }, + { + "epoch": 0.9646763742771546, + "grad_norm": 0.5202529430389404, + "learning_rate": 1.4429685940962278e-07, + "loss": 0.10017967224121094, + "step": 6923 + }, + { + "epoch": 0.964815717968369, + "grad_norm": 0.4728740155696869, + "learning_rate": 1.4316868797523697e-07, + "loss": 0.08730506896972656, + "step": 6924 + }, + { + "epoch": 0.9649550616595833, + "grad_norm": 0.2589283287525177, + "learning_rate": 1.420449282754288e-07, + "loss": 0.064361572265625, + "step": 6925 + }, + { + "epoch": 0.9650944053507977, + "grad_norm": 0.6120147705078125, + "learning_rate": 1.4092558055987193e-07, + "loss": 0.1043548583984375, + "step": 6926 + }, + { + "epoch": 0.9652337490420121, + "grad_norm": 0.44837144017219543, + "learning_rate": 1.3981064507726295e-07, + "loss": 0.0804901123046875, + "step": 6927 + }, + { + "epoch": 0.9653730927332265, + "grad_norm": 0.33945250511169434, + "learning_rate": 1.387001220753126e-07, + "loss": 0.06830334663391113, + "step": 6928 + }, + { + "epoch": 0.9655124364244408, + "grad_norm": 0.43656885623931885, + "learning_rate": 1.3759401180075239e-07, + "loss": 0.08484840393066406, + "step": 6929 + }, + { + "epoch": 0.9656517801156552, + "grad_norm": 0.6355556845664978, + "learning_rate": 1.3649231449933686e-07, + "loss": 0.13019943237304688, + "step": 6930 + }, + { + "epoch": 0.9657911238068696, + "grad_norm": 0.5897980332374573, + "learning_rate": 1.3539503041583913e-07, + "loss": 0.08922004699707031, + "step": 6931 + }, + { + "epoch": 0.965930467498084, + "grad_norm": 0.5013455748558044, + "learning_rate": 1.3430215979404638e-07, + "loss": 0.1031036376953125, + "step": 6932 + }, + { + "epoch": 0.9660698111892984, + "grad_norm": 0.579555332660675, + "learning_rate": 1.3321370287677328e-07, + "loss": 0.09626197814941406, + "step": 6933 + }, + { + "epoch": 0.9662091548805127, + "grad_norm": 0.5319574475288391, + "learning_rate": 1.321296599058508e-07, + "loss": 0.08365631103515625, + "step": 6934 + }, + { + "epoch": 0.9663484985717271, + "grad_norm": 0.8909456729888916, + "learning_rate": 1.310500311221241e-07, + "loss": 0.10026168823242188, + "step": 6935 + }, + { + "epoch": 0.9664878422629416, + "grad_norm": 0.5264779329299927, + "learning_rate": 1.2997481676546576e-07, + "loss": 0.0865631103515625, + "step": 6936 + }, + { + "epoch": 0.966627185954156, + "grad_norm": 0.4786481261253357, + "learning_rate": 1.2890401707476242e-07, + "loss": 0.09161090850830078, + "step": 6937 + }, + { + "epoch": 0.9667665296453704, + "grad_norm": 0.6651724576950073, + "learning_rate": 1.2783763228792156e-07, + "loss": 0.0904703140258789, + "step": 6938 + }, + { + "epoch": 0.9669058733365847, + "grad_norm": 0.4535134434700012, + "learning_rate": 1.2677566264186925e-07, + "loss": 0.07775115966796875, + "step": 6939 + }, + { + "epoch": 0.9670452170277991, + "grad_norm": 0.34805890917778015, + "learning_rate": 1.2571810837255228e-07, + "loss": 0.0732583999633789, + "step": 6940 + }, + { + "epoch": 0.9671845607190135, + "grad_norm": 0.4514891803264618, + "learning_rate": 1.246649697149338e-07, + "loss": 0.09055137634277344, + "step": 6941 + }, + { + "epoch": 0.9673239044102279, + "grad_norm": 0.510680615901947, + "learning_rate": 1.2361624690299557e-07, + "loss": 0.08629417419433594, + "step": 6942 + }, + { + "epoch": 0.9674632481014422, + "grad_norm": 0.58719402551651, + "learning_rate": 1.225719401697445e-07, + "loss": 0.09388160705566406, + "step": 6943 + }, + { + "epoch": 0.9676025917926566, + "grad_norm": 0.3658842146396637, + "learning_rate": 1.2153204974719722e-07, + "loss": 0.07269859313964844, + "step": 6944 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.4968048930168152, + "learning_rate": 1.2049657586639786e-07, + "loss": 0.09089469909667969, + "step": 6945 + }, + { + "epoch": 0.9678812791750854, + "grad_norm": 0.7148755192756653, + "learning_rate": 1.194655187574001e-07, + "loss": 0.09905815124511719, + "step": 6946 + }, + { + "epoch": 0.9680206228662998, + "grad_norm": 0.5073462128639221, + "learning_rate": 1.1843887864928294e-07, + "loss": 0.09692764282226562, + "step": 6947 + }, + { + "epoch": 0.9681599665575141, + "grad_norm": 0.48205357789993286, + "learning_rate": 1.1741665577014393e-07, + "loss": 0.06798458099365234, + "step": 6948 + }, + { + "epoch": 0.9682993102487285, + "grad_norm": 0.5065221786499023, + "learning_rate": 1.163988503470992e-07, + "loss": 0.1006937026977539, + "step": 6949 + }, + { + "epoch": 0.9684386539399429, + "grad_norm": 0.8038027286529541, + "learning_rate": 1.153854626062767e-07, + "loss": 0.11053085327148438, + "step": 6950 + }, + { + "epoch": 0.9685779976311573, + "grad_norm": 0.8569527268409729, + "learning_rate": 1.1437649277283191e-07, + "loss": 0.11460494995117188, + "step": 6951 + }, + { + "epoch": 0.9687173413223716, + "grad_norm": 0.5215571522712708, + "learning_rate": 1.133719410709344e-07, + "loss": 0.07224178314208984, + "step": 6952 + }, + { + "epoch": 0.968856685013586, + "grad_norm": 0.3985353708267212, + "learning_rate": 1.1237180772377233e-07, + "loss": 0.08861541748046875, + "step": 6953 + }, + { + "epoch": 0.9689960287048004, + "grad_norm": 0.7035132050514221, + "learning_rate": 1.1137609295355234e-07, + "loss": 0.10825157165527344, + "step": 6954 + }, + { + "epoch": 0.9691353723960148, + "grad_norm": 0.4372332692146301, + "learning_rate": 1.1038479698149752e-07, + "loss": 0.08505940437316895, + "step": 6955 + }, + { + "epoch": 0.9692747160872291, + "grad_norm": 0.4516509175300598, + "learning_rate": 1.093979200278561e-07, + "loss": 0.07646751403808594, + "step": 6956 + }, + { + "epoch": 0.9694140597784435, + "grad_norm": 0.4455028176307678, + "learning_rate": 1.0841546231188382e-07, + "loss": 0.08685874938964844, + "step": 6957 + }, + { + "epoch": 0.9695534034696579, + "grad_norm": 0.3522871434688568, + "learning_rate": 1.0743742405186385e-07, + "loss": 0.07671928405761719, + "step": 6958 + }, + { + "epoch": 0.9696927471608723, + "grad_norm": 0.5618293285369873, + "learning_rate": 1.0646380546509572e-07, + "loss": 0.09895515441894531, + "step": 6959 + }, + { + "epoch": 0.9698320908520867, + "grad_norm": 0.45418837666511536, + "learning_rate": 1.054946067678908e-07, + "loss": 0.09001922607421875, + "step": 6960 + }, + { + "epoch": 0.969971434543301, + "grad_norm": 0.40587905049324036, + "learning_rate": 1.0452982817558577e-07, + "loss": 0.08049774169921875, + "step": 6961 + }, + { + "epoch": 0.9701107782345154, + "grad_norm": 0.7532268166542053, + "learning_rate": 1.0356946990253137e-07, + "loss": 0.0918121337890625, + "step": 6962 + }, + { + "epoch": 0.9702501219257298, + "grad_norm": 0.32215622067451477, + "learning_rate": 1.0261353216209691e-07, + "loss": 0.0683445930480957, + "step": 6963 + }, + { + "epoch": 0.9703894656169442, + "grad_norm": 0.38874053955078125, + "learning_rate": 1.0166201516667029e-07, + "loss": 0.08132743835449219, + "step": 6964 + }, + { + "epoch": 0.9705288093081585, + "grad_norm": 0.6784502863883972, + "learning_rate": 1.0071491912766018e-07, + "loss": 0.08378076553344727, + "step": 6965 + }, + { + "epoch": 0.9706681529993729, + "grad_norm": 0.6875331997871399, + "learning_rate": 9.977224425548271e-08, + "loss": 0.10525035858154297, + "step": 6966 + }, + { + "epoch": 0.9708074966905873, + "grad_norm": 0.5110965371131897, + "learning_rate": 9.883399075958589e-08, + "loss": 0.09037208557128906, + "step": 6967 + }, + { + "epoch": 0.9709468403818017, + "grad_norm": 0.4427454173564911, + "learning_rate": 9.790015884842297e-08, + "loss": 0.0822906494140625, + "step": 6968 + }, + { + "epoch": 0.9710861840730161, + "grad_norm": 0.42352691292762756, + "learning_rate": 9.697074872947242e-08, + "loss": 0.07507896423339844, + "step": 6969 + }, + { + "epoch": 0.9712255277642304, + "grad_norm": 0.422932893037796, + "learning_rate": 9.604576060922687e-08, + "loss": 0.07781982421875, + "step": 6970 + }, + { + "epoch": 0.9713648714554448, + "grad_norm": 0.42707112431526184, + "learning_rate": 9.51251946931997e-08, + "loss": 0.08717918395996094, + "step": 6971 + }, + { + "epoch": 0.9715042151466592, + "grad_norm": 0.499498188495636, + "learning_rate": 9.420905118591617e-08, + "loss": 0.07857799530029297, + "step": 6972 + }, + { + "epoch": 0.9716435588378736, + "grad_norm": 0.3902251422405243, + "learning_rate": 9.32973302909268e-08, + "loss": 0.07539558410644531, + "step": 6973 + }, + { + "epoch": 0.9717829025290879, + "grad_norm": 0.42532235383987427, + "learning_rate": 9.239003221079179e-08, + "loss": 0.07567763328552246, + "step": 6974 + }, + { + "epoch": 0.9719222462203023, + "grad_norm": 0.49384135007858276, + "learning_rate": 9.148715714709433e-08, + "loss": 0.07957172393798828, + "step": 6975 + }, + { + "epoch": 0.9720615899115168, + "grad_norm": 0.4363870322704315, + "learning_rate": 9.058870530042952e-08, + "loss": 0.09673881530761719, + "step": 6976 + }, + { + "epoch": 0.9722009336027312, + "grad_norm": 0.510179877281189, + "learning_rate": 8.969467687041766e-08, + "loss": 0.08815479278564453, + "step": 6977 + }, + { + "epoch": 0.9723402772939456, + "grad_norm": 0.6851111054420471, + "learning_rate": 8.880507205568656e-08, + "loss": 0.08975410461425781, + "step": 6978 + }, + { + "epoch": 0.9724796209851599, + "grad_norm": 0.5366417169570923, + "learning_rate": 8.791989105388699e-08, + "loss": 0.09593391418457031, + "step": 6979 + }, + { + "epoch": 0.9726189646763743, + "grad_norm": 0.7453795075416565, + "learning_rate": 8.70391340616883e-08, + "loss": 0.1252613067626953, + "step": 6980 + }, + { + "epoch": 0.9727583083675887, + "grad_norm": 0.7007654309272766, + "learning_rate": 8.616280127477395e-08, + "loss": 0.10030746459960938, + "step": 6981 + }, + { + "epoch": 0.9728976520588031, + "grad_norm": 0.5098311901092529, + "learning_rate": 8.529089288784376e-08, + "loss": 0.09053611755371094, + "step": 6982 + }, + { + "epoch": 0.9730369957500175, + "grad_norm": 0.5342167019844055, + "learning_rate": 8.442340909461832e-08, + "loss": 0.09897041320800781, + "step": 6983 + }, + { + "epoch": 0.9731763394412318, + "grad_norm": 0.3457837402820587, + "learning_rate": 8.356035008783014e-08, + "loss": 0.073211669921875, + "step": 6984 + }, + { + "epoch": 0.9733156831324462, + "grad_norm": 0.41978052258491516, + "learning_rate": 8.270171605923027e-08, + "loss": 0.07869529724121094, + "step": 6985 + }, + { + "epoch": 0.9734550268236606, + "grad_norm": 0.5261331796646118, + "learning_rate": 8.184750719959278e-08, + "loss": 0.08577775955200195, + "step": 6986 + }, + { + "epoch": 0.973594370514875, + "grad_norm": 0.5353406667709351, + "learning_rate": 8.099772369869696e-08, + "loss": 0.08538341522216797, + "step": 6987 + }, + { + "epoch": 0.9737337142060893, + "grad_norm": 0.4277208149433136, + "learning_rate": 8.015236574534957e-08, + "loss": 0.08266735076904297, + "step": 6988 + }, + { + "epoch": 0.9738730578973037, + "grad_norm": 0.7775799036026001, + "learning_rate": 7.931143352736925e-08, + "loss": 0.10931396484375, + "step": 6989 + }, + { + "epoch": 0.9740124015885181, + "grad_norm": 0.4054991900920868, + "learning_rate": 7.8474927231591e-08, + "loss": 0.07913589477539062, + "step": 6990 + }, + { + "epoch": 0.9741517452797325, + "grad_norm": 0.39948418736457825, + "learning_rate": 7.764284704386837e-08, + "loss": 0.0742044448852539, + "step": 6991 + }, + { + "epoch": 0.9742910889709469, + "grad_norm": 0.29565489292144775, + "learning_rate": 7.681519314906904e-08, + "loss": 0.0635109543800354, + "step": 6992 + }, + { + "epoch": 0.9744304326621612, + "grad_norm": 0.8082930445671082, + "learning_rate": 7.599196573107925e-08, + "loss": 0.11098194122314453, + "step": 6993 + }, + { + "epoch": 0.9745697763533756, + "grad_norm": 0.6490104794502258, + "learning_rate": 7.517316497280158e-08, + "loss": 0.09961891174316406, + "step": 6994 + }, + { + "epoch": 0.97470912004459, + "grad_norm": 0.5940678119659424, + "learning_rate": 7.435879105615718e-08, + "loss": 0.09291267395019531, + "step": 6995 + }, + { + "epoch": 0.9748484637358044, + "grad_norm": 0.6699619293212891, + "learning_rate": 7.354884416207686e-08, + "loss": 0.08967781066894531, + "step": 6996 + }, + { + "epoch": 0.9749878074270187, + "grad_norm": 0.3769146502017975, + "learning_rate": 7.274332447051668e-08, + "loss": 0.0711069107055664, + "step": 6997 + }, + { + "epoch": 0.9751271511182331, + "grad_norm": 0.4420348107814789, + "learning_rate": 7.194223216044238e-08, + "loss": 0.09734916687011719, + "step": 6998 + }, + { + "epoch": 0.9752664948094475, + "grad_norm": 0.6728770732879639, + "learning_rate": 7.114556740983824e-08, + "loss": 0.10950756072998047, + "step": 6999 + }, + { + "epoch": 0.9754058385006619, + "grad_norm": 0.5743403434753418, + "learning_rate": 7.035333039570492e-08, + "loss": 0.09207630157470703, + "step": 7000 + }, + { + "epoch": 0.9755451821918762, + "grad_norm": 0.4887056350708008, + "learning_rate": 6.956552129406158e-08, + "loss": 0.07952880859375, + "step": 7001 + }, + { + "epoch": 0.9756845258830906, + "grad_norm": 0.705302357673645, + "learning_rate": 6.878214027993935e-08, + "loss": 0.09957695007324219, + "step": 7002 + }, + { + "epoch": 0.975823869574305, + "grad_norm": 0.5039952993392944, + "learning_rate": 6.800318752738788e-08, + "loss": 0.09332656860351562, + "step": 7003 + }, + { + "epoch": 0.9759632132655194, + "grad_norm": 0.5358952879905701, + "learning_rate": 6.722866320947319e-08, + "loss": 0.09081077575683594, + "step": 7004 + }, + { + "epoch": 0.9761025569567338, + "grad_norm": 0.4405955672264099, + "learning_rate": 6.64585674982754e-08, + "loss": 0.07808876037597656, + "step": 7005 + }, + { + "epoch": 0.9762419006479481, + "grad_norm": 0.7658602595329285, + "learning_rate": 6.569290056489542e-08, + "loss": 0.09946918487548828, + "step": 7006 + }, + { + "epoch": 0.9763812443391625, + "grad_norm": 0.33819109201431274, + "learning_rate": 6.493166257944384e-08, + "loss": 0.07507896423339844, + "step": 7007 + }, + { + "epoch": 0.9765205880303769, + "grad_norm": 0.4212390184402466, + "learning_rate": 6.417485371105204e-08, + "loss": 0.08489418029785156, + "step": 7008 + }, + { + "epoch": 0.9766599317215913, + "grad_norm": 0.3237452507019043, + "learning_rate": 6.342247412786329e-08, + "loss": 0.06791877746582031, + "step": 7009 + }, + { + "epoch": 0.9767992754128056, + "grad_norm": 0.45776933431625366, + "learning_rate": 6.267452399704387e-08, + "loss": 0.08560943603515625, + "step": 7010 + }, + { + "epoch": 0.97693861910402, + "grad_norm": 0.6044099926948547, + "learning_rate": 6.193100348476533e-08, + "loss": 0.08435535430908203, + "step": 7011 + }, + { + "epoch": 0.9770779627952344, + "grad_norm": 0.4583248496055603, + "learning_rate": 6.11919127562266e-08, + "loss": 0.09760665893554688, + "step": 7012 + }, + { + "epoch": 0.9772173064864488, + "grad_norm": 0.5184327960014343, + "learning_rate": 6.045725197563413e-08, + "loss": 0.08325004577636719, + "step": 7013 + }, + { + "epoch": 0.9773566501776632, + "grad_norm": 0.6125224828720093, + "learning_rate": 5.972702130621067e-08, + "loss": 0.0932159423828125, + "step": 7014 + }, + { + "epoch": 0.9774959938688775, + "grad_norm": 0.4503537118434906, + "learning_rate": 5.900122091019977e-08, + "loss": 0.08184337615966797, + "step": 7015 + }, + { + "epoch": 0.977635337560092, + "grad_norm": 0.6186304688453674, + "learning_rate": 5.827985094885691e-08, + "loss": 0.10333061218261719, + "step": 7016 + }, + { + "epoch": 0.9777746812513064, + "grad_norm": 0.4648358225822449, + "learning_rate": 5.756291158245386e-08, + "loss": 0.07580375671386719, + "step": 7017 + }, + { + "epoch": 0.9779140249425208, + "grad_norm": 0.7227604389190674, + "learning_rate": 5.6850402970278774e-08, + "loss": 0.09438705444335938, + "step": 7018 + }, + { + "epoch": 0.9780533686337352, + "grad_norm": 1.1987565755844116, + "learning_rate": 5.6142325270633904e-08, + "loss": 0.0873565673828125, + "step": 7019 + }, + { + "epoch": 0.9781927123249495, + "grad_norm": 0.7601731419563293, + "learning_rate": 5.543867864083785e-08, + "loss": 0.10590124130249023, + "step": 7020 + }, + { + "epoch": 0.9783320560161639, + "grad_norm": 0.4451426565647125, + "learning_rate": 5.473946323722556e-08, + "loss": 0.08472824096679688, + "step": 7021 + }, + { + "epoch": 0.9784713997073783, + "grad_norm": 0.37166550755500793, + "learning_rate": 5.404467921514611e-08, + "loss": 0.07581806182861328, + "step": 7022 + }, + { + "epoch": 0.9786107433985927, + "grad_norm": 0.47010138630867004, + "learning_rate": 5.335432672896712e-08, + "loss": 0.08540725708007812, + "step": 7023 + }, + { + "epoch": 0.978750087089807, + "grad_norm": 0.6108077168464661, + "learning_rate": 5.26684059320659e-08, + "loss": 0.08135604858398438, + "step": 7024 + }, + { + "epoch": 0.9788894307810214, + "grad_norm": 0.404136061668396, + "learning_rate": 5.198691697683833e-08, + "loss": 0.07214927673339844, + "step": 7025 + }, + { + "epoch": 0.9790287744722358, + "grad_norm": 0.3778747320175171, + "learning_rate": 5.130986001469884e-08, + "loss": 0.07019639015197754, + "step": 7026 + }, + { + "epoch": 0.9791681181634502, + "grad_norm": 0.7593587040901184, + "learning_rate": 5.0637235196071555e-08, + "loss": 0.11236190795898438, + "step": 7027 + }, + { + "epoch": 0.9793074618546646, + "grad_norm": 0.3675217926502228, + "learning_rate": 4.996904267039693e-08, + "loss": 0.0801401138305664, + "step": 7028 + }, + { + "epoch": 0.9794468055458789, + "grad_norm": 0.3663270175457001, + "learning_rate": 4.9305282586136206e-08, + "loss": 0.07219696044921875, + "step": 7029 + }, + { + "epoch": 0.9795861492370933, + "grad_norm": 0.7625707387924194, + "learning_rate": 4.864595509076031e-08, + "loss": 0.09708976745605469, + "step": 7030 + }, + { + "epoch": 0.9797254929283077, + "grad_norm": 0.6380797028541565, + "learning_rate": 4.799106033075429e-08, + "loss": 0.09566307067871094, + "step": 7031 + }, + { + "epoch": 0.9798648366195221, + "grad_norm": 0.43362802267074585, + "learning_rate": 4.734059845162175e-08, + "loss": 0.09340858459472656, + "step": 7032 + }, + { + "epoch": 0.9800041803107364, + "grad_norm": 0.3868067264556885, + "learning_rate": 4.669456959788265e-08, + "loss": 0.07273674011230469, + "step": 7033 + }, + { + "epoch": 0.9801435240019508, + "grad_norm": 0.3657895028591156, + "learning_rate": 4.6052973913068844e-08, + "loss": 0.06771087646484375, + "step": 7034 + }, + { + "epoch": 0.9802828676931652, + "grad_norm": 0.48930615186691284, + "learning_rate": 4.541581153972852e-08, + "loss": 0.09151554107666016, + "step": 7035 + }, + { + "epoch": 0.9804222113843796, + "grad_norm": 0.3854818642139435, + "learning_rate": 4.478308261942177e-08, + "loss": 0.07027721405029297, + "step": 7036 + }, + { + "epoch": 0.980561555075594, + "grad_norm": 0.46077412366867065, + "learning_rate": 4.415478729272949e-08, + "loss": 0.08953857421875, + "step": 7037 + }, + { + "epoch": 0.9807008987668083, + "grad_norm": 0.4141087234020233, + "learning_rate": 4.353092569924444e-08, + "loss": 0.08289909362792969, + "step": 7038 + }, + { + "epoch": 0.9808402424580227, + "grad_norm": 0.35964086651802063, + "learning_rate": 4.2911497977573545e-08, + "loss": 0.07779884338378906, + "step": 7039 + }, + { + "epoch": 0.9809795861492371, + "grad_norm": 0.4349823296070099, + "learning_rate": 4.2296504265340044e-08, + "loss": 0.0787200927734375, + "step": 7040 + }, + { + "epoch": 0.9811189298404515, + "grad_norm": 0.4623677134513855, + "learning_rate": 4.1685944699181304e-08, + "loss": 0.08010673522949219, + "step": 7041 + }, + { + "epoch": 0.9812582735316658, + "grad_norm": 0.6302046775817871, + "learning_rate": 4.107981941474881e-08, + "loss": 0.08349108695983887, + "step": 7042 + }, + { + "epoch": 0.9813976172228802, + "grad_norm": 0.3532089293003082, + "learning_rate": 4.04781285467104e-08, + "loss": 0.06485557556152344, + "step": 7043 + }, + { + "epoch": 0.9815369609140946, + "grad_norm": 0.4426809251308441, + "learning_rate": 3.9880872228748034e-08, + "loss": 0.084014892578125, + "step": 7044 + }, + { + "epoch": 0.981676304605309, + "grad_norm": 0.4518667459487915, + "learning_rate": 3.928805059356e-08, + "loss": 0.0874481201171875, + "step": 7045 + }, + { + "epoch": 0.9818156482965233, + "grad_norm": 0.4150584042072296, + "learning_rate": 3.869966377285428e-08, + "loss": 0.07783126831054688, + "step": 7046 + }, + { + "epoch": 0.9819549919877377, + "grad_norm": 0.3879675269126892, + "learning_rate": 3.8115711897359634e-08, + "loss": 0.07575035095214844, + "step": 7047 + }, + { + "epoch": 0.9820943356789521, + "grad_norm": 0.8007100224494934, + "learning_rate": 3.753619509681672e-08, + "loss": 0.10845947265625, + "step": 7048 + }, + { + "epoch": 0.9822336793701665, + "grad_norm": 0.47340044379234314, + "learning_rate": 3.696111349998255e-08, + "loss": 0.08667945861816406, + "step": 7049 + }, + { + "epoch": 0.9823730230613809, + "grad_norm": 0.5999298691749573, + "learning_rate": 3.6390467234621585e-08, + "loss": 0.10376739501953125, + "step": 7050 + }, + { + "epoch": 0.9825123667525952, + "grad_norm": 0.4627942144870758, + "learning_rate": 3.582425642752352e-08, + "loss": 0.08148002624511719, + "step": 7051 + }, + { + "epoch": 0.9826517104438096, + "grad_norm": 0.4911785423755646, + "learning_rate": 3.52624812044855e-08, + "loss": 0.07576614618301392, + "step": 7052 + }, + { + "epoch": 0.982791054135024, + "grad_norm": 0.3912443518638611, + "learning_rate": 3.470514169032102e-08, + "loss": 0.07485103607177734, + "step": 7053 + }, + { + "epoch": 0.9829303978262384, + "grad_norm": 0.33802953362464905, + "learning_rate": 3.4152238008859915e-08, + "loss": 0.07449531555175781, + "step": 7054 + }, + { + "epoch": 0.9830697415174527, + "grad_norm": 0.5231349468231201, + "learning_rate": 3.360377028294171e-08, + "loss": 0.0814962387084961, + "step": 7055 + }, + { + "epoch": 0.9832090852086672, + "grad_norm": 0.4523211419582367, + "learning_rate": 3.305973863442669e-08, + "loss": 0.0751800537109375, + "step": 7056 + }, + { + "epoch": 0.9833484288998816, + "grad_norm": 0.6672792434692383, + "learning_rate": 3.252014318418484e-08, + "loss": 0.08451271057128906, + "step": 7057 + }, + { + "epoch": 0.983487772591096, + "grad_norm": 0.34023118019104004, + "learning_rate": 3.198498405210027e-08, + "loss": 0.07354545593261719, + "step": 7058 + }, + { + "epoch": 0.9836271162823104, + "grad_norm": 0.5787100195884705, + "learning_rate": 3.145426135707563e-08, + "loss": 0.08195018768310547, + "step": 7059 + }, + { + "epoch": 0.9837664599735247, + "grad_norm": 0.47166934609413147, + "learning_rate": 3.092797521702551e-08, + "loss": 0.08232593536376953, + "step": 7060 + }, + { + "epoch": 0.9839058036647391, + "grad_norm": 0.2888556718826294, + "learning_rate": 3.040612574887636e-08, + "loss": 0.06496810913085938, + "step": 7061 + }, + { + "epoch": 0.9840451473559535, + "grad_norm": 0.5097386837005615, + "learning_rate": 2.9888713068573215e-08, + "loss": 0.08570671081542969, + "step": 7062 + }, + { + "epoch": 0.9841844910471679, + "grad_norm": 0.7833879590034485, + "learning_rate": 2.9375737291070795e-08, + "loss": 0.10070514678955078, + "step": 7063 + }, + { + "epoch": 0.9843238347383823, + "grad_norm": 0.47662314772605896, + "learning_rate": 2.8867198530344587e-08, + "loss": 0.09008407592773438, + "step": 7064 + }, + { + "epoch": 0.9844631784295966, + "grad_norm": 0.5034756660461426, + "learning_rate": 2.836309689937755e-08, + "loss": 0.08761787414550781, + "step": 7065 + }, + { + "epoch": 0.984602522120811, + "grad_norm": 0.49407002329826355, + "learning_rate": 2.7863432510168988e-08, + "loss": 0.0803670883178711, + "step": 7066 + }, + { + "epoch": 0.9847418658120254, + "grad_norm": 0.42134276032447815, + "learning_rate": 2.7368205473734532e-08, + "loss": 0.08095455169677734, + "step": 7067 + }, + { + "epoch": 0.9848812095032398, + "grad_norm": 0.42246201634407043, + "learning_rate": 2.6877415900103955e-08, + "loss": 0.08166122436523438, + "step": 7068 + }, + { + "epoch": 0.9850205531944541, + "grad_norm": 0.4630609154701233, + "learning_rate": 2.6391063898314474e-08, + "loss": 0.08371543884277344, + "step": 7069 + }, + { + "epoch": 0.9851598968856685, + "grad_norm": 0.6231539249420166, + "learning_rate": 2.5909149576428537e-08, + "loss": 0.10285472869873047, + "step": 7070 + }, + { + "epoch": 0.9852992405768829, + "grad_norm": 0.5963087677955627, + "learning_rate": 2.5431673041509396e-08, + "loss": 0.11695671081542969, + "step": 7071 + }, + { + "epoch": 0.9854385842680973, + "grad_norm": 0.32864001393318176, + "learning_rate": 2.495863439964774e-08, + "loss": 0.07395362854003906, + "step": 7072 + }, + { + "epoch": 0.9855779279593117, + "grad_norm": 0.391567200422287, + "learning_rate": 2.44900337559395e-08, + "loss": 0.0840444564819336, + "step": 7073 + }, + { + "epoch": 0.985717271650526, + "grad_norm": 0.8476215600967407, + "learning_rate": 2.4025871214496954e-08, + "loss": 0.10869979858398438, + "step": 7074 + }, + { + "epoch": 0.9858566153417404, + "grad_norm": 0.4333758056163788, + "learning_rate": 2.3566146878446495e-08, + "loss": 0.07245683670043945, + "step": 7075 + }, + { + "epoch": 0.9859959590329548, + "grad_norm": 0.5221754312515259, + "learning_rate": 2.311086084992864e-08, + "loss": 0.10821342468261719, + "step": 7076 + }, + { + "epoch": 0.9861353027241692, + "grad_norm": 0.4940152168273926, + "learning_rate": 2.2660013230098032e-08, + "loss": 0.08906173706054688, + "step": 7077 + }, + { + "epoch": 0.9862746464153835, + "grad_norm": 0.3899872899055481, + "learning_rate": 2.2213604119121214e-08, + "loss": 0.07811164855957031, + "step": 7078 + }, + { + "epoch": 0.9864139901065979, + "grad_norm": 0.7278218269348145, + "learning_rate": 2.1771633616181066e-08, + "loss": 0.09787428379058838, + "step": 7079 + }, + { + "epoch": 0.9865533337978123, + "grad_norm": 0.39910638332366943, + "learning_rate": 2.1334101819472375e-08, + "loss": 0.08283233642578125, + "step": 7080 + }, + { + "epoch": 0.9866926774890267, + "grad_norm": 0.3585379719734192, + "learning_rate": 2.0901008826206272e-08, + "loss": 0.08247566223144531, + "step": 7081 + }, + { + "epoch": 0.986832021180241, + "grad_norm": 0.5152358412742615, + "learning_rate": 2.047235473260578e-08, + "loss": 0.08919429779052734, + "step": 7082 + }, + { + "epoch": 0.9869713648714554, + "grad_norm": 0.5050048232078552, + "learning_rate": 2.004813963390584e-08, + "loss": 0.08233642578125, + "step": 7083 + }, + { + "epoch": 0.9871107085626698, + "grad_norm": 0.3855263888835907, + "learning_rate": 1.9628363624362155e-08, + "loss": 0.08159542083740234, + "step": 7084 + }, + { + "epoch": 0.9872500522538842, + "grad_norm": 0.3982451856136322, + "learning_rate": 1.921302679723569e-08, + "loss": 0.07309150695800781, + "step": 7085 + }, + { + "epoch": 0.9873893959450986, + "grad_norm": 0.5257668495178223, + "learning_rate": 1.8802129244803735e-08, + "loss": 0.08589744567871094, + "step": 7086 + }, + { + "epoch": 0.9875287396363129, + "grad_norm": 0.6316283345222473, + "learning_rate": 1.839567105836215e-08, + "loss": 0.10360145568847656, + "step": 7087 + }, + { + "epoch": 0.9876680833275273, + "grad_norm": 0.34859439730644226, + "learning_rate": 1.7993652328214263e-08, + "loss": 0.0782318115234375, + "step": 7088 + }, + { + "epoch": 0.9878074270187417, + "grad_norm": 0.7264302372932434, + "learning_rate": 1.7596073143677505e-08, + "loss": 0.10284423828125, + "step": 7089 + }, + { + "epoch": 0.9879467707099561, + "grad_norm": 0.5563429594039917, + "learning_rate": 1.720293359309011e-08, + "loss": 0.08594250679016113, + "step": 7090 + }, + { + "epoch": 0.9880861144011704, + "grad_norm": 0.6004126071929932, + "learning_rate": 1.681423376379554e-08, + "loss": 0.09902381896972656, + "step": 7091 + }, + { + "epoch": 0.9882254580923848, + "grad_norm": 0.5610800385475159, + "learning_rate": 1.6429973742153606e-08, + "loss": 0.08568572998046875, + "step": 7092 + }, + { + "epoch": 0.9883648017835992, + "grad_norm": 0.4559021294116974, + "learning_rate": 1.6050153613538234e-08, + "loss": 0.07840156555175781, + "step": 7093 + }, + { + "epoch": 0.9885041454748136, + "grad_norm": 0.5221399664878845, + "learning_rate": 1.567477346233748e-08, + "loss": 0.10094070434570312, + "step": 7094 + }, + { + "epoch": 0.988643489166028, + "grad_norm": 0.50816410779953, + "learning_rate": 1.5303833371953514e-08, + "loss": 0.08942604064941406, + "step": 7095 + }, + { + "epoch": 0.9887828328572423, + "grad_norm": 0.3962925672531128, + "learning_rate": 1.4937333424798194e-08, + "loss": 0.08453369140625, + "step": 7096 + }, + { + "epoch": 0.9889221765484568, + "grad_norm": 0.34309330582618713, + "learning_rate": 1.457527370230194e-08, + "loss": 0.07164764404296875, + "step": 7097 + }, + { + "epoch": 0.9890615202396712, + "grad_norm": 0.47285687923431396, + "learning_rate": 1.4217654284904846e-08, + "loss": 0.0832662582397461, + "step": 7098 + }, + { + "epoch": 0.9892008639308856, + "grad_norm": 0.5277454257011414, + "learning_rate": 1.3864475252058918e-08, + "loss": 0.09312248229980469, + "step": 7099 + }, + { + "epoch": 0.9893402076221, + "grad_norm": 0.47882428765296936, + "learning_rate": 1.3515736682239156e-08, + "loss": 0.08303546905517578, + "step": 7100 + }, + { + "epoch": 0.9894795513133143, + "grad_norm": 0.43358054757118225, + "learning_rate": 1.3171438652921365e-08, + "loss": 0.07584762573242188, + "step": 7101 + }, + { + "epoch": 0.9896188950045287, + "grad_norm": 0.39095717668533325, + "learning_rate": 1.2831581240602131e-08, + "loss": 0.08746147155761719, + "step": 7102 + }, + { + "epoch": 0.9897582386957431, + "grad_norm": 0.5316935777664185, + "learning_rate": 1.2496164520792165e-08, + "loss": 0.10240554809570312, + "step": 7103 + }, + { + "epoch": 0.9898975823869575, + "grad_norm": 0.508830189704895, + "learning_rate": 1.2165188568011854e-08, + "loss": 0.07275009155273438, + "step": 7104 + }, + { + "epoch": 0.9900369260781718, + "grad_norm": 0.4272843897342682, + "learning_rate": 1.1838653455797933e-08, + "loss": 0.08353996276855469, + "step": 7105 + }, + { + "epoch": 0.9901762697693862, + "grad_norm": 0.4685605764389038, + "learning_rate": 1.1516559256694594e-08, + "loss": 0.08686065673828125, + "step": 7106 + }, + { + "epoch": 0.9903156134606006, + "grad_norm": 0.48946720361709595, + "learning_rate": 1.1198906042269032e-08, + "loss": 0.10035133361816406, + "step": 7107 + }, + { + "epoch": 0.990454957151815, + "grad_norm": 0.524306058883667, + "learning_rate": 1.0885693883093685e-08, + "loss": 0.07447242736816406, + "step": 7108 + }, + { + "epoch": 0.9905943008430294, + "grad_norm": 0.35127028822898865, + "learning_rate": 1.0576922848759552e-08, + "loss": 0.07534599304199219, + "step": 7109 + }, + { + "epoch": 0.9907336445342437, + "grad_norm": 0.38399893045425415, + "learning_rate": 1.0272593007865094e-08, + "loss": 0.08558082580566406, + "step": 7110 + }, + { + "epoch": 0.9908729882254581, + "grad_norm": 0.35737910866737366, + "learning_rate": 9.972704428027335e-09, + "loss": 0.07888603210449219, + "step": 7111 + }, + { + "epoch": 0.9910123319166725, + "grad_norm": 0.599956750869751, + "learning_rate": 9.677257175875199e-09, + "loss": 0.09619522094726562, + "step": 7112 + }, + { + "epoch": 0.9911516756078869, + "grad_norm": 0.7487927079200745, + "learning_rate": 9.38625131704951e-09, + "loss": 0.09790229797363281, + "step": 7113 + }, + { + "epoch": 0.9912910192991012, + "grad_norm": 0.5637513995170593, + "learning_rate": 9.099686916205219e-09, + "loss": 0.10332298278808594, + "step": 7114 + }, + { + "epoch": 0.9914303629903156, + "grad_norm": 0.485030859708786, + "learning_rate": 8.817564037009174e-09, + "loss": 0.08054828643798828, + "step": 7115 + }, + { + "epoch": 0.99156970668153, + "grad_norm": 0.7538532614707947, + "learning_rate": 8.539882742146788e-09, + "loss": 0.12087631225585938, + "step": 7116 + }, + { + "epoch": 0.9917090503727444, + "grad_norm": 0.4601266384124756, + "learning_rate": 8.266643093306492e-09, + "loss": 0.08521366119384766, + "step": 7117 + }, + { + "epoch": 0.9918483940639587, + "grad_norm": 0.5605766773223877, + "learning_rate": 7.997845151201944e-09, + "loss": 0.10391712188720703, + "step": 7118 + }, + { + "epoch": 0.9919877377551731, + "grad_norm": 0.42981472611427307, + "learning_rate": 7.733488975549819e-09, + "loss": 0.09618759155273438, + "step": 7119 + }, + { + "epoch": 0.9921270814463875, + "grad_norm": 0.4218016564846039, + "learning_rate": 7.473574625085355e-09, + "loss": 0.08344650268554688, + "step": 7120 + }, + { + "epoch": 0.9922664251376019, + "grad_norm": 0.6114040017127991, + "learning_rate": 7.218102157555695e-09, + "loss": 0.10653877258300781, + "step": 7121 + }, + { + "epoch": 0.9924057688288163, + "grad_norm": 0.4633100926876068, + "learning_rate": 6.967071629719879e-09, + "loss": 0.08092379570007324, + "step": 7122 + }, + { + "epoch": 0.9925451125200306, + "grad_norm": 0.40838223695755005, + "learning_rate": 6.720483097353292e-09, + "loss": 0.08192634582519531, + "step": 7123 + }, + { + "epoch": 0.992684456211245, + "grad_norm": 0.4206674098968506, + "learning_rate": 6.478336615238779e-09, + "loss": 0.08394622802734375, + "step": 7124 + }, + { + "epoch": 0.9928237999024594, + "grad_norm": 0.7364654541015625, + "learning_rate": 6.240632237179967e-09, + "loss": 0.08516120910644531, + "step": 7125 + }, + { + "epoch": 0.9929631435936738, + "grad_norm": 0.5324241518974304, + "learning_rate": 6.0073700159879455e-09, + "loss": 0.08316802978515625, + "step": 7126 + }, + { + "epoch": 0.9931024872848881, + "grad_norm": 0.5486480593681335, + "learning_rate": 5.778550003485706e-09, + "loss": 0.09218978881835938, + "step": 7127 + }, + { + "epoch": 0.9932418309761025, + "grad_norm": 0.41748836636543274, + "learning_rate": 5.5541722505148e-09, + "loss": 0.08418083190917969, + "step": 7128 + }, + { + "epoch": 0.9933811746673169, + "grad_norm": 0.3808891773223877, + "learning_rate": 5.334236806926462e-09, + "loss": 0.07963752746582031, + "step": 7129 + }, + { + "epoch": 0.9935205183585313, + "grad_norm": 0.4138103723526001, + "learning_rate": 5.11874372158383e-09, + "loss": 0.07788276672363281, + "step": 7130 + }, + { + "epoch": 0.9936598620497457, + "grad_norm": 0.3381500840187073, + "learning_rate": 4.907693042366379e-09, + "loss": 0.05954456329345703, + "step": 7131 + }, + { + "epoch": 0.99379920574096, + "grad_norm": 0.7284276485443115, + "learning_rate": 4.70108481616327e-09, + "loss": 0.10182762145996094, + "step": 7132 + }, + { + "epoch": 0.9939385494321744, + "grad_norm": 0.4952135682106018, + "learning_rate": 4.498919088877784e-09, + "loss": 0.0878591537475586, + "step": 7133 + }, + { + "epoch": 0.9940778931233888, + "grad_norm": 0.4101082682609558, + "learning_rate": 4.301195905427324e-09, + "loss": 0.08599662780761719, + "step": 7134 + }, + { + "epoch": 0.9942172368146032, + "grad_norm": 0.5030078291893005, + "learning_rate": 4.107915309743416e-09, + "loss": 0.10036087036132812, + "step": 7135 + }, + { + "epoch": 0.9943565805058175, + "grad_norm": 0.45293694734573364, + "learning_rate": 3.919077344765043e-09, + "loss": 0.08067703247070312, + "step": 7136 + }, + { + "epoch": 0.994495924197032, + "grad_norm": 0.42211294174194336, + "learning_rate": 3.734682052449756e-09, + "loss": 0.08383941650390625, + "step": 7137 + }, + { + "epoch": 0.9946352678882464, + "grad_norm": 0.5162994265556335, + "learning_rate": 3.5547294737670046e-09, + "loss": 0.08991670608520508, + "step": 7138 + }, + { + "epoch": 0.9947746115794608, + "grad_norm": 0.3973909020423889, + "learning_rate": 3.3792196486959193e-09, + "loss": 0.08155059814453125, + "step": 7139 + }, + { + "epoch": 0.9949139552706752, + "grad_norm": 0.5320663452148438, + "learning_rate": 3.2081526162319744e-09, + "loss": 0.0913991928100586, + "step": 7140 + }, + { + "epoch": 0.9950532989618895, + "grad_norm": 0.38402384519577026, + "learning_rate": 3.0415284143803234e-09, + "loss": 0.08519649505615234, + "step": 7141 + }, + { + "epoch": 0.9951926426531039, + "grad_norm": 0.5145822167396545, + "learning_rate": 2.879347080164685e-09, + "loss": 0.08051776885986328, + "step": 7142 + }, + { + "epoch": 0.9953319863443183, + "grad_norm": 0.5806477665901184, + "learning_rate": 2.721608649614016e-09, + "loss": 0.09140396118164062, + "step": 7143 + }, + { + "epoch": 0.9954713300355327, + "grad_norm": 0.501900851726532, + "learning_rate": 2.5683131577780574e-09, + "loss": 0.09831428527832031, + "step": 7144 + }, + { + "epoch": 0.995610673726747, + "grad_norm": 0.5028826594352722, + "learning_rate": 2.4194606387140106e-09, + "loss": 0.09189796447753906, + "step": 7145 + }, + { + "epoch": 0.9957500174179614, + "grad_norm": 0.39212867617607117, + "learning_rate": 2.275051125490979e-09, + "loss": 0.08453369140625, + "step": 7146 + }, + { + "epoch": 0.9958893611091758, + "grad_norm": 0.5050148367881775, + "learning_rate": 2.1350846501988487e-09, + "loss": 0.0884552001953125, + "step": 7147 + }, + { + "epoch": 0.9960287048003902, + "grad_norm": 0.5020392537117004, + "learning_rate": 1.9995612439305255e-09, + "loss": 0.08473587036132812, + "step": 7148 + }, + { + "epoch": 0.9961680484916046, + "grad_norm": 0.638524055480957, + "learning_rate": 1.8684809367974786e-09, + "loss": 0.10556602478027344, + "step": 7149 + }, + { + "epoch": 0.9963073921828189, + "grad_norm": 0.7545373439788818, + "learning_rate": 1.741843757920858e-09, + "loss": 0.11260414123535156, + "step": 7150 + }, + { + "epoch": 0.9964467358740333, + "grad_norm": 0.44193553924560547, + "learning_rate": 1.6196497354403761e-09, + "loss": 0.08049964904785156, + "step": 7151 + }, + { + "epoch": 0.9965860795652477, + "grad_norm": 0.5209641456604004, + "learning_rate": 1.5018988965032067e-09, + "loss": 0.08657073974609375, + "step": 7152 + }, + { + "epoch": 0.9967254232564621, + "grad_norm": 0.4961441457271576, + "learning_rate": 1.3885912672706447e-09, + "loss": 0.08486175537109375, + "step": 7153 + }, + { + "epoch": 0.9968647669476765, + "grad_norm": 0.4200379252433777, + "learning_rate": 1.279726872918108e-09, + "loss": 0.08141803741455078, + "step": 7154 + }, + { + "epoch": 0.9970041106388908, + "grad_norm": 0.43419206142425537, + "learning_rate": 1.1753057376306942e-09, + "loss": 0.08554649353027344, + "step": 7155 + }, + { + "epoch": 0.9971434543301052, + "grad_norm": 0.5242231488227844, + "learning_rate": 1.0753278846076242e-09, + "loss": 0.09805488586425781, + "step": 7156 + }, + { + "epoch": 0.9972827980213196, + "grad_norm": 0.4818689227104187, + "learning_rate": 9.79793336066681e-10, + "loss": 0.09085845947265625, + "step": 7157 + }, + { + "epoch": 0.997422141712534, + "grad_norm": 0.4554215669631958, + "learning_rate": 8.887021132286677e-10, + "loss": 0.07626914978027344, + "step": 7158 + }, + { + "epoch": 0.9975614854037483, + "grad_norm": 0.44219473004341125, + "learning_rate": 8.020542363329497e-10, + "loss": 0.08272361755371094, + "step": 7159 + }, + { + "epoch": 0.9977008290949627, + "grad_norm": 0.5290684700012207, + "learning_rate": 7.198497246307945e-10, + "loss": 0.08831405639648438, + "step": 7160 + }, + { + "epoch": 0.9978401727861771, + "grad_norm": 0.3731153905391693, + "learning_rate": 6.420885963875912e-10, + "loss": 0.0826120376586914, + "step": 7161 + }, + { + "epoch": 0.9979795164773915, + "grad_norm": 0.5223524570465088, + "learning_rate": 5.687708688806304e-10, + "loss": 0.09847068786621094, + "step": 7162 + }, + { + "epoch": 0.9981188601686058, + "grad_norm": 0.6787335276603699, + "learning_rate": 4.99896558396884e-10, + "loss": 0.10584735870361328, + "step": 7163 + }, + { + "epoch": 0.9982582038598202, + "grad_norm": 0.5365256667137146, + "learning_rate": 4.3546568023966616e-10, + "loss": 0.0966796875, + "step": 7164 + }, + { + "epoch": 0.9983975475510346, + "grad_norm": 0.3727641701698303, + "learning_rate": 3.754782487241926e-10, + "loss": 0.08203315734863281, + "step": 7165 + }, + { + "epoch": 0.998536891242249, + "grad_norm": 0.5522305965423584, + "learning_rate": 3.1993427717758043e-10, + "loss": 0.09334754943847656, + "step": 7166 + }, + { + "epoch": 0.9986762349334634, + "grad_norm": 0.3667144179344177, + "learning_rate": 2.688337779410688e-10, + "loss": 0.07984352111816406, + "step": 7167 + }, + { + "epoch": 0.9988155786246777, + "grad_norm": 0.3851466476917267, + "learning_rate": 2.2217676237001883e-10, + "loss": 0.07594966888427734, + "step": 7168 + }, + { + "epoch": 0.9989549223158921, + "grad_norm": 0.6921710968017578, + "learning_rate": 1.7996324082725224e-10, + "loss": 0.113525390625, + "step": 7169 + }, + { + "epoch": 0.9990942660071065, + "grad_norm": 0.4964178204536438, + "learning_rate": 1.4219322269193313e-10, + "loss": 0.08576393127441406, + "step": 7170 + }, + { + "epoch": 0.9992336096983209, + "grad_norm": 0.45557501912117004, + "learning_rate": 1.0886671635956803e-10, + "loss": 0.08455753326416016, + "step": 7171 + }, + { + "epoch": 0.9993729533895352, + "grad_norm": 0.4526277184486389, + "learning_rate": 7.998372922868313e-11, + "loss": 0.06916618347167969, + "step": 7172 + }, + { + "epoch": 0.9995122970807496, + "grad_norm": 0.4647752642631531, + "learning_rate": 5.554426772080845e-11, + "loss": 0.09052038192749023, + "step": 7173 + }, + { + "epoch": 0.999651640771964, + "grad_norm": 0.4091443121433258, + "learning_rate": 3.554833726493456e-11, + "loss": 0.06570053100585938, + "step": 7174 + }, + { + "epoch": 0.9997909844631784, + "grad_norm": 0.4803467094898224, + "learning_rate": 1.999594230417401e-11, + "loss": 0.08111190795898438, + "step": 7175 + }, + { + "epoch": 0.9999303281543928, + "grad_norm": 1.149819254875183, + "learning_rate": 8.887086291320401e-12, + "loss": 0.11686134338378906, + "step": 7176 + }, + { + "epoch": 1.0, + "grad_norm": 0.9517463445663452, + "learning_rate": 2.2217716955097445e-12, + "loss": 0.11278915405273438, + "step": 7177 + }, + { + "epoch": 1.0, + "step": 7177, + "total_flos": 5.326107668090192e+19, + "train_loss": 0.1045213177918524, + "train_runtime": 73736.8908, + "train_samples_per_second": 24.915, + "train_steps_per_second": 0.097 + } + ], + "logging_steps": 1.0, + "max_steps": 7177, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.326107668090192e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}