| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 200.0, |
| "global_step": 5205, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00019212295869356388, |
| "grad_norm": 41.0853157043457, |
| "learning_rate": 1.1494252873563217e-06, |
| "loss": 10.62430191040039, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009606147934678194, |
| "grad_norm": 40.41545867919922, |
| "learning_rate": 5.747126436781608e-06, |
| "loss": 10.600605010986328, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0019212295869356388, |
| "grad_norm": 24.825101852416992, |
| "learning_rate": 1.1494252873563217e-05, |
| "loss": 9.892306518554687, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.002881844380403458, |
| "grad_norm": 14.558918952941895, |
| "learning_rate": 1.7241379310344825e-05, |
| "loss": 8.44493408203125, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0038424591738712775, |
| "grad_norm": 7.1786956787109375, |
| "learning_rate": 2.2988505747126433e-05, |
| "loss": 7.345828247070313, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004803073967339097, |
| "grad_norm": 4.3571858406066895, |
| "learning_rate": 2.8735632183908045e-05, |
| "loss": 6.655020141601563, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.005763688760806916, |
| "grad_norm": 2.342735767364502, |
| "learning_rate": 3.448275862068965e-05, |
| "loss": 6.164151000976562, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0067243035542747355, |
| "grad_norm": 2.570441246032715, |
| "learning_rate": 4.022988505747126e-05, |
| "loss": 5.778363800048828, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.007684918347742555, |
| "grad_norm": 2.7692394256591797, |
| "learning_rate": 4.5977011494252866e-05, |
| "loss": 5.467860412597656, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.008645533141210375, |
| "grad_norm": 0.8302366137504578, |
| "learning_rate": 5.172413793103448e-05, |
| "loss": 5.22553825378418, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.009606147934678195, |
| "grad_norm": 3.4278295040130615, |
| "learning_rate": 5.747126436781609e-05, |
| "loss": 5.0273689270019535, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.010566762728146013, |
| "grad_norm": 3.7562148571014404, |
| "learning_rate": 6.32183908045977e-05, |
| "loss": 4.888296890258789, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.011527377521613832, |
| "grad_norm": 3.4503631591796875, |
| "learning_rate": 6.89655172413793e-05, |
| "loss": 4.772652435302734, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.012487992315081652, |
| "grad_norm": 2.543815851211548, |
| "learning_rate": 7.471264367816091e-05, |
| "loss": 4.681902313232422, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.013448607108549471, |
| "grad_norm": 3.03908371925354, |
| "learning_rate": 8.045977011494252e-05, |
| "loss": 4.596535873413086, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01440922190201729, |
| "grad_norm": 2.9536008834838867, |
| "learning_rate": 8.620689655172413e-05, |
| "loss": 4.515168380737305, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.01536983669548511, |
| "grad_norm": 2.8714489936828613, |
| "learning_rate": 9.195402298850573e-05, |
| "loss": 4.442340850830078, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01633045148895293, |
| "grad_norm": 2.1564369201660156, |
| "learning_rate": 9.770114942528733e-05, |
| "loss": 4.369157028198242, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01729106628242075, |
| "grad_norm": 1.5947808027267456, |
| "learning_rate": 0.00010344827586206896, |
| "loss": 4.297710418701172, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.01825168107588857, |
| "grad_norm": 1.6778233051300049, |
| "learning_rate": 0.00010919540229885056, |
| "loss": 4.230863571166992, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.01921229586935639, |
| "grad_norm": 1.724900245666504, |
| "learning_rate": 0.00011494252873563218, |
| "loss": 4.168477249145508, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.020172910662824207, |
| "grad_norm": 1.8800899982452393, |
| "learning_rate": 0.00012068965517241378, |
| "loss": 4.10834846496582, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.021133525456292025, |
| "grad_norm": 1.567356824874878, |
| "learning_rate": 0.0001264367816091954, |
| "loss": 4.051171493530274, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.022094140249759846, |
| "grad_norm": 2.2714192867279053, |
| "learning_rate": 0.000132183908045977, |
| "loss": 4.001234817504883, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.023054755043227664, |
| "grad_norm": 2.6298563480377197, |
| "learning_rate": 0.0001379310344827586, |
| "loss": 3.9564029693603517, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.024015369836695485, |
| "grad_norm": 2.0561466217041016, |
| "learning_rate": 0.0001436781609195402, |
| "loss": 3.9216072082519533, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.024975984630163303, |
| "grad_norm": 2.2524895668029785, |
| "learning_rate": 0.00014942528735632183, |
| "loss": 3.8848548889160157, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.025936599423631124, |
| "grad_norm": 1.8565645217895508, |
| "learning_rate": 0.00015517241379310346, |
| "loss": 3.8477012634277346, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.026897214217098942, |
| "grad_norm": 2.007159948348999, |
| "learning_rate": 0.00016091954022988503, |
| "loss": 3.8230205535888673, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.027857829010566763, |
| "grad_norm": 2.5089001655578613, |
| "learning_rate": 0.00016666666666666666, |
| "loss": 3.790781784057617, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.02881844380403458, |
| "grad_norm": 1.7888845205307007, |
| "learning_rate": 0.00017241379310344826, |
| "loss": 3.7702369689941406, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.029779058597502402, |
| "grad_norm": 1.5350267887115479, |
| "learning_rate": 0.00017816091954022986, |
| "loss": 3.752313995361328, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.03073967339097022, |
| "grad_norm": 2.6425604820251465, |
| "learning_rate": 0.00018390804597701147, |
| "loss": 3.736890411376953, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03170028818443804, |
| "grad_norm": 2.1871745586395264, |
| "learning_rate": 0.0001896551724137931, |
| "loss": 3.7081199645996095, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03266090297790586, |
| "grad_norm": 2.301276683807373, |
| "learning_rate": 0.00019540229885057467, |
| "loss": 3.6886245727539064, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.03362151777137368, |
| "grad_norm": 2.3541669845581055, |
| "learning_rate": 0.0002011494252873563, |
| "loss": 3.6742767333984374, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0345821325648415, |
| "grad_norm": 2.193776845932007, |
| "learning_rate": 0.00020689655172413793, |
| "loss": 3.654471588134766, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.03554274735830932, |
| "grad_norm": 2.2675869464874268, |
| "learning_rate": 0.00021264367816091953, |
| "loss": 3.634075164794922, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.03650336215177714, |
| "grad_norm": 2.262054920196533, |
| "learning_rate": 0.00021839080459770113, |
| "loss": 3.623701477050781, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.037463976945244955, |
| "grad_norm": 2.4970099925994873, |
| "learning_rate": 0.00022413793103448273, |
| "loss": 3.608829116821289, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.03842459173871278, |
| "grad_norm": 2.580589532852173, |
| "learning_rate": 0.00022988505747126436, |
| "loss": 3.594409942626953, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0393852065321806, |
| "grad_norm": 2.5698821544647217, |
| "learning_rate": 0.00023563218390804593, |
| "loss": 3.589406967163086, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.040345821325648415, |
| "grad_norm": 1.9148350954055786, |
| "learning_rate": 0.00024137931034482756, |
| "loss": 3.5731269836425783, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04130643611911623, |
| "grad_norm": 2.6525511741638184, |
| "learning_rate": 0.00024712643678160916, |
| "loss": 3.5579261779785156, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.04226705091258405, |
| "grad_norm": 2.4932427406311035, |
| "learning_rate": 0.0002528735632183908, |
| "loss": 3.5460506439208985, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.043227665706051875, |
| "grad_norm": 1.522286295890808, |
| "learning_rate": 0.00025862068965517237, |
| "loss": 3.534061813354492, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.04418828049951969, |
| "grad_norm": 2.077308177947998, |
| "learning_rate": 0.000264367816091954, |
| "loss": 3.519297790527344, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.04514889529298751, |
| "grad_norm": 2.2139241695404053, |
| "learning_rate": 0.0002701149425287356, |
| "loss": 3.508769226074219, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.04610951008645533, |
| "grad_norm": 2.71065616607666, |
| "learning_rate": 0.0002758620689655172, |
| "loss": 3.4990470886230467, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.04707012487992315, |
| "grad_norm": 1.8673561811447144, |
| "learning_rate": 0.00028160919540229883, |
| "loss": 3.492870330810547, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.04803073967339097, |
| "grad_norm": 2.2589685916900635, |
| "learning_rate": 0.0002873563218390804, |
| "loss": 3.482229232788086, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04899135446685879, |
| "grad_norm": 1.6717121601104736, |
| "learning_rate": 0.00029310344827586203, |
| "loss": 3.4728935241699217, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.049951969260326606, |
| "grad_norm": 2.6019248962402344, |
| "learning_rate": 0.00029885057471264366, |
| "loss": 3.4658123016357423, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05091258405379443, |
| "grad_norm": 2.2234745025634766, |
| "learning_rate": 0.00029999951546647263, |
| "loss": 3.46058349609375, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.05187319884726225, |
| "grad_norm": 1.7633609771728516, |
| "learning_rate": 0.0002999975470543828, |
| "loss": 3.4448532104492187, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.052833813640730067, |
| "grad_norm": 1.698107361793518, |
| "learning_rate": 0.00029999406450023966, |
| "loss": 3.4391746520996094, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.053794428434197884, |
| "grad_norm": 2.3649418354034424, |
| "learning_rate": 0.0002999890678391978, |
| "loss": 3.4237644195556642, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.05475504322766571, |
| "grad_norm": 1.6354587078094482, |
| "learning_rate": 0.00029998255712169563, |
| "loss": 3.4203346252441404, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.05571565802113353, |
| "grad_norm": 2.191877841949463, |
| "learning_rate": 0.00029997453241345533, |
| "loss": 3.4129383087158205, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.056676272814601344, |
| "grad_norm": 2.1449062824249268, |
| "learning_rate": 0.0002999649937954818, |
| "loss": 3.4019508361816406, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.05763688760806916, |
| "grad_norm": 1.9247010946273804, |
| "learning_rate": 0.0002999539413640621, |
| "loss": 3.3923084259033205, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.05859750240153699, |
| "grad_norm": 1.6601442098617554, |
| "learning_rate": 0.0002999413752307644, |
| "loss": 3.382649230957031, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.059558117195004805, |
| "grad_norm": 1.656147837638855, |
| "learning_rate": 0.0002999272955224369, |
| "loss": 3.3772228240966795, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06051873198847262, |
| "grad_norm": 1.9272291660308838, |
| "learning_rate": 0.0002999117023812064, |
| "loss": 3.3669418334960937, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.06147934678194044, |
| "grad_norm": 2.0251946449279785, |
| "learning_rate": 0.000299894595964477, |
| "loss": 3.3636070251464845, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.06243996157540826, |
| "grad_norm": 1.5922743082046509, |
| "learning_rate": 0.0002998759764449286, |
| "loss": 3.3583389282226563, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.06340057636887608, |
| "grad_norm": 2.2867417335510254, |
| "learning_rate": 0.0002998558440105148, |
| "loss": 3.3570938110351562, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0643611911623439, |
| "grad_norm": 1.678760051727295, |
| "learning_rate": 0.0002998341988644614, |
| "loss": 3.3533226013183595, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.06532180595581172, |
| "grad_norm": 1.9134340286254883, |
| "learning_rate": 0.0002998110412252641, |
| "loss": 3.3417484283447267, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.06628242074927954, |
| "grad_norm": 1.3479728698730469, |
| "learning_rate": 0.0002997863713266866, |
| "loss": 3.3312728881835936, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.06724303554274735, |
| "grad_norm": 1.3297585248947144, |
| "learning_rate": 0.0002997601894177576, |
| "loss": 3.3266414642333983, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.06820365033621517, |
| "grad_norm": 1.5574193000793457, |
| "learning_rate": 0.00029973249576276914, |
| "loss": 3.321349334716797, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.069164265129683, |
| "grad_norm": 1.3063061237335205, |
| "learning_rate": 0.0002997032906412732, |
| "loss": 3.316680145263672, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.07012487992315082, |
| "grad_norm": 1.440643548965454, |
| "learning_rate": 0.0002996725743480793, |
| "loss": 3.3083744049072266, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.07108549471661864, |
| "grad_norm": 1.2425435781478882, |
| "learning_rate": 0.00029964034719325147, |
| "loss": 3.299941635131836, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.07204610951008646, |
| "grad_norm": 1.3375848531723022, |
| "learning_rate": 0.0002996066095021048, |
| "loss": 3.298897552490234, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.07300672430355427, |
| "grad_norm": 1.429042100906372, |
| "learning_rate": 0.0002995713616152028, |
| "loss": 3.2951019287109373, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.07396733909702209, |
| "grad_norm": 1.6015335321426392, |
| "learning_rate": 0.0002995346038883532, |
| "loss": 3.2871078491210937, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.07492795389048991, |
| "grad_norm": 1.444579005241394, |
| "learning_rate": 0.0002994963366926048, |
| "loss": 3.286525344848633, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.07588856868395773, |
| "grad_norm": 1.481871247291565, |
| "learning_rate": 0.0002994565604142439, |
| "loss": 3.2782516479492188, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.07684918347742556, |
| "grad_norm": 1.2840272188186646, |
| "learning_rate": 0.00029941527545478976, |
| "loss": 3.2710845947265623, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.07780979827089338, |
| "grad_norm": 1.2282651662826538, |
| "learning_rate": 0.00029937248223099136, |
| "loss": 3.2628021240234375, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.0787704130643612, |
| "grad_norm": 1.5256259441375732, |
| "learning_rate": 0.00029932818117482245, |
| "loss": 3.2642288208007812, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.07973102785782901, |
| "grad_norm": 1.4667640924453735, |
| "learning_rate": 0.0002992823727334776, |
| "loss": 3.2577728271484374, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.08069164265129683, |
| "grad_norm": 1.9893492460250854, |
| "learning_rate": 0.00029923505736936774, |
| "loss": 3.257119369506836, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.08165225744476465, |
| "grad_norm": 1.1091679334640503, |
| "learning_rate": 0.0002991862355601151, |
| "loss": 3.2528831481933596, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.08261287223823247, |
| "grad_norm": 1.278975486755371, |
| "learning_rate": 0.00029913590779854886, |
| "loss": 3.246588134765625, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.08357348703170028, |
| "grad_norm": 1.7972854375839233, |
| "learning_rate": 0.00029908407459269977, |
| "loss": 3.2438831329345703, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0845341018251681, |
| "grad_norm": 1.242336392402649, |
| "learning_rate": 0.0002990307364657954, |
| "loss": 3.2402252197265624, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.08549471661863593, |
| "grad_norm": 1.3686480522155762, |
| "learning_rate": 0.0002989758939562545, |
| "loss": 3.234004592895508, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.08645533141210375, |
| "grad_norm": 1.6549354791641235, |
| "learning_rate": 0.0002989195476176818, |
| "loss": 3.231829833984375, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.08741594620557157, |
| "grad_norm": 1.1533397436141968, |
| "learning_rate": 0.00029886169801886237, |
| "loss": 3.2260894775390625, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.08837656099903939, |
| "grad_norm": 1.2408617734909058, |
| "learning_rate": 0.00029880234574375576, |
| "loss": 3.2213096618652344, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0893371757925072, |
| "grad_norm": 1.3094635009765625, |
| "learning_rate": 0.00029874149139149037, |
| "loss": 3.218523406982422, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.09029779058597502, |
| "grad_norm": 1.352949619293213, |
| "learning_rate": 0.00029867913557635704, |
| "loss": 3.216180419921875, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.09125840537944284, |
| "grad_norm": 1.2964717149734497, |
| "learning_rate": 0.0002986152789278031, |
| "loss": 3.21021728515625, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.09221902017291066, |
| "grad_norm": 1.2936161756515503, |
| "learning_rate": 0.00029854992209042626, |
| "loss": 3.208134078979492, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.09317963496637849, |
| "grad_norm": 1.262890338897705, |
| "learning_rate": 0.0002984830657239673, |
| "loss": 3.2024818420410157, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.0941402497598463, |
| "grad_norm": 1.1409361362457275, |
| "learning_rate": 0.00029841471050330424, |
| "loss": 3.201049041748047, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.09510086455331412, |
| "grad_norm": 1.154974102973938, |
| "learning_rate": 0.00029834485711844515, |
| "loss": 3.1987125396728517, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.09606147934678194, |
| "grad_norm": 1.115403652191162, |
| "learning_rate": 0.00029827350627452116, |
| "loss": 3.1905792236328123, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09702209414024976, |
| "grad_norm": 1.3029977083206177, |
| "learning_rate": 0.00029820065869177954, |
| "loss": 3.1972682952880858, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.09798270893371758, |
| "grad_norm": 1.2876211404800415, |
| "learning_rate": 0.0002981263151055762, |
| "loss": 3.190180206298828, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0989433237271854, |
| "grad_norm": 1.4698253870010376, |
| "learning_rate": 0.0002980504762663683, |
| "loss": 3.183108901977539, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.09990393852065321, |
| "grad_norm": 1.4724935293197632, |
| "learning_rate": 0.0002979731429397071, |
| "loss": 3.1818157196044923, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.10086455331412104, |
| "grad_norm": 1.252737283706665, |
| "learning_rate": 0.0002978943159062295, |
| "loss": 3.178622245788574, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.10182516810758886, |
| "grad_norm": 1.3171908855438232, |
| "learning_rate": 0.0002978139959616507, |
| "loss": 3.1794586181640625, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.10278578290105668, |
| "grad_norm": 1.3921356201171875, |
| "learning_rate": 0.00029773218391675594, |
| "loss": 3.175571060180664, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.1037463976945245, |
| "grad_norm": 1.0483286380767822, |
| "learning_rate": 0.00029764888059739255, |
| "loss": 3.1715625762939452, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.10470701248799232, |
| "grad_norm": 1.0862990617752075, |
| "learning_rate": 0.00029756408684446136, |
| "loss": 3.173019218444824, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.10566762728146013, |
| "grad_norm": 1.3614447116851807, |
| "learning_rate": 0.0002974778035139081, |
| "loss": 3.166230583190918, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.10662824207492795, |
| "grad_norm": 1.0270849466323853, |
| "learning_rate": 0.00029739003147671536, |
| "loss": 3.1632837295532226, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.10758885686839577, |
| "grad_norm": 1.5262467861175537, |
| "learning_rate": 0.00029730077161889304, |
| "loss": 3.163615417480469, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.10854947166186359, |
| "grad_norm": 0.967074453830719, |
| "learning_rate": 0.00029721002484147, |
| "loss": 3.160854721069336, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.10951008645533142, |
| "grad_norm": 1.3244905471801758, |
| "learning_rate": 0.00029711779206048454, |
| "loss": 3.158466339111328, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.11047070124879924, |
| "grad_norm": 1.2831615209579468, |
| "learning_rate": 0.0002970240742069755, |
| "loss": 3.1545330047607423, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.11143131604226705, |
| "grad_norm": 0.985418975353241, |
| "learning_rate": 0.0002969288722269726, |
| "loss": 3.149937057495117, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.11239193083573487, |
| "grad_norm": 1.0786052942276, |
| "learning_rate": 0.000296832187081487, |
| "loss": 3.1523942947387695, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.11335254562920269, |
| "grad_norm": 1.0934644937515259, |
| "learning_rate": 0.0002967340197465017, |
| "loss": 3.1465457916259765, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1143131604226705, |
| "grad_norm": 1.354055404663086, |
| "learning_rate": 0.00029663437121296146, |
| "loss": 3.1465686798095702, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.11527377521613832, |
| "grad_norm": 1.1991117000579834, |
| "learning_rate": 0.000296533242486763, |
| "loss": 3.143705368041992, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.11623439000960614, |
| "grad_norm": 1.2595206499099731, |
| "learning_rate": 0.0002964306345887447, |
| "loss": 3.1374244689941406, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.11719500480307397, |
| "grad_norm": 1.1941015720367432, |
| "learning_rate": 0.0002963265485546764, |
| "loss": 3.134844970703125, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.11815561959654179, |
| "grad_norm": 1.045292615890503, |
| "learning_rate": 0.00029622098543524884, |
| "loss": 3.1372697830200194, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.11911623439000961, |
| "grad_norm": 0.8975104093551636, |
| "learning_rate": 0.00029611394629606324, |
| "loss": 3.1340496063232424, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.12007684918347743, |
| "grad_norm": 0.8841443061828613, |
| "learning_rate": 0.0002960054322176204, |
| "loss": 3.12859992980957, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.12103746397694524, |
| "grad_norm": 1.4646399021148682, |
| "learning_rate": 0.0002958954442953096, |
| "loss": 3.1266860961914062, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.12199807877041306, |
| "grad_norm": 1.2528270483016968, |
| "learning_rate": 0.000295783983639398, |
| "loss": 3.127555274963379, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.12295869356388088, |
| "grad_norm": 1.0346133708953857, |
| "learning_rate": 0.00029567105137501916, |
| "loss": 3.123224639892578, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1239193083573487, |
| "grad_norm": 1.1643197536468506, |
| "learning_rate": 0.00029555664864216156, |
| "loss": 3.1221992492675783, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.12487992315081652, |
| "grad_norm": 0.9598444700241089, |
| "learning_rate": 0.00029544077659565747, |
| "loss": 3.1172836303710936, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.12584053794428435, |
| "grad_norm": 1.193166732788086, |
| "learning_rate": 0.0002953234364051708, |
| "loss": 3.1188310623168944, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.12680115273775217, |
| "grad_norm": 1.3501605987548828, |
| "learning_rate": 0.00029520462925518575, |
| "loss": 3.113943099975586, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.12776176753121998, |
| "grad_norm": 1.1221381425857544, |
| "learning_rate": 0.00029508435634499467, |
| "loss": 3.113107109069824, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.1287223823246878, |
| "grad_norm": 1.2083373069763184, |
| "learning_rate": 0.00029496261888868586, |
| "loss": 3.11612491607666, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.12968299711815562, |
| "grad_norm": 1.0393755435943604, |
| "learning_rate": 0.0002948394181151314, |
| "loss": 3.109981155395508, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.13064361191162344, |
| "grad_norm": 1.1999260187149048, |
| "learning_rate": 0.0002947147552679748, |
| "loss": 3.1087677001953127, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.13160422670509125, |
| "grad_norm": 0.9760367274284363, |
| "learning_rate": 0.00029458863160561837, |
| "loss": 3.1072208404541017, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.13256484149855907, |
| "grad_norm": 1.0330339670181274, |
| "learning_rate": 0.0002944610484012105, |
| "loss": 3.1058883666992188, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1335254562920269, |
| "grad_norm": 0.8262532353401184, |
| "learning_rate": 0.0002943320069426329, |
| "loss": 3.1005834579467773, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1344860710854947, |
| "grad_norm": 1.139979362487793, |
| "learning_rate": 0.00029420150853248756, |
| "loss": 3.1009517669677735, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.13544668587896252, |
| "grad_norm": 1.2979532480239868, |
| "learning_rate": 0.0002940695544880836, |
| "loss": 3.0997783660888674, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.13640730067243034, |
| "grad_norm": 1.267760157585144, |
| "learning_rate": 0.0002939361461414238, |
| "loss": 3.0968265533447266, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.1373679154658982, |
| "grad_norm": 1.070842981338501, |
| "learning_rate": 0.0002938012848391915, |
| "loss": 3.0932445526123047, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.138328530259366, |
| "grad_norm": 1.2436041831970215, |
| "learning_rate": 0.0002936649719427367, |
| "loss": 3.0934431076049806, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.13928914505283382, |
| "grad_norm": 1.0946882963180542, |
| "learning_rate": 0.00029352720882806267, |
| "loss": 3.0919456481933594, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.14024975984630164, |
| "grad_norm": 1.0279921293258667, |
| "learning_rate": 0.00029338799688581146, |
| "loss": 3.090176010131836, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.14121037463976946, |
| "grad_norm": 1.1641135215759277, |
| "learning_rate": 0.00029324733752125054, |
| "loss": 3.087961196899414, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.14217098943323728, |
| "grad_norm": 1.176697850227356, |
| "learning_rate": 0.0002931052321542581, |
| "loss": 3.086262512207031, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.1431316042267051, |
| "grad_norm": 1.13466215133667, |
| "learning_rate": 0.00029296168221930904, |
| "loss": 3.0825126647949217, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.1440922190201729, |
| "grad_norm": 1.2085994482040405, |
| "learning_rate": 0.0002928166891654604, |
| "loss": 3.085866928100586, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.14505283381364073, |
| "grad_norm": 1.0957227945327759, |
| "learning_rate": 0.00029267025445633667, |
| "loss": 3.083575439453125, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.14601344860710855, |
| "grad_norm": 0.9977965354919434, |
| "learning_rate": 0.0002925223795701149, |
| "loss": 3.0803741455078124, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.14697406340057637, |
| "grad_norm": 1.1439604759216309, |
| "learning_rate": 0.00029237306599951007, |
| "loss": 3.0777074813842775, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.14793467819404418, |
| "grad_norm": 0.8976171612739563, |
| "learning_rate": 0.00029222231525176005, |
| "loss": 3.0794315338134766, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.148895292987512, |
| "grad_norm": 1.0934321880340576, |
| "learning_rate": 0.0002920701288486099, |
| "loss": 3.0764802932739257, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.14985590778097982, |
| "grad_norm": 1.088166356086731, |
| "learning_rate": 0.00029191650832629694, |
| "loss": 3.0753334045410154, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.15081652257444764, |
| "grad_norm": 0.9292176365852356, |
| "learning_rate": 0.00029176145523553517, |
| "loss": 3.0713483810424806, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.15177713736791545, |
| "grad_norm": 1.0206445455551147, |
| "learning_rate": 0.0002916049711414996, |
| "loss": 3.0717500686645507, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.15273775216138327, |
| "grad_norm": 1.208133578300476, |
| "learning_rate": 0.00029144705762381036, |
| "loss": 3.073202896118164, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.15369836695485112, |
| "grad_norm": 0.9913300275802612, |
| "learning_rate": 0.0002912877162765169, |
| "loss": 3.0660655975341795, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.15465898174831894, |
| "grad_norm": 1.1119699478149414, |
| "learning_rate": 0.00029112694870808155, |
| "loss": 3.067470169067383, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.15561959654178675, |
| "grad_norm": 1.0400080680847168, |
| "learning_rate": 0.00029096475654136395, |
| "loss": 3.0652034759521483, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.15658021133525457, |
| "grad_norm": 0.9247759580612183, |
| "learning_rate": 0.000290801141413604, |
| "loss": 3.061397361755371, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.1575408261287224, |
| "grad_norm": 1.060563564300537, |
| "learning_rate": 0.00029063610497640576, |
| "loss": 3.0618301391601563, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.1585014409221902, |
| "grad_norm": 1.137531042098999, |
| "learning_rate": 0.0002904696488957204, |
| "loss": 3.0617816925048826, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.15946205571565802, |
| "grad_norm": 1.0392354726791382, |
| "learning_rate": 0.0002903017748518298, |
| "loss": 3.053458404541016, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.16042267050912584, |
| "grad_norm": 0.9369018077850342, |
| "learning_rate": 0.0002901324845393294, |
| "loss": 3.0599037170410157, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.16138328530259366, |
| "grad_norm": 0.8582949638366699, |
| "learning_rate": 0.00028996177966711097, |
| "loss": 3.0585887908935545, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.16234390009606148, |
| "grad_norm": 0.9821586608886719, |
| "learning_rate": 0.0002897896619583455, |
| "loss": 3.0565303802490233, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.1633045148895293, |
| "grad_norm": 1.064107894897461, |
| "learning_rate": 0.0002896161331504659, |
| "loss": 3.0574161529541017, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.1642651296829971, |
| "grad_norm": 1.0095359086990356, |
| "learning_rate": 0.00028944119499514913, |
| "loss": 3.054368019104004, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.16522574447646493, |
| "grad_norm": 1.0314931869506836, |
| "learning_rate": 0.0002892648492582989, |
| "loss": 3.0471572875976562, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.16618635926993275, |
| "grad_norm": 1.1124778985977173, |
| "learning_rate": 0.00028908709772002765, |
| "loss": 3.0495674133300783, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.16714697406340057, |
| "grad_norm": 0.9910938143730164, |
| "learning_rate": 0.00028890794217463863, |
| "loss": 3.0501693725585937, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.16810758885686838, |
| "grad_norm": 1.0562894344329834, |
| "learning_rate": 0.0002887273844306076, |
| "loss": 3.0482528686523436, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.1690682036503362, |
| "grad_norm": 0.9755779504776001, |
| "learning_rate": 0.00028854542631056494, |
| "loss": 3.043923187255859, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.17002881844380405, |
| "grad_norm": 0.9010165333747864, |
| "learning_rate": 0.0002883620696512769, |
| "loss": 3.047307586669922, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.17098943323727187, |
| "grad_norm": 1.0329279899597168, |
| "learning_rate": 0.0002881773163036273, |
| "loss": 3.045383071899414, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.17195004803073968, |
| "grad_norm": 0.8573128581047058, |
| "learning_rate": 0.00028799116813259875, |
| "loss": 3.0434667587280275, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.1729106628242075, |
| "grad_norm": 1.0701560974121094, |
| "learning_rate": 0.0002878036270172538, |
| "loss": 3.0357574462890624, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.17387127761767532, |
| "grad_norm": 1.0385925769805908, |
| "learning_rate": 0.000287614694850716, |
| "loss": 3.0400657653808594, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.17483189241114314, |
| "grad_norm": 0.9062285423278809, |
| "learning_rate": 0.00028742437354015073, |
| "loss": 3.0386375427246093, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.17579250720461095, |
| "grad_norm": 1.2658556699752808, |
| "learning_rate": 0.0002872326650067462, |
| "loss": 3.0390193939208983, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.17675312199807877, |
| "grad_norm": 0.9829347729682922, |
| "learning_rate": 0.00028703957118569363, |
| "loss": 3.0389629364013673, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.1777137367915466, |
| "grad_norm": 1.0681240558624268, |
| "learning_rate": 0.000286845094026168, |
| "loss": 3.038488006591797, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.1786743515850144, |
| "grad_norm": 1.271577000617981, |
| "learning_rate": 0.0002866492354913086, |
| "loss": 3.037461853027344, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.17963496637848222, |
| "grad_norm": 0.898420512676239, |
| "learning_rate": 0.0002864519975581986, |
| "loss": 3.032963180541992, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.18059558117195004, |
| "grad_norm": 1.040523886680603, |
| "learning_rate": 0.0002862533822178456, |
| "loss": 3.032858657836914, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.18155619596541786, |
| "grad_norm": 0.9702991843223572, |
| "learning_rate": 0.00028605339147516113, |
| "loss": 3.029499816894531, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.18251681075888568, |
| "grad_norm": 1.0565072298049927, |
| "learning_rate": 0.00028585202734894105, |
| "loss": 3.030923843383789, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.1834774255523535, |
| "grad_norm": 0.9746853113174438, |
| "learning_rate": 0.00028564929187184447, |
| "loss": 3.0296430587768555, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.1844380403458213, |
| "grad_norm": 0.9929348826408386, |
| "learning_rate": 0.00028544518709037363, |
| "loss": 3.0282403945922853, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.18539865513928913, |
| "grad_norm": 0.8328250646591187, |
| "learning_rate": 0.000285239715064853, |
| "loss": 3.0228111267089846, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.18635926993275698, |
| "grad_norm": 1.216163158416748, |
| "learning_rate": 0.0002850328778694088, |
| "loss": 3.0275947570800783, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1873198847262248, |
| "grad_norm": 1.077054500579834, |
| "learning_rate": 0.0002848246775919478, |
| "loss": 3.0228961944580077, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.1882804995196926, |
| "grad_norm": 1.0535924434661865, |
| "learning_rate": 0.0002846151163341364, |
| "loss": 3.024255561828613, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.18924111431316043, |
| "grad_norm": 1.0875251293182373, |
| "learning_rate": 0.0002844041962113792, |
| "loss": 3.0249141693115233, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.19020172910662825, |
| "grad_norm": 0.9994290471076965, |
| "learning_rate": 0.00028419191935279793, |
| "loss": 3.024821472167969, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.19116234390009607, |
| "grad_norm": 0.9676864743232727, |
| "learning_rate": 0.00028397828790120965, |
| "loss": 3.025034713745117, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.19212295869356388, |
| "grad_norm": 0.9725849628448486, |
| "learning_rate": 0.0002837633040131055, |
| "loss": 3.0168121337890623, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1930835734870317, |
| "grad_norm": 0.8413275480270386, |
| "learning_rate": 0.00028354696985862865, |
| "loss": 3.017466735839844, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.19404418828049952, |
| "grad_norm": 0.8834673166275024, |
| "learning_rate": 0.00028332928762155225, |
| "loss": 3.0153526306152343, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.19500480307396734, |
| "grad_norm": 0.9116687774658203, |
| "learning_rate": 0.0002831102594992579, |
| "loss": 3.0169631958007814, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.19596541786743515, |
| "grad_norm": 0.9015905261039734, |
| "learning_rate": 0.00028288988770271297, |
| "loss": 3.015023422241211, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.19692603266090297, |
| "grad_norm": 1.0525202751159668, |
| "learning_rate": 0.00028266817445644855, |
| "loss": 3.014568901062012, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.1978866474543708, |
| "grad_norm": 0.9534981846809387, |
| "learning_rate": 0.0002824451219985369, |
| "loss": 3.0124256134033205, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.1988472622478386, |
| "grad_norm": 0.8009642362594604, |
| "learning_rate": 0.0002822207325805688, |
| "loss": 3.017258071899414, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.19980787704130643, |
| "grad_norm": 1.049578309059143, |
| "learning_rate": 0.00028199500846763116, |
| "loss": 3.0123516082763673, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.20076849183477424, |
| "grad_norm": 1.0571658611297607, |
| "learning_rate": 0.0002817679519382836, |
| "loss": 3.01287956237793, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.2017291066282421, |
| "grad_norm": 1.0068848133087158, |
| "learning_rate": 0.0002815395652845359, |
| "loss": 3.0150609970092774, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2026897214217099, |
| "grad_norm": 1.0555028915405273, |
| "learning_rate": 0.0002813098508118247, |
| "loss": 3.0080333709716798, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.20365033621517772, |
| "grad_norm": 0.9432759881019592, |
| "learning_rate": 0.0002810788108389901, |
| "loss": 3.0080041885375977, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.20461095100864554, |
| "grad_norm": 0.8660106658935547, |
| "learning_rate": 0.0002808464476982526, |
| "loss": 3.0054351806640627, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.20557156580211336, |
| "grad_norm": 0.9244606494903564, |
| "learning_rate": 0.0002806127637351892, |
| "loss": 3.0057044982910157, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.20653218059558118, |
| "grad_norm": 1.1719539165496826, |
| "learning_rate": 0.00028037776130871, |
| "loss": 3.00616340637207, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.207492795389049, |
| "grad_norm": 1.2205188274383545, |
| "learning_rate": 0.00028014144279103406, |
| "loss": 3.0076831817626952, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2084534101825168, |
| "grad_norm": 0.9898233413696289, |
| "learning_rate": 0.0002799038105676658, |
| "loss": 3.0033103942871096, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.20941402497598463, |
| "grad_norm": 0.9085561633110046, |
| "learning_rate": 0.00027966486703737066, |
| "loss": 3.0030406951904296, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.21037463976945245, |
| "grad_norm": 1.0487213134765625, |
| "learning_rate": 0.0002794246146121512, |
| "loss": 3.0055965423583983, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.21133525456292027, |
| "grad_norm": 0.7599855065345764, |
| "learning_rate": 0.0002791830557172224, |
| "loss": 3.0053455352783205, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.21229586935638808, |
| "grad_norm": 1.0096842050552368, |
| "learning_rate": 0.00027894019279098726, |
| "loss": 2.99503231048584, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.2132564841498559, |
| "grad_norm": 1.0883978605270386, |
| "learning_rate": 0.00027869602828501234, |
| "loss": 3.0012443542480467, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.21421709894332372, |
| "grad_norm": 0.8861981630325317, |
| "learning_rate": 0.00027845056466400297, |
| "loss": 2.9960763931274412, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.21517771373679154, |
| "grad_norm": 1.0259391069412231, |
| "learning_rate": 0.0002782038044057783, |
| "loss": 2.998910140991211, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.21613832853025935, |
| "grad_norm": 0.9595276117324829, |
| "learning_rate": 0.0002779557500012462, |
| "loss": 2.9986873626708985, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.21709894332372717, |
| "grad_norm": 1.0162408351898193, |
| "learning_rate": 0.0002777064039543784, |
| "loss": 2.9951988220214845, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.21805955811719502, |
| "grad_norm": 0.8509448766708374, |
| "learning_rate": 0.00027745576878218496, |
| "loss": 2.9969932556152346, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.21902017291066284, |
| "grad_norm": 0.7489306330680847, |
| "learning_rate": 0.0002772038470146888, |
| "loss": 2.9912147521972656, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.21998078770413065, |
| "grad_norm": 0.936427891254425, |
| "learning_rate": 0.0002769506411949007, |
| "loss": 2.993634033203125, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.22094140249759847, |
| "grad_norm": 1.020005464553833, |
| "learning_rate": 0.00027669615387879284, |
| "loss": 2.9922344207763674, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2219020172910663, |
| "grad_norm": 0.9557084441184998, |
| "learning_rate": 0.0002764403876352736, |
| "loss": 2.9920581817626952, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.2228626320845341, |
| "grad_norm": 0.9452062845230103, |
| "learning_rate": 0.0002761833450461613, |
| "loss": 2.990475082397461, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.22382324687800192, |
| "grad_norm": 0.9600782990455627, |
| "learning_rate": 0.0002759250287061583, |
| "loss": 2.990601348876953, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.22478386167146974, |
| "grad_norm": 0.9427198767662048, |
| "learning_rate": 0.00027566544122282496, |
| "loss": 2.990622138977051, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.22574447646493756, |
| "grad_norm": 1.020228624343872, |
| "learning_rate": 0.0002754045852165529, |
| "loss": 2.988072967529297, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.22670509125840538, |
| "grad_norm": 0.8352153301239014, |
| "learning_rate": 0.00027514246332053876, |
| "loss": 2.9882789611816407, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2276657060518732, |
| "grad_norm": 0.9119144678115845, |
| "learning_rate": 0.0002748790781807577, |
| "loss": 2.985960006713867, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.228626320845341, |
| "grad_norm": 1.0915721654891968, |
| "learning_rate": 0.0002746144324559368, |
| "loss": 2.9834156036376953, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.22958693563880883, |
| "grad_norm": 0.9779495000839233, |
| "learning_rate": 0.00027434852881752774, |
| "loss": 2.9862506866455076, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.23054755043227665, |
| "grad_norm": 0.7937590479850769, |
| "learning_rate": 0.0002740813699496804, |
| "loss": 2.984625244140625, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.23150816522574447, |
| "grad_norm": 0.8358086943626404, |
| "learning_rate": 0.0002738129585492153, |
| "loss": 2.979814910888672, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.23246878001921228, |
| "grad_norm": 0.9309784173965454, |
| "learning_rate": 0.0002735432973255967, |
| "loss": 2.9813312530517577, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2334293948126801, |
| "grad_norm": 0.9012494683265686, |
| "learning_rate": 0.0002732723890009051, |
| "loss": 2.981847381591797, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.23439000960614795, |
| "grad_norm": 0.924193799495697, |
| "learning_rate": 0.00027300023630980985, |
| "loss": 2.985409164428711, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.23535062439961577, |
| "grad_norm": 0.9454841613769531, |
| "learning_rate": 0.00027272684199954137, |
| "loss": 2.98272819519043, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.23631123919308358, |
| "grad_norm": 0.8768509030342102, |
| "learning_rate": 0.0002724522088298637, |
| "loss": 2.981144332885742, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2372718539865514, |
| "grad_norm": 0.9420005679130554, |
| "learning_rate": 0.0002721763395730462, |
| "loss": 2.9806352615356446, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.23823246878001922, |
| "grad_norm": 0.8248008489608765, |
| "learning_rate": 0.00027189923701383627, |
| "loss": 2.9782638549804688, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.23919308357348704, |
| "grad_norm": 0.7679121494293213, |
| "learning_rate": 0.0002716209039494304, |
| "loss": 2.9759841918945313, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.24015369836695485, |
| "grad_norm": 0.9788777828216553, |
| "learning_rate": 0.0002713413431894466, |
| "loss": 2.977429962158203, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.24111431316042267, |
| "grad_norm": 0.931688666343689, |
| "learning_rate": 0.00027106055755589566, |
| "loss": 2.9781124114990236, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.2420749279538905, |
| "grad_norm": 1.3526350259780884, |
| "learning_rate": 0.00027077854988315285, |
| "loss": 2.981077766418457, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2430355427473583, |
| "grad_norm": 1.0243141651153564, |
| "learning_rate": 0.00027049532301792924, |
| "loss": 2.9759902954101562, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.24399615754082613, |
| "grad_norm": 0.9570327401161194, |
| "learning_rate": 0.00027021087981924296, |
| "loss": 2.9805068969726562, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.24495677233429394, |
| "grad_norm": 0.8493214249610901, |
| "learning_rate": 0.0002699252231583904, |
| "loss": 2.9751022338867186, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.24591738712776176, |
| "grad_norm": 1.0867499113082886, |
| "learning_rate": 0.000269638355918917, |
| "loss": 2.9705522537231444, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.24687800192122958, |
| "grad_norm": 0.9490500092506409, |
| "learning_rate": 0.00026935028099658864, |
| "loss": 2.9714107513427734, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.2478386167146974, |
| "grad_norm": 1.0482022762298584, |
| "learning_rate": 0.00026906100129936173, |
| "loss": 2.9761116027832033, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.24879923150816521, |
| "grad_norm": 0.9524055123329163, |
| "learning_rate": 0.0002687705197473545, |
| "loss": 2.971488189697266, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.24975984630163303, |
| "grad_norm": 0.8309116959571838, |
| "learning_rate": 0.00026847883927281715, |
| "loss": 2.971644401550293, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.2507204610951009, |
| "grad_norm": 0.946146547794342, |
| "learning_rate": 0.00026818596282010223, |
| "loss": 2.970326614379883, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.2516810758885687, |
| "grad_norm": 0.9582008719444275, |
| "learning_rate": 0.00026789189334563507, |
| "loss": 2.971521759033203, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2526416906820365, |
| "grad_norm": 0.6957625150680542, |
| "learning_rate": 0.00026759663381788407, |
| "loss": 2.965200424194336, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.25360230547550433, |
| "grad_norm": 0.9765472412109375, |
| "learning_rate": 0.00026730018721733034, |
| "loss": 2.962516212463379, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.25456292026897215, |
| "grad_norm": 0.9319335222244263, |
| "learning_rate": 0.0002670025565364379, |
| "loss": 2.9645359039306642, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.25552353506243997, |
| "grad_norm": 1.1191481351852417, |
| "learning_rate": 0.0002667037447796234, |
| "loss": 2.9653718948364256, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.2564841498559078, |
| "grad_norm": 1.0608773231506348, |
| "learning_rate": 0.0002664037549632259, |
| "loss": 2.9668670654296876, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.2574447646493756, |
| "grad_norm": 1.082219123840332, |
| "learning_rate": 0.00026610259011547617, |
| "loss": 2.9651315689086912, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.2584053794428434, |
| "grad_norm": 0.8572206497192383, |
| "learning_rate": 0.0002658002532764663, |
| "loss": 2.964995193481445, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.25936599423631124, |
| "grad_norm": 0.960854172706604, |
| "learning_rate": 0.00026549674749811917, |
| "loss": 2.959628677368164, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.26032660902977905, |
| "grad_norm": 0.9040970206260681, |
| "learning_rate": 0.00026519207584415705, |
| "loss": 2.9645341873168944, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.2612872238232469, |
| "grad_norm": 0.9025639295578003, |
| "learning_rate": 0.00026488624139007154, |
| "loss": 2.957231140136719, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.2622478386167147, |
| "grad_norm": 0.9620739221572876, |
| "learning_rate": 0.0002645792472230917, |
| "loss": 2.9581756591796875, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.2632084534101825, |
| "grad_norm": 0.9987210631370544, |
| "learning_rate": 0.0002642710964421535, |
| "loss": 2.9581737518310547, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.2641690682036503, |
| "grad_norm": 0.9952805042266846, |
| "learning_rate": 0.0002639617921578681, |
| "loss": 2.960963821411133, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.26512968299711814, |
| "grad_norm": 0.9213324189186096, |
| "learning_rate": 0.0002636513374924908, |
| "loss": 2.9607002258300783, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.26609029779058596, |
| "grad_norm": 0.6715449690818787, |
| "learning_rate": 0.00026333973557988923, |
| "loss": 2.9600353240966797, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.2670509125840538, |
| "grad_norm": 0.8836393356323242, |
| "learning_rate": 0.0002630269895655119, |
| "loss": 2.954792785644531, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.2680115273775216, |
| "grad_norm": 0.8318425416946411, |
| "learning_rate": 0.00026271310260635633, |
| "loss": 2.9616050720214844, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.2689721421709894, |
| "grad_norm": 0.6590518355369568, |
| "learning_rate": 0.0002623980778709374, |
| "loss": 2.957061004638672, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.26993275696445723, |
| "grad_norm": 0.8581438660621643, |
| "learning_rate": 0.0002620819185392551, |
| "loss": 2.9538414001464846, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.27089337175792505, |
| "grad_norm": 0.9463372230529785, |
| "learning_rate": 0.00026176462780276246, |
| "loss": 2.9602792739868162, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.27185398655139287, |
| "grad_norm": 0.9935978651046753, |
| "learning_rate": 0.0002614462088643336, |
| "loss": 2.9563518524169923, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.2728146013448607, |
| "grad_norm": 0.9396317601203918, |
| "learning_rate": 0.00026112666493823103, |
| "loss": 2.9529541015625, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.2737752161383285, |
| "grad_norm": 0.8872493505477905, |
| "learning_rate": 0.00026080599925007355, |
| "loss": 2.9572736740112306, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.2747358309317964, |
| "grad_norm": 0.9465567469596863, |
| "learning_rate": 0.00026048421503680337, |
| "loss": 2.9554428100585937, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.2756964457252642, |
| "grad_norm": 0.9301679134368896, |
| "learning_rate": 0.00026016131554665377, |
| "loss": 2.951742935180664, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.276657060518732, |
| "grad_norm": 0.805698812007904, |
| "learning_rate": 0.000259837304039116, |
| "loss": 2.953638458251953, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.27761767531219983, |
| "grad_norm": 0.9713769555091858, |
| "learning_rate": 0.0002595121837849065, |
| "loss": 2.9541744232177733, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.27857829010566765, |
| "grad_norm": 0.6296630501747131, |
| "learning_rate": 0.000259185958065934, |
| "loss": 2.9497676849365235, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.27953890489913547, |
| "grad_norm": 0.6616190075874329, |
| "learning_rate": 0.00025885863017526613, |
| "loss": 2.949513626098633, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.2804995196926033, |
| "grad_norm": 0.845889151096344, |
| "learning_rate": 0.00025853020341709646, |
| "loss": 2.9539087295532225, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.2814601344860711, |
| "grad_norm": 0.8943501710891724, |
| "learning_rate": 0.000258200681106711, |
| "loss": 2.9553314208984376, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.2824207492795389, |
| "grad_norm": 1.1197773218154907, |
| "learning_rate": 0.00025787006657045477, |
| "loss": 2.9485729217529295, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.28338136407300674, |
| "grad_norm": 0.7538876533508301, |
| "learning_rate": 0.000257538363145698, |
| "loss": 2.951329803466797, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.28434197886647455, |
| "grad_norm": 0.8518261313438416, |
| "learning_rate": 0.00025720557418080304, |
| "loss": 2.951564407348633, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.28530259365994237, |
| "grad_norm": 0.9422370195388794, |
| "learning_rate": 0.00025687170303508977, |
| "loss": 2.9509925842285156, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.2862632084534102, |
| "grad_norm": 0.793566107749939, |
| "learning_rate": 0.00025653675307880225, |
| "loss": 2.9440481185913088, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.287223823246878, |
| "grad_norm": 1.0476356744766235, |
| "learning_rate": 0.00025620072769307463, |
| "loss": 2.9432241439819338, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.2881844380403458, |
| "grad_norm": 1.1108616590499878, |
| "learning_rate": 0.00025586363026989677, |
| "loss": 2.9504762649536134, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.28914505283381364, |
| "grad_norm": 0.5834405422210693, |
| "learning_rate": 0.0002555254642120802, |
| "loss": 2.946168899536133, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.29010566762728146, |
| "grad_norm": 1.21562922000885, |
| "learning_rate": 0.0002551862329332238, |
| "loss": 2.946132469177246, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.2910662824207493, |
| "grad_norm": 0.9039652943611145, |
| "learning_rate": 0.0002548459398576791, |
| "loss": 2.9483963012695313, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.2920268972142171, |
| "grad_norm": 0.7553810477256775, |
| "learning_rate": 0.00025450458842051616, |
| "loss": 2.944832611083984, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.2929875120076849, |
| "grad_norm": 0.7280674576759338, |
| "learning_rate": 0.0002541621820674882, |
| "loss": 2.942336082458496, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.29394812680115273, |
| "grad_norm": 0.7404939532279968, |
| "learning_rate": 0.0002538187242549976, |
| "loss": 2.942631721496582, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.29490874159462055, |
| "grad_norm": 0.9760322570800781, |
| "learning_rate": 0.00025347421845006056, |
| "loss": 2.9433094024658204, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.29586935638808837, |
| "grad_norm": 0.9803165793418884, |
| "learning_rate": 0.00025312866813027195, |
| "loss": 2.9410396575927735, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.2968299711815562, |
| "grad_norm": 0.839809000492096, |
| "learning_rate": 0.0002527820767837708, |
| "loss": 2.943090057373047, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.297790585975024, |
| "grad_norm": 0.6867128610610962, |
| "learning_rate": 0.00025243444790920447, |
| "loss": 2.9385210037231446, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.2987512007684918, |
| "grad_norm": 0.6075034737586975, |
| "learning_rate": 0.0002520857850156936, |
| "loss": 2.9380651473999024, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.29971181556195964, |
| "grad_norm": 0.8469932079315186, |
| "learning_rate": 0.0002517360916227968, |
| "loss": 2.937509536743164, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.30067243035542746, |
| "grad_norm": 0.8804566264152527, |
| "learning_rate": 0.000251385371260475, |
| "loss": 2.942656326293945, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.3016330451488953, |
| "grad_norm": 0.787070095539093, |
| "learning_rate": 0.0002510336274690557, |
| "loss": 2.934864807128906, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.3025936599423631, |
| "grad_norm": 0.9216657280921936, |
| "learning_rate": 0.0002506808637991974, |
| "loss": 2.9392667770385743, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.3035542747358309, |
| "grad_norm": 0.8175917863845825, |
| "learning_rate": 0.0002503270838118537, |
| "loss": 2.9393451690673826, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.3045148895292987, |
| "grad_norm": 0.8747788667678833, |
| "learning_rate": 0.0002499722910782374, |
| "loss": 2.93638916015625, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.30547550432276654, |
| "grad_norm": 0.9733981490135193, |
| "learning_rate": 0.0002496164891797844, |
| "loss": 2.9394798278808594, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.30643611911623436, |
| "grad_norm": 0.773009181022644, |
| "learning_rate": 0.0002492596817081175, |
| "loss": 2.9367996215820313, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.30739673390970224, |
| "grad_norm": 0.9535477161407471, |
| "learning_rate": 0.0002489018722650103, |
| "loss": 2.936510467529297, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.30835734870317005, |
| "grad_norm": 0.7678182125091553, |
| "learning_rate": 0.0002485430644623507, |
| "loss": 2.937050628662109, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.30931796349663787, |
| "grad_norm": 0.9366130828857422, |
| "learning_rate": 0.00024818326192210447, |
| "loss": 2.9339839935302736, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.3102785782901057, |
| "grad_norm": 0.7543423175811768, |
| "learning_rate": 0.0002478224682762787, |
| "loss": 2.931721496582031, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.3112391930835735, |
| "grad_norm": 1.0019062757492065, |
| "learning_rate": 0.0002474606871668852, |
| "loss": 2.9345191955566405, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3121998078770413, |
| "grad_norm": 0.6927400827407837, |
| "learning_rate": 0.00024709792224590356, |
| "loss": 2.9311363220214846, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.31316042267050914, |
| "grad_norm": 0.9281826019287109, |
| "learning_rate": 0.0002467341771752446, |
| "loss": 2.9298322677612303, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.31412103746397696, |
| "grad_norm": 0.7644611597061157, |
| "learning_rate": 0.000246369455626713, |
| "loss": 2.9329246520996093, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.3150816522574448, |
| "grad_norm": 0.9157893657684326, |
| "learning_rate": 0.00024600376128197047, |
| "loss": 2.928366851806641, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3160422670509126, |
| "grad_norm": 0.8347942233085632, |
| "learning_rate": 0.00024563709783249877, |
| "loss": 2.9295623779296873, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.3170028818443804, |
| "grad_norm": 0.9679508805274963, |
| "learning_rate": 0.00024526946897956194, |
| "loss": 2.9315326690673826, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.31796349663784823, |
| "grad_norm": 0.8744584321975708, |
| "learning_rate": 0.00024490087843416947, |
| "loss": 2.929412078857422, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.31892411143131605, |
| "grad_norm": 0.7325993180274963, |
| "learning_rate": 0.00024453132991703844, |
| "loss": 2.9305303573608397, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.31988472622478387, |
| "grad_norm": 0.8605995178222656, |
| "learning_rate": 0.00024416082715855627, |
| "loss": 2.9294113159179687, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.3208453410182517, |
| "grad_norm": 0.9381006360054016, |
| "learning_rate": 0.00024378937389874276, |
| "loss": 2.9343215942382814, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.3218059558117195, |
| "grad_norm": 0.8204743266105652, |
| "learning_rate": 0.0002434169738872126, |
| "loss": 2.9295400619506835, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.3227665706051873, |
| "grad_norm": 0.8801187872886658, |
| "learning_rate": 0.0002430436308831374, |
| "loss": 2.9252452850341797, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.32372718539865514, |
| "grad_norm": 0.801816999912262, |
| "learning_rate": 0.00024266934865520767, |
| "loss": 2.929386329650879, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.32468780019212296, |
| "grad_norm": 0.9650601744651794, |
| "learning_rate": 0.00024229413098159506, |
| "loss": 2.9256067276000977, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3256484149855908, |
| "grad_norm": 0.8740299344062805, |
| "learning_rate": 0.00024191798164991378, |
| "loss": 2.926050567626953, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.3266090297790586, |
| "grad_norm": 0.6990634799003601, |
| "learning_rate": 0.0002415409044571828, |
| "loss": 2.923676300048828, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3275696445725264, |
| "grad_norm": 0.8076874613761902, |
| "learning_rate": 0.00024116290320978724, |
| "loss": 2.9225109100341795, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.3285302593659942, |
| "grad_norm": 0.851506233215332, |
| "learning_rate": 0.00024078398172344006, |
| "loss": 2.9278465270996095, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.32949087415946204, |
| "grad_norm": 0.7479684948921204, |
| "learning_rate": 0.00024040414382314358, |
| "loss": 2.9235177993774415, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.33045148895292986, |
| "grad_norm": 0.9311428070068359, |
| "learning_rate": 0.00024002339334315066, |
| "loss": 2.927138900756836, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.3314121037463977, |
| "grad_norm": 0.6650685667991638, |
| "learning_rate": 0.00023964173412692631, |
| "loss": 2.9230600357055665, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.3323727185398655, |
| "grad_norm": 0.7150014638900757, |
| "learning_rate": 0.00023925917002710865, |
| "loss": 2.9212249755859374, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.8817017674446106, |
| "learning_rate": 0.0002388757049054701, |
| "loss": 2.925118064880371, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.33429394812680113, |
| "grad_norm": 0.8242185711860657, |
| "learning_rate": 0.00023849134263287836, |
| "loss": 2.9193931579589845, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.33525456292026895, |
| "grad_norm": 0.8570958375930786, |
| "learning_rate": 0.00023810608708925755, |
| "loss": 2.9213741302490233, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.33621517771373677, |
| "grad_norm": 0.9741989970207214, |
| "learning_rate": 0.00023771994216354857, |
| "loss": 2.9224111557006838, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.3371757925072046, |
| "grad_norm": 0.7361451983451843, |
| "learning_rate": 0.00023733291175367046, |
| "loss": 2.9207143783569336, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.3381364073006724, |
| "grad_norm": 0.7350742816925049, |
| "learning_rate": 0.00023694499976648043, |
| "loss": 2.92266960144043, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.3390970220941403, |
| "grad_norm": 0.9980021119117737, |
| "learning_rate": 0.0002365562101177349, |
| "loss": 2.9178647994995117, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.3400576368876081, |
| "grad_norm": 0.897578775882721, |
| "learning_rate": 0.00023616654673204983, |
| "loss": 2.9217491149902344, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.3410182516810759, |
| "grad_norm": 0.6844500303268433, |
| "learning_rate": 0.00023577601354286094, |
| "loss": 2.923050308227539, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.34197886647454373, |
| "grad_norm": 0.8132905960083008, |
| "learning_rate": 0.00023538461449238422, |
| "loss": 2.919468307495117, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.34293948126801155, |
| "grad_norm": 1.028024435043335, |
| "learning_rate": 0.00023499235353157603, |
| "loss": 2.9254117965698243, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.34390009606147937, |
| "grad_norm": 0.6290096640586853, |
| "learning_rate": 0.0002345992346200932, |
| "loss": 2.915414810180664, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3448607108549472, |
| "grad_norm": 0.8899547457695007, |
| "learning_rate": 0.00023420526172625316, |
| "loss": 2.9221290588378905, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.345821325648415, |
| "grad_norm": 0.8772971034049988, |
| "learning_rate": 0.00023381043882699377, |
| "loss": 2.919437026977539, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3467819404418828, |
| "grad_norm": 0.7739781737327576, |
| "learning_rate": 0.0002334147699078333, |
| "loss": 2.914821815490723, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.34774255523535064, |
| "grad_norm": 0.8273237347602844, |
| "learning_rate": 0.00023301825896282992, |
| "loss": 2.913847732543945, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.34870317002881845, |
| "grad_norm": 0.820066511631012, |
| "learning_rate": 0.00023262090999454194, |
| "loss": 2.916702651977539, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.34966378482228627, |
| "grad_norm": 0.7784633040428162, |
| "learning_rate": 0.00023222272701398664, |
| "loss": 2.9117355346679688, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.3506243996157541, |
| "grad_norm": 0.7284680008888245, |
| "learning_rate": 0.00023182371404060047, |
| "loss": 2.9168115615844727, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.3515850144092219, |
| "grad_norm": 0.9456787109375, |
| "learning_rate": 0.00023142387510219814, |
| "loss": 2.909496879577637, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.3525456292026897, |
| "grad_norm": 0.7613958716392517, |
| "learning_rate": 0.00023102321423493192, |
| "loss": 2.914762496948242, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.35350624399615754, |
| "grad_norm": 0.6795462965965271, |
| "learning_rate": 0.00023062173548325112, |
| "loss": 2.9124961853027345, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.35446685878962536, |
| "grad_norm": 1.093863844871521, |
| "learning_rate": 0.0002302194428998611, |
| "loss": 2.9172582626342773, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.3554274735830932, |
| "grad_norm": 1.0343449115753174, |
| "learning_rate": 0.0002298163405456824, |
| "loss": 2.913264274597168, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.356388088376561, |
| "grad_norm": 0.6599006056785583, |
| "learning_rate": 0.00022941243248980966, |
| "loss": 2.9158676147460936, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.3573487031700288, |
| "grad_norm": 0.8203375935554504, |
| "learning_rate": 0.0002290077228094708, |
| "loss": 2.9108463287353517, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.35830931796349663, |
| "grad_norm": 0.751015841960907, |
| "learning_rate": 0.00022860221558998554, |
| "loss": 2.9137802124023438, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.35926993275696445, |
| "grad_norm": 0.7769482731819153, |
| "learning_rate": 0.00022819591492472438, |
| "loss": 2.911220169067383, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.36023054755043227, |
| "grad_norm": 0.8420271277427673, |
| "learning_rate": 0.00022778882491506725, |
| "loss": 2.909513473510742, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.3611911623439001, |
| "grad_norm": 0.9250156879425049, |
| "learning_rate": 0.00022738094967036208, |
| "loss": 2.914291191101074, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.3621517771373679, |
| "grad_norm": 0.8891603350639343, |
| "learning_rate": 0.00022697229330788312, |
| "loss": 2.9125133514404298, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.3631123919308357, |
| "grad_norm": 0.6370509266853333, |
| "learning_rate": 0.00022656285995278984, |
| "loss": 2.9118471145629883, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.36407300672430354, |
| "grad_norm": 0.7550172209739685, |
| "learning_rate": 0.00022615265373808488, |
| "loss": 2.9111328125, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.36503362151777136, |
| "grad_norm": 0.898451030254364, |
| "learning_rate": 0.00022574167880457245, |
| "loss": 2.9084396362304688, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.3659942363112392, |
| "grad_norm": 0.7411810755729675, |
| "learning_rate": 0.00022532993930081668, |
| "loss": 2.9065261840820313, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.366954851104707, |
| "grad_norm": 0.785703718662262, |
| "learning_rate": 0.00022491743938309936, |
| "loss": 2.9086023330688477, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.3679154658981748, |
| "grad_norm": 0.7651037573814392, |
| "learning_rate": 0.0002245041832153786, |
| "loss": 2.908749008178711, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.3688760806916426, |
| "grad_norm": 1.0927786827087402, |
| "learning_rate": 0.0002240901749692461, |
| "loss": 2.9079204559326173, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.36983669548511044, |
| "grad_norm": 0.7358237504959106, |
| "learning_rate": 0.00022367541882388554, |
| "loss": 2.9088617324829102, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.37079731027857826, |
| "grad_norm": 0.7084841728210449, |
| "learning_rate": 0.00022325991896603018, |
| "loss": 2.9067033767700194, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.37175792507204614, |
| "grad_norm": 0.6146923303604126, |
| "learning_rate": 0.00022284367958992065, |
| "loss": 2.905855178833008, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.37271853986551395, |
| "grad_norm": 0.9858738780021667, |
| "learning_rate": 0.0002224267048972627, |
| "loss": 2.909621810913086, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.37367915465898177, |
| "grad_norm": 0.779106080532074, |
| "learning_rate": 0.00022200899909718456, |
| "loss": 2.907650375366211, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.3746397694524496, |
| "grad_norm": 0.6753861308097839, |
| "learning_rate": 0.00022159056640619457, |
| "loss": 2.901345443725586, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.3756003842459174, |
| "grad_norm": 1.005218267440796, |
| "learning_rate": 0.00022117141104813876, |
| "loss": 2.9051822662353515, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.3765609990393852, |
| "grad_norm": 0.8823059797286987, |
| "learning_rate": 0.000220751537254158, |
| "loss": 2.9064775466918946, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.37752161383285304, |
| "grad_norm": 0.7537618279457092, |
| "learning_rate": 0.00022033094926264548, |
| "loss": 2.904936408996582, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.37848222862632086, |
| "grad_norm": 0.6074866652488708, |
| "learning_rate": 0.00021990965131920358, |
| "loss": 2.901531219482422, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.3794428434197887, |
| "grad_norm": 0.7387614846229553, |
| "learning_rate": 0.0002194876476766015, |
| "loss": 2.901895523071289, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.3804034582132565, |
| "grad_norm": 0.9459737539291382, |
| "learning_rate": 0.00021906494259473196, |
| "loss": 2.9033708572387695, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.3813640730067243, |
| "grad_norm": 0.8098238706588745, |
| "learning_rate": 0.00021864154034056832, |
| "loss": 2.8992437362670898, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.38232468780019213, |
| "grad_norm": 0.8918400406837463, |
| "learning_rate": 0.00021821744518812154, |
| "loss": 2.900362014770508, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.38328530259365995, |
| "grad_norm": 0.8188735246658325, |
| "learning_rate": 0.00021779266141839699, |
| "loss": 2.8992008209228515, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.38424591738712777, |
| "grad_norm": 0.9960513114929199, |
| "learning_rate": 0.00021736719331935127, |
| "loss": 2.900490379333496, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3852065321805956, |
| "grad_norm": 0.8250312805175781, |
| "learning_rate": 0.00021694104518584886, |
| "loss": 2.9021739959716797, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.3861671469740634, |
| "grad_norm": 0.5508167743682861, |
| "learning_rate": 0.00021651422131961884, |
| "loss": 2.9001636505126953, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.3871277617675312, |
| "grad_norm": 0.9340776205062866, |
| "learning_rate": 0.0002160867260292115, |
| "loss": 2.898966407775879, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.38808837656099904, |
| "grad_norm": 1.0545355081558228, |
| "learning_rate": 0.00021565856362995464, |
| "loss": 2.9000267028808593, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.38904899135446686, |
| "grad_norm": 0.8669793605804443, |
| "learning_rate": 0.00021522973844391024, |
| "loss": 2.898406982421875, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.3900096061479347, |
| "grad_norm": 0.706919252872467, |
| "learning_rate": 0.00021480025479983077, |
| "loss": 2.8980600357055666, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.3909702209414025, |
| "grad_norm": 0.6402170658111572, |
| "learning_rate": 0.00021437011703311545, |
| "loss": 2.8995635986328123, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.3919308357348703, |
| "grad_norm": 0.902797281742096, |
| "learning_rate": 0.0002139393294857665, |
| "loss": 2.902130889892578, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.3928914505283381, |
| "grad_norm": 0.9768553376197815, |
| "learning_rate": 0.00021350789650634525, |
| "loss": 2.898158645629883, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.39385206532180594, |
| "grad_norm": 0.8208462595939636, |
| "learning_rate": 0.00021307582244992838, |
| "loss": 2.9008291244506834, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.39481268011527376, |
| "grad_norm": 0.6077558398246765, |
| "learning_rate": 0.0002126431116780639, |
| "loss": 2.8985877990722657, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.3957732949087416, |
| "grad_norm": 0.7051781415939331, |
| "learning_rate": 0.00021220976855872712, |
| "loss": 2.897053527832031, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.3967339097022094, |
| "grad_norm": 0.738699734210968, |
| "learning_rate": 0.00021177579746627643, |
| "loss": 2.8972637176513674, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.3976945244956772, |
| "grad_norm": 0.9454274773597717, |
| "learning_rate": 0.0002113412027814094, |
| "loss": 2.8959421157836913, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.39865513928914503, |
| "grad_norm": 0.7538549304008484, |
| "learning_rate": 0.0002109059888911183, |
| "loss": 2.8942508697509766, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.39961575408261285, |
| "grad_norm": 0.64174884557724, |
| "learning_rate": 0.00021047016018864602, |
| "loss": 2.8981069564819335, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.40057636887608067, |
| "grad_norm": 0.7245832085609436, |
| "learning_rate": 0.00021003372107344167, |
| "loss": 2.896715545654297, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.4015369836695485, |
| "grad_norm": 0.7303643226623535, |
| "learning_rate": 0.00020959667595111603, |
| "loss": 2.90020637512207, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4024975984630163, |
| "grad_norm": 1.070908784866333, |
| "learning_rate": 0.00020915902923339722, |
| "loss": 2.897067642211914, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.4034582132564842, |
| "grad_norm": 0.7779847383499146, |
| "learning_rate": 0.0002087207853380862, |
| "loss": 2.8957414627075195, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.404418828049952, |
| "grad_norm": 0.6650077104568481, |
| "learning_rate": 0.00020828194868901205, |
| "loss": 2.892515182495117, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.4053794428434198, |
| "grad_norm": 0.9725134968757629, |
| "learning_rate": 0.00020784252371598732, |
| "loss": 2.8961185455322265, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.40634005763688763, |
| "grad_norm": 0.6921088695526123, |
| "learning_rate": 0.00020740251485476345, |
| "loss": 2.893880844116211, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.40730067243035545, |
| "grad_norm": 0.6224314570426941, |
| "learning_rate": 0.00020696192654698592, |
| "loss": 2.891771697998047, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.40826128722382327, |
| "grad_norm": 0.7067698240280151, |
| "learning_rate": 0.00020652076324014927, |
| "loss": 2.8919609069824217, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.4092219020172911, |
| "grad_norm": 0.9510787725448608, |
| "learning_rate": 0.00020607902938755252, |
| "loss": 2.8964000701904298, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.4101825168107589, |
| "grad_norm": 0.6084873676300049, |
| "learning_rate": 0.00020563672944825392, |
| "loss": 2.8933223724365233, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.4111431316042267, |
| "grad_norm": 0.6090896725654602, |
| "learning_rate": 0.00020519386788702602, |
| "loss": 2.88956298828125, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.41210374639769454, |
| "grad_norm": 1.0817956924438477, |
| "learning_rate": 0.0002047504491743107, |
| "loss": 2.8921178817749023, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.41306436119116235, |
| "grad_norm": 0.7050410509109497, |
| "learning_rate": 0.000204306477786174, |
| "loss": 2.8883323669433594, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.4140249759846302, |
| "grad_norm": 0.9106734991073608, |
| "learning_rate": 0.00020386195820426082, |
| "loss": 2.8929920196533203, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.414985590778098, |
| "grad_norm": 0.7352846264839172, |
| "learning_rate": 0.00020341689491574984, |
| "loss": 2.891267776489258, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.4159462055715658, |
| "grad_norm": 0.6910091042518616, |
| "learning_rate": 0.00020297129241330817, |
| "loss": 2.890258026123047, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.4169068203650336, |
| "grad_norm": 0.7590439319610596, |
| "learning_rate": 0.00020252515519504592, |
| "loss": 2.8909364700317384, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.41786743515850144, |
| "grad_norm": 0.6669980883598328, |
| "learning_rate": 0.0002020784877644709, |
| "loss": 2.890909194946289, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.41882804995196926, |
| "grad_norm": 0.7879086136817932, |
| "learning_rate": 0.00020163129463044308, |
| "loss": 2.888330841064453, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4197886647454371, |
| "grad_norm": 0.799680769443512, |
| "learning_rate": 0.0002011835803071292, |
| "loss": 2.888754653930664, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.4207492795389049, |
| "grad_norm": 0.8745085597038269, |
| "learning_rate": 0.00020073534931395697, |
| "loss": 2.8860883712768555, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4217098943323727, |
| "grad_norm": 0.7884883284568787, |
| "learning_rate": 0.00020028660617556985, |
| "loss": 2.8914257049560548, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.42267050912584053, |
| "grad_norm": 0.6534205079078674, |
| "learning_rate": 0.00019983735542178086, |
| "loss": 2.887241744995117, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.42363112391930835, |
| "grad_norm": 0.9296805262565613, |
| "learning_rate": 0.00019938760158752725, |
| "loss": 2.8888803482055665, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.42459173871277617, |
| "grad_norm": 1.0822457075119019, |
| "learning_rate": 0.00019893734921282448, |
| "loss": 2.8933456420898436, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.425552353506244, |
| "grad_norm": 0.8483099937438965, |
| "learning_rate": 0.0001984866028427207, |
| "loss": 2.885776901245117, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.4265129682997118, |
| "grad_norm": 0.7693598866462708, |
| "learning_rate": 0.00019803536702725044, |
| "loss": 2.8912368774414063, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4274735830931796, |
| "grad_norm": 0.6365734338760376, |
| "learning_rate": 0.00019758364632138908, |
| "loss": 2.88284912109375, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.42843419788664744, |
| "grad_norm": 0.8677585124969482, |
| "learning_rate": 0.0001971314452850066, |
| "loss": 2.8882091522216795, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.42939481268011526, |
| "grad_norm": 0.8231710195541382, |
| "learning_rate": 0.00019667876848282167, |
| "loss": 2.88732795715332, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4303554274735831, |
| "grad_norm": 0.5195154547691345, |
| "learning_rate": 0.0001962256204843556, |
| "loss": 2.8842351913452147, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.4313160422670509, |
| "grad_norm": 0.7365444302558899, |
| "learning_rate": 0.00019577200586388618, |
| "loss": 2.8888904571533205, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.4322766570605187, |
| "grad_norm": 0.6714667081832886, |
| "learning_rate": 0.00019531792920040133, |
| "loss": 2.888100814819336, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.4332372718539865, |
| "grad_norm": 0.8434675335884094, |
| "learning_rate": 0.0001948633950775532, |
| "loss": 2.8872095108032227, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.43419788664745435, |
| "grad_norm": 0.7106018662452698, |
| "learning_rate": 0.00019440840808361174, |
| "loss": 2.8836999893188477, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.43515850144092216, |
| "grad_norm": 0.967634379863739, |
| "learning_rate": 0.00019395297281141828, |
| "loss": 2.8807979583740235, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.43611911623439004, |
| "grad_norm": 0.6999104022979736, |
| "learning_rate": 0.0001934970938583393, |
| "loss": 2.887973403930664, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.43707973102785785, |
| "grad_norm": 0.6316190958023071, |
| "learning_rate": 0.00019304077582622003, |
| "loss": 2.8830514907836915, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.43804034582132567, |
| "grad_norm": 0.990618884563446, |
| "learning_rate": 0.00019258402332133798, |
| "loss": 2.890216827392578, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.4390009606147935, |
| "grad_norm": 0.7043749690055847, |
| "learning_rate": 0.00019212684095435637, |
| "loss": 2.8833900451660157, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.4399615754082613, |
| "grad_norm": 0.7047979235649109, |
| "learning_rate": 0.00019166923334027765, |
| "loss": 2.884984588623047, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.4409221902017291, |
| "grad_norm": 0.5849128365516663, |
| "learning_rate": 0.00019121120509839692, |
| "loss": 2.8848838806152344, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.44188280499519694, |
| "grad_norm": 0.8806740045547485, |
| "learning_rate": 0.0001907527608522552, |
| "loss": 2.8797868728637694, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.44284341978866476, |
| "grad_norm": 0.7895880341529846, |
| "learning_rate": 0.000190293905229593, |
| "loss": 2.8813777923583985, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.4438040345821326, |
| "grad_norm": 1.0096721649169922, |
| "learning_rate": 0.00018983464286230327, |
| "loss": 2.8826440811157226, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.4447646493756004, |
| "grad_norm": 0.6461132764816284, |
| "learning_rate": 0.00018937497838638509, |
| "loss": 2.881867599487305, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.4457252641690682, |
| "grad_norm": 0.8887806534767151, |
| "learning_rate": 0.0001889149164418963, |
| "loss": 2.88212890625, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.44668587896253603, |
| "grad_norm": 0.6228038668632507, |
| "learning_rate": 0.00018845446167290705, |
| "loss": 2.88170166015625, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.44764649375600385, |
| "grad_norm": 0.6716971397399902, |
| "learning_rate": 0.0001879936187274529, |
| "loss": 2.8785728454589843, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.44860710854947167, |
| "grad_norm": 0.6986984014511108, |
| "learning_rate": 0.00018753239225748796, |
| "loss": 2.881254196166992, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.4495677233429395, |
| "grad_norm": 0.6882680654525757, |
| "learning_rate": 0.0001870707869188375, |
| "loss": 2.877424621582031, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.4505283381364073, |
| "grad_norm": 0.9753093123435974, |
| "learning_rate": 0.00018660880737115146, |
| "loss": 2.8826948165893556, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.4514889529298751, |
| "grad_norm": 0.6546221971511841, |
| "learning_rate": 0.0001861464582778572, |
| "loss": 2.8837528228759766, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.45244956772334294, |
| "grad_norm": 0.6572368741035461, |
| "learning_rate": 0.00018568374430611242, |
| "loss": 2.880933952331543, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.45341018251681076, |
| "grad_norm": 0.6908612847328186, |
| "learning_rate": 0.00018522067012675798, |
| "loss": 2.8826961517333984, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.4543707973102786, |
| "grad_norm": 0.7534862160682678, |
| "learning_rate": 0.00018475724041427106, |
| "loss": 2.8773014068603517, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.4553314121037464, |
| "grad_norm": 0.9261667132377625, |
| "learning_rate": 0.00018429345984671743, |
| "loss": 2.8801780700683595, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.4562920268972142, |
| "grad_norm": 0.9364944100379944, |
| "learning_rate": 0.0001838293331057048, |
| "loss": 2.880086135864258, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.457252641690682, |
| "grad_norm": 0.6954669952392578, |
| "learning_rate": 0.00018336486487633528, |
| "loss": 2.8802162170410157, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.45821325648414984, |
| "grad_norm": 0.5674038529396057, |
| "learning_rate": 0.000182900059847158, |
| "loss": 2.8759918212890625, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.45917387127761766, |
| "grad_norm": 0.558390200138092, |
| "learning_rate": 0.00018243492271012202, |
| "loss": 2.8748876571655275, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.4601344860710855, |
| "grad_norm": 0.7045537233352661, |
| "learning_rate": 0.00018196945816052867, |
| "loss": 2.8770776748657227, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.4610951008645533, |
| "grad_norm": 0.8516057729721069, |
| "learning_rate": 0.00018150367089698452, |
| "loss": 2.8759332656860352, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.4620557156580211, |
| "grad_norm": 0.6226221323013306, |
| "learning_rate": 0.00018103756562135373, |
| "loss": 2.880674362182617, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.46301633045148893, |
| "grad_norm": 0.5688340067863464, |
| "learning_rate": 0.0001805711470387105, |
| "loss": 2.874104690551758, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.46397694524495675, |
| "grad_norm": 0.5873801708221436, |
| "learning_rate": 0.00018010441985729183, |
| "loss": 2.8717109680175783, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.46493756003842457, |
| "grad_norm": 0.8382697105407715, |
| "learning_rate": 0.00017963738878844966, |
| "loss": 2.8790111541748047, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.4658981748318924, |
| "grad_norm": 0.7427976727485657, |
| "learning_rate": 0.00017917005854660374, |
| "loss": 2.8731239318847654, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.4668587896253602, |
| "grad_norm": 0.6539490818977356, |
| "learning_rate": 0.00017870243384919364, |
| "loss": 2.8791481018066407, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.4678194044188281, |
| "grad_norm": 0.8121071457862854, |
| "learning_rate": 0.0001782345194166314, |
| "loss": 2.877638053894043, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.4687800192122959, |
| "grad_norm": 0.6476060152053833, |
| "learning_rate": 0.00017776631997225365, |
| "loss": 2.8710798263549804, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.4697406340057637, |
| "grad_norm": 0.5659114122390747, |
| "learning_rate": 0.0001772978402422742, |
| "loss": 2.873188018798828, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.47070124879923153, |
| "grad_norm": 0.6289011836051941, |
| "learning_rate": 0.0001768290849557361, |
| "loss": 2.872249984741211, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.47166186359269935, |
| "grad_norm": 0.7904647588729858, |
| "learning_rate": 0.00017636005884446397, |
| "loss": 2.873155975341797, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.47262247838616717, |
| "grad_norm": 0.6264911890029907, |
| "learning_rate": 0.00017589076664301637, |
| "loss": 2.875007438659668, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.473583093179635, |
| "grad_norm": 0.8897266387939453, |
| "learning_rate": 0.00017542121308863776, |
| "loss": 2.874278259277344, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.4745437079731028, |
| "grad_norm": 0.8211498856544495, |
| "learning_rate": 0.00017495140292121084, |
| "loss": 2.875082015991211, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.4755043227665706, |
| "grad_norm": 0.6536585688591003, |
| "learning_rate": 0.0001744813408832088, |
| "loss": 2.872761535644531, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.47646493756003844, |
| "grad_norm": 0.6122596859931946, |
| "learning_rate": 0.0001740110317196472, |
| "loss": 2.873042678833008, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.47742555235350626, |
| "grad_norm": 0.6163274645805359, |
| "learning_rate": 0.0001735404801780362, |
| "loss": 2.8703638076782227, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.4783861671469741, |
| "grad_norm": 0.8691967129707336, |
| "learning_rate": 0.0001730696910083326, |
| "loss": 2.873611259460449, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.4793467819404419, |
| "grad_norm": 0.7458942532539368, |
| "learning_rate": 0.0001725986689628921, |
| "loss": 2.871821403503418, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.4803073967339097, |
| "grad_norm": 0.6633113622665405, |
| "learning_rate": 0.00017212741879642096, |
| "loss": 2.8725128173828125, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.4812680115273775, |
| "grad_norm": 0.684518039226532, |
| "learning_rate": 0.00017165594526592833, |
| "loss": 2.874650764465332, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.48222862632084534, |
| "grad_norm": 0.6810330748558044, |
| "learning_rate": 0.000171184253130678, |
| "loss": 2.8709695816040037, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.48318924111431316, |
| "grad_norm": 0.7958024740219116, |
| "learning_rate": 0.00017071234715214045, |
| "loss": 2.872270202636719, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.484149855907781, |
| "grad_norm": 0.6600337624549866, |
| "learning_rate": 0.0001702402320939449, |
| "loss": 2.8687416076660157, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.4851104707012488, |
| "grad_norm": 0.6404789090156555, |
| "learning_rate": 0.00016976791272183098, |
| "loss": 2.865874481201172, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.4860710854947166, |
| "grad_norm": 0.5435463190078735, |
| "learning_rate": 0.0001692953938036008, |
| "loss": 2.8678897857666015, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.48703170028818443, |
| "grad_norm": 0.8794398903846741, |
| "learning_rate": 0.00016882268010907087, |
| "loss": 2.871311569213867, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.48799231508165225, |
| "grad_norm": 0.8902005553245544, |
| "learning_rate": 0.00016834977641002377, |
| "loss": 2.870677947998047, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.48895292987512007, |
| "grad_norm": 0.6775867342948914, |
| "learning_rate": 0.00016787668748016008, |
| "loss": 2.868155670166016, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.4899135446685879, |
| "grad_norm": 0.5421539545059204, |
| "learning_rate": 0.00016740341809505017, |
| "loss": 2.8710710525512697, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.4908741594620557, |
| "grad_norm": 0.5883712768554688, |
| "learning_rate": 0.000166929973032086, |
| "loss": 2.8703285217285157, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.4918347742555235, |
| "grad_norm": 0.8570505380630493, |
| "learning_rate": 0.0001664563570704329, |
| "loss": 2.8731103897094727, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.49279538904899134, |
| "grad_norm": 0.8919458985328674, |
| "learning_rate": 0.0001659825749909814, |
| "loss": 2.8725284576416015, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.49375600384245916, |
| "grad_norm": 0.6126694083213806, |
| "learning_rate": 0.00016550863157629888, |
| "loss": 2.8706518173217774, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.494716618635927, |
| "grad_norm": 0.45519402623176575, |
| "learning_rate": 0.00016503453161058123, |
| "loss": 2.8679710388183595, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.4956772334293948, |
| "grad_norm": 0.7353713512420654, |
| "learning_rate": 0.00016456027987960466, |
| "loss": 2.8688129425048827, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.4966378482228626, |
| "grad_norm": 0.8744354248046875, |
| "learning_rate": 0.00016408588117067743, |
| "loss": 2.8654415130615236, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.49759846301633043, |
| "grad_norm": 0.824774980545044, |
| "learning_rate": 0.00016361134027259136, |
| "loss": 2.8741382598876952, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.49855907780979825, |
| "grad_norm": 0.6276190876960754, |
| "learning_rate": 0.00016313666197557373, |
| "loss": 2.8700138092041017, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.49951969260326606, |
| "grad_norm": 0.5131474733352661, |
| "learning_rate": 0.00016266185107123864, |
| "loss": 2.8695526123046875, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5004803073967339, |
| "grad_norm": 0.7874789237976074, |
| "learning_rate": 0.00016218691235253893, |
| "loss": 2.8659042358398437, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.5014409221902018, |
| "grad_norm": 0.6303560137748718, |
| "learning_rate": 0.0001617118506137175, |
| "loss": 2.8703582763671873, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5024015369836695, |
| "grad_norm": 0.8049387335777283, |
| "learning_rate": 0.00016123667065025914, |
| "loss": 2.8671377182006834, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.5033621517771374, |
| "grad_norm": 0.8616132140159607, |
| "learning_rate": 0.00016076137725884218, |
| "loss": 2.8674648284912108, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5043227665706052, |
| "grad_norm": 0.6320975422859192, |
| "learning_rate": 0.0001602859752372897, |
| "loss": 2.8637058258056642, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.505283381364073, |
| "grad_norm": 0.5506896376609802, |
| "learning_rate": 0.00015981046938452146, |
| "loss": 2.8641489028930662, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5062439961575408, |
| "grad_norm": 0.6218773722648621, |
| "learning_rate": 0.0001593348645005054, |
| "loss": 2.8639698028564453, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.5072046109510087, |
| "grad_norm": 0.9080044627189636, |
| "learning_rate": 0.00015885916538620906, |
| "loss": 2.8647804260253906, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5081652257444764, |
| "grad_norm": 0.8615972399711609, |
| "learning_rate": 0.00015838337684355121, |
| "loss": 2.8677547454833983, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.5091258405379443, |
| "grad_norm": 0.570344090461731, |
| "learning_rate": 0.0001579075036753533, |
| "loss": 2.866165542602539, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5100864553314121, |
| "grad_norm": 0.5077627897262573, |
| "learning_rate": 0.00015743155068529102, |
| "loss": 2.8662036895751952, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.5110470701248799, |
| "grad_norm": 0.7941848635673523, |
| "learning_rate": 0.0001569555226778459, |
| "loss": 2.8637313842773438, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5120076849183477, |
| "grad_norm": 0.9668124914169312, |
| "learning_rate": 0.0001564794244582567, |
| "loss": 2.867812156677246, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.5129682997118156, |
| "grad_norm": 0.6352429389953613, |
| "learning_rate": 0.0001560032608324709, |
| "loss": 2.8658679962158202, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5139289145052833, |
| "grad_norm": 0.4856127202510834, |
| "learning_rate": 0.00015552703660709618, |
| "loss": 2.8620628356933593, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.5148895292987512, |
| "grad_norm": 1.1079636812210083, |
| "learning_rate": 0.00015505075658935207, |
| "loss": 2.865533447265625, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.515850144092219, |
| "grad_norm": 0.5639813542366028, |
| "learning_rate": 0.0001545744255870212, |
| "loss": 2.867781066894531, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5168107588856868, |
| "grad_norm": 0.6282705664634705, |
| "learning_rate": 0.00015409804840840088, |
| "loss": 2.8628158569335938, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5177713736791547, |
| "grad_norm": 0.6844084858894348, |
| "learning_rate": 0.0001536216298622545, |
| "loss": 2.865097427368164, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.5187319884726225, |
| "grad_norm": 0.7825571894645691, |
| "learning_rate": 0.00015314517475776318, |
| "loss": 2.863981246948242, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5196926032660903, |
| "grad_norm": 0.6711815595626831, |
| "learning_rate": 0.00015266868790447685, |
| "loss": 2.862168884277344, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.5206532180595581, |
| "grad_norm": 0.5164412260055542, |
| "learning_rate": 0.0001521921741122661, |
| "loss": 2.8566638946533205, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.521613832853026, |
| "grad_norm": 0.6021407246589661, |
| "learning_rate": 0.00015171563819127342, |
| "loss": 2.85965576171875, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.5225744476464937, |
| "grad_norm": 0.8277438282966614, |
| "learning_rate": 0.00015123908495186464, |
| "loss": 2.863536071777344, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.5235350624399616, |
| "grad_norm": 0.6992095708847046, |
| "learning_rate": 0.0001507625192045804, |
| "loss": 2.866368865966797, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.5244956772334294, |
| "grad_norm": 0.6141840815544128, |
| "learning_rate": 0.00015028594576008773, |
| "loss": 2.8606613159179686, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.5254562920268973, |
| "grad_norm": 0.6499273777008057, |
| "learning_rate": 0.00014980936942913113, |
| "loss": 2.8631092071533204, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.526416906820365, |
| "grad_norm": 0.5648180842399597, |
| "learning_rate": 0.00014933279502248444, |
| "loss": 2.857931900024414, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.5273775216138329, |
| "grad_norm": 0.6316075325012207, |
| "learning_rate": 0.000148856227350902, |
| "loss": 2.8574670791625976, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.5283381364073007, |
| "grad_norm": 0.8975266218185425, |
| "learning_rate": 0.00014837967122507015, |
| "loss": 2.864010238647461, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.5292987512007685, |
| "grad_norm": 0.7359750866889954, |
| "learning_rate": 0.00014790313145555863, |
| "loss": 2.861468505859375, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.5302593659942363, |
| "grad_norm": 0.5477508902549744, |
| "learning_rate": 0.00014742661285277228, |
| "loss": 2.8594310760498045, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.5312199807877042, |
| "grad_norm": 0.6290469765663147, |
| "learning_rate": 0.00014695012022690205, |
| "loss": 2.860156440734863, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.5321805955811719, |
| "grad_norm": 0.5689098834991455, |
| "learning_rate": 0.00014647365838787673, |
| "loss": 2.8548521041870116, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.5331412103746398, |
| "grad_norm": 0.7737315893173218, |
| "learning_rate": 0.00014599723214531434, |
| "loss": 2.8601730346679686, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.5341018251681076, |
| "grad_norm": 0.7761661410331726, |
| "learning_rate": 0.0001455208463084737, |
| "loss": 2.857926940917969, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.5350624399615754, |
| "grad_norm": 0.6857746839523315, |
| "learning_rate": 0.00014504450568620557, |
| "loss": 2.8620637893676757, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.5360230547550432, |
| "grad_norm": 0.5136231780052185, |
| "learning_rate": 0.00014456821508690432, |
| "loss": 2.8587860107421874, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.5369836695485111, |
| "grad_norm": 0.5590301752090454, |
| "learning_rate": 0.0001440919793184595, |
| "loss": 2.855500030517578, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.5379442843419788, |
| "grad_norm": 0.7252662181854248, |
| "learning_rate": 0.00014361580318820696, |
| "loss": 2.8574901580810548, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.5389048991354467, |
| "grad_norm": 0.6491255760192871, |
| "learning_rate": 0.00014313969150288083, |
| "loss": 2.857944297790527, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.5398655139289145, |
| "grad_norm": 0.8175961375236511, |
| "learning_rate": 0.00014266364906856442, |
| "loss": 2.857021713256836, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.5408261287223823, |
| "grad_norm": 0.7656385898590088, |
| "learning_rate": 0.00014218768069064214, |
| "loss": 2.8591373443603514, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.5417867435158501, |
| "grad_norm": 0.7636440396308899, |
| "learning_rate": 0.00014171179117375082, |
| "loss": 2.8583093643188477, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.542747358309318, |
| "grad_norm": 0.6154850721359253, |
| "learning_rate": 0.00014123598532173113, |
| "loss": 2.859964942932129, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.5437079731027857, |
| "grad_norm": 0.49419260025024414, |
| "learning_rate": 0.00014076026793757943, |
| "loss": 2.8569236755371095, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.5446685878962536, |
| "grad_norm": 0.6245793104171753, |
| "learning_rate": 0.00014028464382339877, |
| "loss": 2.857840728759766, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.5456292026897214, |
| "grad_norm": 0.7936309576034546, |
| "learning_rate": 0.0001398091177803509, |
| "loss": 2.858272171020508, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.5465898174831892, |
| "grad_norm": 0.6912634372711182, |
| "learning_rate": 0.00013933369460860748, |
| "loss": 2.855249786376953, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.547550432276657, |
| "grad_norm": 0.4704606831073761, |
| "learning_rate": 0.00013885837910730168, |
| "loss": 2.853233528137207, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.5485110470701249, |
| "grad_norm": 0.6878814697265625, |
| "learning_rate": 0.00013838317607448004, |
| "loss": 2.856007194519043, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.5494716618635928, |
| "grad_norm": 0.7811667323112488, |
| "learning_rate": 0.00013790809030705354, |
| "loss": 2.8576026916503907, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.5504322766570605, |
| "grad_norm": 0.5080620050430298, |
| "learning_rate": 0.00013743312660074962, |
| "loss": 2.856093978881836, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.5513928914505284, |
| "grad_norm": 0.7438220977783203, |
| "learning_rate": 0.00013695828975006336, |
| "loss": 2.85654354095459, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.5523535062439962, |
| "grad_norm": 0.5278438329696655, |
| "learning_rate": 0.00013648358454820957, |
| "loss": 2.857866668701172, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.553314121037464, |
| "grad_norm": 0.5583338737487793, |
| "learning_rate": 0.00013600901578707402, |
| "loss": 2.8554916381835938, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.5542747358309318, |
| "grad_norm": 0.6105745434761047, |
| "learning_rate": 0.00013553458825716502, |
| "loss": 2.8572811126708983, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.5552353506243997, |
| "grad_norm": 0.5137814879417419, |
| "learning_rate": 0.0001350603067475655, |
| "loss": 2.856470489501953, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.5561959654178674, |
| "grad_norm": 0.6973247528076172, |
| "learning_rate": 0.00013458617604588418, |
| "loss": 2.8541500091552736, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.5571565802113353, |
| "grad_norm": 0.6607674956321716, |
| "learning_rate": 0.00013411220093820773, |
| "loss": 2.856244659423828, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.5581171950048031, |
| "grad_norm": 0.6131708025932312, |
| "learning_rate": 0.00013363838620905188, |
| "loss": 2.8549783706665037, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.5590778097982709, |
| "grad_norm": 0.5899085998535156, |
| "learning_rate": 0.00013316473664131347, |
| "loss": 2.8583316802978516, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.5600384245917387, |
| "grad_norm": 0.6078781485557556, |
| "learning_rate": 0.00013269125701622243, |
| "loss": 2.855908203125, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.5609990393852066, |
| "grad_norm": 0.5581336617469788, |
| "learning_rate": 0.00013221795211329281, |
| "loss": 2.853131866455078, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.5619596541786743, |
| "grad_norm": 0.5300495624542236, |
| "learning_rate": 0.00013174482671027526, |
| "loss": 2.8522682189941406, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.5629202689721422, |
| "grad_norm": 0.6495129466056824, |
| "learning_rate": 0.00013127188558310823, |
| "loss": 2.857743835449219, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.56388088376561, |
| "grad_norm": 0.6372506618499756, |
| "learning_rate": 0.0001307991335058702, |
| "loss": 2.8534168243408202, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.5648414985590778, |
| "grad_norm": 0.5662026405334473, |
| "learning_rate": 0.00013032657525073122, |
| "loss": 2.8524166107177735, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.5658021133525456, |
| "grad_norm": 0.5620723366737366, |
| "learning_rate": 0.00012985421558790473, |
| "loss": 2.8510919570922852, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.5667627281460135, |
| "grad_norm": 0.861655056476593, |
| "learning_rate": 0.00012938205928559964, |
| "loss": 2.8496784210205077, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.5677233429394812, |
| "grad_norm": 0.7627875804901123, |
| "learning_rate": 0.00012891011110997187, |
| "loss": 2.8515321731567385, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.5686839577329491, |
| "grad_norm": 0.600219190120697, |
| "learning_rate": 0.0001284383758250767, |
| "loss": 2.8534393310546875, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.5696445725264169, |
| "grad_norm": 0.5384979844093323, |
| "learning_rate": 0.00012796685819282009, |
| "loss": 2.8518402099609377, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.5706051873198847, |
| "grad_norm": 0.5790613293647766, |
| "learning_rate": 0.0001274955629729111, |
| "loss": 2.850631332397461, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.5715658021133525, |
| "grad_norm": 0.7250127196311951, |
| "learning_rate": 0.0001270244949228136, |
| "loss": 2.8556766510009766, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.5725264169068204, |
| "grad_norm": 0.6143919229507446, |
| "learning_rate": 0.00012655365879769826, |
| "loss": 2.852082061767578, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.5734870317002881, |
| "grad_norm": 0.5853344202041626, |
| "learning_rate": 0.00012608305935039475, |
| "loss": 2.8520870208740234, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.574447646493756, |
| "grad_norm": 0.4826011061668396, |
| "learning_rate": 0.00012561270133134344, |
| "loss": 2.850848388671875, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.5754082612872238, |
| "grad_norm": 0.7613359689712524, |
| "learning_rate": 0.00012514258948854773, |
| "loss": 2.8502517700195313, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.5763688760806917, |
| "grad_norm": 0.751300036907196, |
| "learning_rate": 0.00012467272856752593, |
| "loss": 2.852705383300781, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5773294908741594, |
| "grad_norm": 0.5232334733009338, |
| "learning_rate": 0.0001242031233112634, |
| "loss": 2.8527690887451174, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.5782901056676273, |
| "grad_norm": 0.5062333941459656, |
| "learning_rate": 0.00012373377846016493, |
| "loss": 2.848916435241699, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.579250720461095, |
| "grad_norm": 0.4421253800392151, |
| "learning_rate": 0.0001232646987520064, |
| "loss": 2.852520561218262, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.5802113352545629, |
| "grad_norm": 0.6208221912384033, |
| "learning_rate": 0.00012279588892188736, |
| "loss": 2.850700569152832, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.5811719500480308, |
| "grad_norm": 0.7289994359016418, |
| "learning_rate": 0.000122327353702183, |
| "loss": 2.850907325744629, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.5821325648414986, |
| "grad_norm": 0.5431981086730957, |
| "learning_rate": 0.0001218590978224966, |
| "loss": 2.8476612091064455, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.5830931796349664, |
| "grad_norm": 0.5733211040496826, |
| "learning_rate": 0.00012139112600961159, |
| "loss": 2.850396728515625, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.5840537944284342, |
| "grad_norm": 0.6280592679977417, |
| "learning_rate": 0.00012092344298744383, |
| "loss": 2.8513126373291016, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.5850144092219021, |
| "grad_norm": 0.7266437411308289, |
| "learning_rate": 0.00012045605347699411, |
| "loss": 2.853240966796875, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.5859750240153698, |
| "grad_norm": 0.5711589455604553, |
| "learning_rate": 0.00011998896219630029, |
| "loss": 2.8492050170898438, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.5869356388088377, |
| "grad_norm": 0.5127094984054565, |
| "learning_rate": 0.0001195221738603899, |
| "loss": 2.8453636169433594, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.5878962536023055, |
| "grad_norm": 0.5526590943336487, |
| "learning_rate": 0.00011905569318123223, |
| "loss": 2.8491466522216795, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.5888568683957733, |
| "grad_norm": 0.6706989407539368, |
| "learning_rate": 0.00011858952486769114, |
| "loss": 2.849506378173828, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.5898174831892411, |
| "grad_norm": 0.5911527276039124, |
| "learning_rate": 0.00011812367362547716, |
| "loss": 2.848992919921875, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.590778097982709, |
| "grad_norm": 0.5355327725410461, |
| "learning_rate": 0.0001176581441571002, |
| "loss": 2.851682662963867, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.5917387127761767, |
| "grad_norm": 0.5494570136070251, |
| "learning_rate": 0.00011719294116182217, |
| "loss": 2.847641944885254, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.5926993275696446, |
| "grad_norm": 0.681022047996521, |
| "learning_rate": 0.00011672806933560925, |
| "loss": 2.847896957397461, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.5936599423631124, |
| "grad_norm": 0.7260766625404358, |
| "learning_rate": 0.00011626353337108476, |
| "loss": 2.8473175048828123, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.5946205571565802, |
| "grad_norm": 0.6017476320266724, |
| "learning_rate": 0.00011579933795748164, |
| "loss": 2.8471202850341797, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.595581171950048, |
| "grad_norm": 0.47198686003685, |
| "learning_rate": 0.00011533548778059508, |
| "loss": 2.8486038208007813, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.5965417867435159, |
| "grad_norm": 0.4362870752811432, |
| "learning_rate": 0.00011487198752273552, |
| "loss": 2.848790168762207, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.5975024015369836, |
| "grad_norm": 0.6905494928359985, |
| "learning_rate": 0.0001144088418626809, |
| "loss": 2.849490928649902, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.5984630163304515, |
| "grad_norm": 0.6234802007675171, |
| "learning_rate": 0.00011394605547562989, |
| "loss": 2.848898696899414, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.5994236311239193, |
| "grad_norm": 0.4275026023387909, |
| "learning_rate": 0.00011348363303315434, |
| "loss": 2.846969413757324, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6003842459173871, |
| "grad_norm": 0.4885198771953583, |
| "learning_rate": 0.00011302157920315244, |
| "loss": 2.8450719833374025, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.6013448607108549, |
| "grad_norm": 0.5203735828399658, |
| "learning_rate": 0.00011255989864980133, |
| "loss": 2.8464969635009765, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.6023054755043228, |
| "grad_norm": 0.6056355834007263, |
| "learning_rate": 0.00011209859603351015, |
| "loss": 2.8485231399536133, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.6032660902977905, |
| "grad_norm": 0.5373650789260864, |
| "learning_rate": 0.00011163767601087301, |
| "loss": 2.846028518676758, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.6042267050912584, |
| "grad_norm": 0.525007963180542, |
| "learning_rate": 0.00011117714323462186, |
| "loss": 2.846885871887207, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.6051873198847262, |
| "grad_norm": 0.4438735544681549, |
| "learning_rate": 0.00011071700235357979, |
| "loss": 2.845564079284668, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6061479346781941, |
| "grad_norm": 0.5859609842300415, |
| "learning_rate": 0.00011025725801261373, |
| "loss": 2.844126510620117, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.6071085494716618, |
| "grad_norm": 0.595695436000824, |
| "learning_rate": 0.00010979791485258788, |
| "loss": 2.8454761505126953, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.6080691642651297, |
| "grad_norm": 0.523353099822998, |
| "learning_rate": 0.00010933897751031671, |
| "loss": 2.8453132629394533, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.6090297790585975, |
| "grad_norm": 0.44268059730529785, |
| "learning_rate": 0.00010888045061851807, |
| "loss": 2.8437095642089845, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.6099903938520653, |
| "grad_norm": 0.5390836596488953, |
| "learning_rate": 0.00010842233880576681, |
| "loss": 2.8442451477050783, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.6109510086455331, |
| "grad_norm": 0.6214951872825623, |
| "learning_rate": 0.0001079646466964475, |
| "loss": 2.842816162109375, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.611911623439001, |
| "grad_norm": 0.5127742886543274, |
| "learning_rate": 0.00010750737891070824, |
| "loss": 2.8442523956298826, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.6128722382324687, |
| "grad_norm": 0.5821230411529541, |
| "learning_rate": 0.00010705054006441371, |
| "loss": 2.8449642181396486, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.6138328530259366, |
| "grad_norm": 0.5417553782463074, |
| "learning_rate": 0.00010659413476909865, |
| "loss": 2.8446380615234377, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.6147934678194045, |
| "grad_norm": 0.5219445824623108, |
| "learning_rate": 0.00010613816763192152, |
| "loss": 2.843789291381836, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.6157540826128722, |
| "grad_norm": 0.5322816967964172, |
| "learning_rate": 0.00010568264325561763, |
| "loss": 2.8418731689453125, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.6167146974063401, |
| "grad_norm": 0.713876485824585, |
| "learning_rate": 0.000105227566238453, |
| "loss": 2.848464584350586, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.6176753121998079, |
| "grad_norm": 0.5759865641593933, |
| "learning_rate": 0.00010477294117417762, |
| "loss": 2.84826545715332, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.6186359269932757, |
| "grad_norm": 0.5045075416564941, |
| "learning_rate": 0.00010431877265197955, |
| "loss": 2.844080352783203, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.6195965417867435, |
| "grad_norm": 0.5640796422958374, |
| "learning_rate": 0.00010386506525643808, |
| "loss": 2.8412174224853515, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.6205571565802114, |
| "grad_norm": 0.5091181397438049, |
| "learning_rate": 0.00010341182356747771, |
| "loss": 2.8444406509399416, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.6215177713736791, |
| "grad_norm": 0.4901158809661865, |
| "learning_rate": 0.00010295905216032203, |
| "loss": 2.843181610107422, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.622478386167147, |
| "grad_norm": 0.48062700033187866, |
| "learning_rate": 0.00010250675560544717, |
| "loss": 2.84393424987793, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.6234390009606148, |
| "grad_norm": 0.8245052099227905, |
| "learning_rate": 0.00010205493846853618, |
| "loss": 2.8429317474365234, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.6243996157540826, |
| "grad_norm": 0.5051743984222412, |
| "learning_rate": 0.00010160360531043239, |
| "loss": 2.8431800842285155, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.6253602305475504, |
| "grad_norm": 0.45200181007385254, |
| "learning_rate": 0.00010115276068709377, |
| "loss": 2.8441776275634765, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.6263208453410183, |
| "grad_norm": 0.4492049217224121, |
| "learning_rate": 0.00010070240914954676, |
| "loss": 2.840077018737793, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.627281460134486, |
| "grad_norm": 0.5113126039505005, |
| "learning_rate": 0.00010025255524384033, |
| "loss": 2.8416378021240236, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.6282420749279539, |
| "grad_norm": 0.45182526111602783, |
| "learning_rate": 9.980320351100028e-05, |
| "loss": 2.842592239379883, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.6292026897214217, |
| "grad_norm": 0.675184965133667, |
| "learning_rate": 9.935435848698307e-05, |
| "loss": 2.840932846069336, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.6301633045148896, |
| "grad_norm": 0.4641052186489105, |
| "learning_rate": 9.890602470263037e-05, |
| "loss": 2.8466796875, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.6311239193083573, |
| "grad_norm": 0.5004357099533081, |
| "learning_rate": 9.845820668362308e-05, |
| "loss": 2.8424007415771486, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.6320845341018252, |
| "grad_norm": 0.5009520053863525, |
| "learning_rate": 9.801090895043566e-05, |
| "loss": 2.8406450271606447, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.633045148895293, |
| "grad_norm": 0.5677304863929749, |
| "learning_rate": 9.756413601829083e-05, |
| "loss": 2.8411323547363283, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.6340057636887608, |
| "grad_norm": 0.48396027088165283, |
| "learning_rate": 9.711789239711344e-05, |
| "loss": 2.842608642578125, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.6349663784822286, |
| "grad_norm": 0.5474041700363159, |
| "learning_rate": 9.667218259148547e-05, |
| "loss": 2.8427316665649416, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.6359269932756965, |
| "grad_norm": 0.619439423084259, |
| "learning_rate": 9.62270111006001e-05, |
| "loss": 2.8402175903320312, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.6368876080691642, |
| "grad_norm": 0.4645543098449707, |
| "learning_rate": 9.57823824182168e-05, |
| "loss": 2.8401821136474608, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.6378482228626321, |
| "grad_norm": 0.4728531837463379, |
| "learning_rate": 9.53383010326155e-05, |
| "loss": 2.840835762023926, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.6388088376560999, |
| "grad_norm": 0.4560253322124481, |
| "learning_rate": 9.489477142655147e-05, |
| "loss": 2.839691925048828, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.6397694524495677, |
| "grad_norm": 0.5300701856613159, |
| "learning_rate": 9.445179807721012e-05, |
| "loss": 2.8393108367919924, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.6407300672430355, |
| "grad_norm": 0.5008341670036316, |
| "learning_rate": 9.400938545616173e-05, |
| "loss": 2.841996765136719, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.6416906820365034, |
| "grad_norm": 0.5530736446380615, |
| "learning_rate": 9.356753802931646e-05, |
| "loss": 2.837991142272949, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.6426512968299711, |
| "grad_norm": 0.5178174376487732, |
| "learning_rate": 9.312626025687897e-05, |
| "loss": 2.8412078857421874, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.643611911623439, |
| "grad_norm": 0.4595809280872345, |
| "learning_rate": 9.268555659330364e-05, |
| "loss": 2.841719055175781, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.6445725264169068, |
| "grad_norm": 0.34829553961753845, |
| "learning_rate": 9.22454314872496e-05, |
| "loss": 2.8385791778564453, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.6455331412103746, |
| "grad_norm": 0.5107468366622925, |
| "learning_rate": 9.180588938153566e-05, |
| "loss": 2.837472343444824, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.6464937560038425, |
| "grad_norm": 0.4454675316810608, |
| "learning_rate": 9.136693471309568e-05, |
| "loss": 2.845228576660156, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.6474543707973103, |
| "grad_norm": 0.5800176858901978, |
| "learning_rate": 9.092857191293356e-05, |
| "loss": 2.843001937866211, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.6484149855907781, |
| "grad_norm": 0.4466376304626465, |
| "learning_rate": 9.049080540607875e-05, |
| "loss": 2.837605857849121, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.6493756003842459, |
| "grad_norm": 0.3897906541824341, |
| "learning_rate": 9.005363961154126e-05, |
| "loss": 2.8372968673706054, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.6503362151777138, |
| "grad_norm": 0.554328441619873, |
| "learning_rate": 8.961707894226735e-05, |
| "loss": 2.8404136657714845, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.6512968299711815, |
| "grad_norm": 0.5811151266098022, |
| "learning_rate": 8.918112780509494e-05, |
| "loss": 2.8356746673583983, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.6522574447646494, |
| "grad_norm": 0.5617589354515076, |
| "learning_rate": 8.874579060070894e-05, |
| "loss": 2.8357141494750975, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.6532180595581172, |
| "grad_norm": 0.4976288378238678, |
| "learning_rate": 8.831107172359707e-05, |
| "loss": 2.8398956298828124, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.654178674351585, |
| "grad_norm": 0.4745360314846039, |
| "learning_rate": 8.787697556200519e-05, |
| "loss": 2.8399864196777345, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.6551392891450528, |
| "grad_norm": 0.4591399133205414, |
| "learning_rate": 8.744350649789347e-05, |
| "loss": 2.8361705780029296, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.6560999039385207, |
| "grad_norm": 0.4175853729248047, |
| "learning_rate": 8.701066890689166e-05, |
| "loss": 2.835278129577637, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.6570605187319885, |
| "grad_norm": 0.4387359619140625, |
| "learning_rate": 8.657846715825508e-05, |
| "loss": 2.838814544677734, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.6580211335254563, |
| "grad_norm": 0.5446368455886841, |
| "learning_rate": 8.61469056148209e-05, |
| "loss": 2.8376935958862304, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.6589817483189241, |
| "grad_norm": 0.4309311509132385, |
| "learning_rate": 8.571598863296342e-05, |
| "loss": 2.8386699676513674, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.659942363112392, |
| "grad_norm": 0.4344281554222107, |
| "learning_rate": 8.528572056255065e-05, |
| "loss": 2.8364212036132814, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.6609029779058597, |
| "grad_norm": 0.4265115261077881, |
| "learning_rate": 8.485610574690021e-05, |
| "loss": 2.8368595123291014, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.6618635926993276, |
| "grad_norm": 0.530190646648407, |
| "learning_rate": 8.442714852273523e-05, |
| "loss": 2.836482048034668, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.6628242074927954, |
| "grad_norm": 0.4156145751476288, |
| "learning_rate": 8.399885322014123e-05, |
| "loss": 2.837312698364258, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.6637848222862632, |
| "grad_norm": 0.3237242102622986, |
| "learning_rate": 8.35712241625216e-05, |
| "loss": 2.837934112548828, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.664745437079731, |
| "grad_norm": 0.5120749473571777, |
| "learning_rate": 8.314426566655458e-05, |
| "loss": 2.8360355377197264, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.6657060518731989, |
| "grad_norm": 0.32652583718299866, |
| "learning_rate": 8.271798204214942e-05, |
| "loss": 2.83548526763916, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.4441000521183014, |
| "learning_rate": 8.229237759240289e-05, |
| "loss": 2.832575225830078, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.6676272814601345, |
| "grad_norm": 0.46819108724594116, |
| "learning_rate": 8.186745661355595e-05, |
| "loss": 2.832535171508789, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.6685878962536023, |
| "grad_norm": 0.42160722613334656, |
| "learning_rate": 8.144322339495012e-05, |
| "loss": 2.8376684188842773, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.6695485110470701, |
| "grad_norm": 0.4239354431629181, |
| "learning_rate": 8.101968221898453e-05, |
| "loss": 2.8348594665527345, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.6705091258405379, |
| "grad_norm": 0.465847373008728, |
| "learning_rate": 8.059683736107245e-05, |
| "loss": 2.8386322021484376, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.6714697406340058, |
| "grad_norm": 0.4507569372653961, |
| "learning_rate": 8.017469308959823e-05, |
| "loss": 2.836222839355469, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.6724303554274735, |
| "grad_norm": 0.35617345571517944, |
| "learning_rate": 7.97532536658742e-05, |
| "loss": 2.837454605102539, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6733909702209414, |
| "grad_norm": 0.5097410082817078, |
| "learning_rate": 7.933252334409766e-05, |
| "loss": 2.836711883544922, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.6743515850144092, |
| "grad_norm": 0.37498244643211365, |
| "learning_rate": 7.891250637130779e-05, |
| "loss": 2.8351585388183596, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.675312199807877, |
| "grad_norm": 0.3829497992992401, |
| "learning_rate": 7.849320698734306e-05, |
| "loss": 2.839457321166992, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.6762728146013448, |
| "grad_norm": 0.40281057357788086, |
| "learning_rate": 7.80746294247982e-05, |
| "loss": 2.8323028564453123, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.6772334293948127, |
| "grad_norm": 0.5071494579315186, |
| "learning_rate": 7.765677790898155e-05, |
| "loss": 2.8315807342529298, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.6781940441882806, |
| "grad_norm": 0.4299395680427551, |
| "learning_rate": 7.723965665787255e-05, |
| "loss": 2.8363826751708983, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.6791546589817483, |
| "grad_norm": 0.4607393443584442, |
| "learning_rate": 7.682326988207877e-05, |
| "loss": 2.835020065307617, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.6801152737752162, |
| "grad_norm": 0.5409980416297913, |
| "learning_rate": 7.640762178479382e-05, |
| "loss": 2.8327972412109377, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.681075888568684, |
| "grad_norm": 0.4520939290523529, |
| "learning_rate": 7.599271656175476e-05, |
| "loss": 2.837631607055664, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.6820365033621518, |
| "grad_norm": 0.4245823323726654, |
| "learning_rate": 7.557855840119976e-05, |
| "loss": 2.833856201171875, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.6829971181556196, |
| "grad_norm": 0.3175192177295685, |
| "learning_rate": 7.516515148382576e-05, |
| "loss": 2.833210754394531, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.6839577329490875, |
| "grad_norm": 0.34715819358825684, |
| "learning_rate": 7.475249998274621e-05, |
| "loss": 2.8319278717041017, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.6849183477425552, |
| "grad_norm": 0.6079280376434326, |
| "learning_rate": 7.434060806344936e-05, |
| "loss": 2.836623764038086, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.6858789625360231, |
| "grad_norm": 0.4253065884113312, |
| "learning_rate": 7.392947988375555e-05, |
| "loss": 2.8338897705078123, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.6868395773294909, |
| "grad_norm": 0.42361024022102356, |
| "learning_rate": 7.351911959377585e-05, |
| "loss": 2.8336849212646484, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.6878001921229587, |
| "grad_norm": 0.3799959719181061, |
| "learning_rate": 7.310953133586981e-05, |
| "loss": 2.831894874572754, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.6887608069164265, |
| "grad_norm": 0.3994083106517792, |
| "learning_rate": 7.27007192446036e-05, |
| "loss": 2.833780860900879, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.6897214217098944, |
| "grad_norm": 0.5485339760780334, |
| "learning_rate": 7.229268744670883e-05, |
| "loss": 2.831420135498047, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.6906820365033621, |
| "grad_norm": 0.3720037341117859, |
| "learning_rate": 7.188544006104e-05, |
| "loss": 2.8320638656616213, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.69164265129683, |
| "grad_norm": 0.4130679666996002, |
| "learning_rate": 7.147898119853367e-05, |
| "loss": 2.832315444946289, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6926032660902978, |
| "grad_norm": 0.38745787739753723, |
| "learning_rate": 7.107331496216676e-05, |
| "loss": 2.8339397430419924, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.6935638808837656, |
| "grad_norm": 0.3671323359012604, |
| "learning_rate": 7.066844544691474e-05, |
| "loss": 2.835890007019043, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.6945244956772334, |
| "grad_norm": 0.644256055355072, |
| "learning_rate": 7.026437673971107e-05, |
| "loss": 2.831973075866699, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.6954851104707013, |
| "grad_norm": 0.38100337982177734, |
| "learning_rate": 6.986111291940511e-05, |
| "loss": 2.8321693420410154, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.696445725264169, |
| "grad_norm": 0.3376613259315491, |
| "learning_rate": 6.945865805672154e-05, |
| "loss": 2.8267669677734375, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.6974063400576369, |
| "grad_norm": 0.3127301335334778, |
| "learning_rate": 6.905701621421904e-05, |
| "loss": 2.8354618072509767, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.6983669548511047, |
| "grad_norm": 0.3760312497615814, |
| "learning_rate": 6.865619144624914e-05, |
| "loss": 2.8312320709228516, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.6993275696445725, |
| "grad_norm": 0.44218409061431885, |
| "learning_rate": 6.825618779891577e-05, |
| "loss": 2.8325376510620117, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.7002881844380403, |
| "grad_norm": 0.47172650694847107, |
| "learning_rate": 6.785700931003381e-05, |
| "loss": 2.8269069671630858, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.7012487992315082, |
| "grad_norm": 0.3911219537258148, |
| "learning_rate": 6.745866000908874e-05, |
| "loss": 2.8321979522705076, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.7022094140249759, |
| "grad_norm": 0.3831585943698883, |
| "learning_rate": 6.706114391719586e-05, |
| "loss": 2.8329093933105467, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.7031700288184438, |
| "grad_norm": 0.41433626413345337, |
| "learning_rate": 6.66644650470597e-05, |
| "loss": 2.833950424194336, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.7041306436119116, |
| "grad_norm": 0.34397903084754944, |
| "learning_rate": 6.626862740293338e-05, |
| "loss": 2.830823516845703, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.7050912584053795, |
| "grad_norm": 0.39430761337280273, |
| "learning_rate": 6.587363498057845e-05, |
| "loss": 2.8328922271728514, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.7060518731988472, |
| "grad_norm": 0.3444637656211853, |
| "learning_rate": 6.547949176722437e-05, |
| "loss": 2.8278175354003907, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.7070124879923151, |
| "grad_norm": 0.4200674295425415, |
| "learning_rate": 6.508620174152826e-05, |
| "loss": 2.8334108352661134, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.7079731027857828, |
| "grad_norm": 0.5146929621696472, |
| "learning_rate": 6.469376887353491e-05, |
| "loss": 2.8332847595214843, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.7089337175792507, |
| "grad_norm": 0.41079193353652954, |
| "learning_rate": 6.430219712463634e-05, |
| "loss": 2.8310791015625, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.7098943323727186, |
| "grad_norm": 0.3751753866672516, |
| "learning_rate": 6.391149044753238e-05, |
| "loss": 2.831695556640625, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.7108549471661864, |
| "grad_norm": 0.3280157744884491, |
| "learning_rate": 6.352165278619012e-05, |
| "loss": 2.8301315307617188, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.7118155619596542, |
| "grad_norm": 0.2948898673057556, |
| "learning_rate": 6.313268807580462e-05, |
| "loss": 2.833317756652832, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.712776176753122, |
| "grad_norm": 0.4521695077419281, |
| "learning_rate": 6.274460024275895e-05, |
| "loss": 2.831614685058594, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.7137367915465899, |
| "grad_norm": 0.4257628619670868, |
| "learning_rate": 6.235739320458442e-05, |
| "loss": 2.828181838989258, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.7146974063400576, |
| "grad_norm": 0.3514285087585449, |
| "learning_rate": 6.197107086992156e-05, |
| "loss": 2.830687713623047, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.7156580211335255, |
| "grad_norm": 0.3143792748451233, |
| "learning_rate": 6.158563713847994e-05, |
| "loss": 2.8303714752197267, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.7166186359269933, |
| "grad_norm": 0.29875609278678894, |
| "learning_rate": 6.12010959009994e-05, |
| "loss": 2.832419013977051, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.7175792507204611, |
| "grad_norm": 0.2965286374092102, |
| "learning_rate": 6.081745103921047e-05, |
| "loss": 2.8294748306274413, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.7185398655139289, |
| "grad_norm": 0.3416411578655243, |
| "learning_rate": 6.043470642579516e-05, |
| "loss": 2.8330385208129885, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.7195004803073968, |
| "grad_norm": 0.4225616455078125, |
| "learning_rate": 6.005286592434828e-05, |
| "loss": 2.8280052185058593, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.7204610951008645, |
| "grad_norm": 0.42786410450935364, |
| "learning_rate": 5.967193338933778e-05, |
| "loss": 2.8302026748657227, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.7214217098943324, |
| "grad_norm": 0.34491264820098877, |
| "learning_rate": 5.9291912666066405e-05, |
| "loss": 2.8301876068115233, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.7223823246878002, |
| "grad_norm": 0.32240840792655945, |
| "learning_rate": 5.891280759063265e-05, |
| "loss": 2.8271007537841797, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.723342939481268, |
| "grad_norm": 0.35663703083992004, |
| "learning_rate": 5.853462198989184e-05, |
| "loss": 2.8305374145507813, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.7243035542747358, |
| "grad_norm": 0.3207663595676422, |
| "learning_rate": 5.815735968141813e-05, |
| "loss": 2.83172607421875, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.7252641690682037, |
| "grad_norm": 0.38391485810279846, |
| "learning_rate": 5.778102447346514e-05, |
| "loss": 2.8313671112060548, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.7262247838616714, |
| "grad_norm": 0.4688833951950073, |
| "learning_rate": 5.740562016492811e-05, |
| "loss": 2.8275665283203124, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.7271853986551393, |
| "grad_norm": 0.3272785246372223, |
| "learning_rate": 5.703115054530537e-05, |
| "loss": 2.8282421112060545, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.7281460134486071, |
| "grad_norm": 0.3496851623058319, |
| "learning_rate": 5.665761939466008e-05, |
| "loss": 2.829297637939453, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.729106628242075, |
| "grad_norm": 0.3146269917488098, |
| "learning_rate": 5.628503048358207e-05, |
| "loss": 2.8277191162109374, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.7300672430355427, |
| "grad_norm": 0.34081220626831055, |
| "learning_rate": 5.591338757314968e-05, |
| "loss": 2.831098937988281, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.7310278578290106, |
| "grad_norm": 0.2704191505908966, |
| "learning_rate": 5.554269441489204e-05, |
| "loss": 2.8295705795288084, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.7319884726224783, |
| "grad_norm": 0.29791897535324097, |
| "learning_rate": 5.517295475075102e-05, |
| "loss": 2.8285537719726563, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.7329490874159462, |
| "grad_norm": 0.3568146228790283, |
| "learning_rate": 5.4804172313043465e-05, |
| "loss": 2.8266357421875, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.733909702209414, |
| "grad_norm": 0.3287736475467682, |
| "learning_rate": 5.443635082442363e-05, |
| "loss": 2.8287899017333986, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.7348703170028819, |
| "grad_norm": 0.30268165469169617, |
| "learning_rate": 5.4069493997845356e-05, |
| "loss": 2.8289199829101563, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.7358309317963496, |
| "grad_norm": 0.28268566727638245, |
| "learning_rate": 5.3703605536524905e-05, |
| "loss": 2.8304866790771483, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.7367915465898175, |
| "grad_norm": 0.35326439142227173, |
| "learning_rate": 5.333868913390338e-05, |
| "loss": 2.8253864288330077, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.7377521613832853, |
| "grad_norm": 0.2834617793560028, |
| "learning_rate": 5.2974748473609505e-05, |
| "loss": 2.8260841369628906, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.7387127761767531, |
| "grad_norm": 0.24708347022533417, |
| "learning_rate": 5.261178722942242e-05, |
| "loss": 2.829148864746094, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.7396733909702209, |
| "grad_norm": 0.34735795855522156, |
| "learning_rate": 5.224980906523462e-05, |
| "loss": 2.827422332763672, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.7406340057636888, |
| "grad_norm": 0.4254157841205597, |
| "learning_rate": 5.188881763501486e-05, |
| "loss": 2.8264198303222656, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.7415946205571565, |
| "grad_norm": 0.43587496876716614, |
| "learning_rate": 5.152881658277147e-05, |
| "loss": 2.828824996948242, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.7425552353506244, |
| "grad_norm": 0.34229278564453125, |
| "learning_rate": 5.1169809542515404e-05, |
| "loss": 2.83102970123291, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.7435158501440923, |
| "grad_norm": 0.2842935025691986, |
| "learning_rate": 5.081180013822368e-05, |
| "loss": 2.826640510559082, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.74447646493756, |
| "grad_norm": 0.29888179898262024, |
| "learning_rate": 5.045479198380272e-05, |
| "loss": 2.8275705337524415, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.7454370797310279, |
| "grad_norm": 0.31076836585998535, |
| "learning_rate": 5.009878868305171e-05, |
| "loss": 2.8290031433105467, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.7463976945244957, |
| "grad_norm": 0.36958396434783936, |
| "learning_rate": 4.9743793829626736e-05, |
| "loss": 2.8280960083007813, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.7473583093179635, |
| "grad_norm": 0.35068604350090027, |
| "learning_rate": 4.9389811007003834e-05, |
| "loss": 2.8277267456054687, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.7483189241114313, |
| "grad_norm": 0.2839006781578064, |
| "learning_rate": 4.903684378844333e-05, |
| "loss": 2.8289569854736327, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.7492795389048992, |
| "grad_norm": 0.313565194606781, |
| "learning_rate": 4.86848957369536e-05, |
| "loss": 2.8278987884521483, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.7502401536983669, |
| "grad_norm": 0.2784314453601837, |
| "learning_rate": 4.8333970405254904e-05, |
| "loss": 2.823335647583008, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.7512007684918348, |
| "grad_norm": 0.31032121181488037, |
| "learning_rate": 4.798407133574405e-05, |
| "loss": 2.8286136627197265, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.7521613832853026, |
| "grad_norm": 0.2912919819355011, |
| "learning_rate": 4.7635202060457945e-05, |
| "loss": 2.8263755798339845, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.7531219980787704, |
| "grad_norm": 0.3187970519065857, |
| "learning_rate": 4.72873661010385e-05, |
| "loss": 2.8279151916503906, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.7540826128722382, |
| "grad_norm": 0.34837865829467773, |
| "learning_rate": 4.694056696869688e-05, |
| "loss": 2.8299861907958985, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.7550432276657061, |
| "grad_norm": 0.28154686093330383, |
| "learning_rate": 4.659480816417785e-05, |
| "loss": 2.8240270614624023, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.7560038424591738, |
| "grad_norm": 0.3153039515018463, |
| "learning_rate": 4.6250093177725e-05, |
| "loss": 2.8291126251220704, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.7569644572526417, |
| "grad_norm": 0.2972996234893799, |
| "learning_rate": 4.590642548904479e-05, |
| "loss": 2.8271560668945312, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.7579250720461095, |
| "grad_norm": 0.30244746804237366, |
| "learning_rate": 4.5563808567272e-05, |
| "loss": 2.825422668457031, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.7588856868395774, |
| "grad_norm": 0.32528969645500183, |
| "learning_rate": 4.52222458709345e-05, |
| "loss": 2.8255084991455077, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.7598463016330451, |
| "grad_norm": 0.31731516122817993, |
| "learning_rate": 4.4881740847918155e-05, |
| "loss": 2.82420654296875, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.760806916426513, |
| "grad_norm": 0.32816174626350403, |
| "learning_rate": 4.454229693543251e-05, |
| "loss": 2.829576873779297, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.7617675312199808, |
| "grad_norm": 0.2828548848628998, |
| "learning_rate": 4.420391755997548e-05, |
| "loss": 2.827322769165039, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.7627281460134486, |
| "grad_norm": 0.2661912143230438, |
| "learning_rate": 4.386660613729925e-05, |
| "loss": 2.8287160873413084, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.7636887608069164, |
| "grad_norm": 0.2875749468803406, |
| "learning_rate": 4.35303660723756e-05, |
| "loss": 2.822760009765625, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.7646493756003843, |
| "grad_norm": 0.2423306703567505, |
| "learning_rate": 4.3195200759361455e-05, |
| "loss": 2.826748275756836, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.765609990393852, |
| "grad_norm": 0.2602522671222687, |
| "learning_rate": 4.2861113581564884e-05, |
| "loss": 2.82442569732666, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.7665706051873199, |
| "grad_norm": 0.27814000844955444, |
| "learning_rate": 4.252810791141054e-05, |
| "loss": 2.8286266326904297, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.7675312199807877, |
| "grad_norm": 0.2583538889884949, |
| "learning_rate": 4.2196187110406054e-05, |
| "loss": 2.8250932693481445, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.7684918347742555, |
| "grad_norm": 0.29075777530670166, |
| "learning_rate": 4.186535452910784e-05, |
| "loss": 2.8221099853515623, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7694524495677233, |
| "grad_norm": 0.26990872621536255, |
| "learning_rate": 4.153561350708732e-05, |
| "loss": 2.8275081634521486, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.7704130643611912, |
| "grad_norm": 0.28701838850975037, |
| "learning_rate": 4.12069673728973e-05, |
| "loss": 2.8243141174316406, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.7713736791546589, |
| "grad_norm": 0.25467079877853394, |
| "learning_rate": 4.087941944403815e-05, |
| "loss": 2.820818328857422, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.7723342939481268, |
| "grad_norm": 0.26495566964149475, |
| "learning_rate": 4.0552973026924625e-05, |
| "loss": 2.824423980712891, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.7732949087415946, |
| "grad_norm": 0.29741546511650085, |
| "learning_rate": 4.022763141685226e-05, |
| "loss": 2.824292755126953, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.7742555235350624, |
| "grad_norm": 0.2792549729347229, |
| "learning_rate": 3.990339789796418e-05, |
| "loss": 2.8274419784545897, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.7752161383285303, |
| "grad_norm": 0.3055638372898102, |
| "learning_rate": 3.958027574321794e-05, |
| "loss": 2.825086975097656, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.7761767531219981, |
| "grad_norm": 0.24175697565078735, |
| "learning_rate": 3.9258268214352566e-05, |
| "loss": 2.82196159362793, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.777137367915466, |
| "grad_norm": 0.3098554313182831, |
| "learning_rate": 3.893737856185538e-05, |
| "loss": 2.827454948425293, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.7780979827089337, |
| "grad_norm": 0.2785423696041107, |
| "learning_rate": 3.861761002492952e-05, |
| "loss": 2.8235404968261717, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.7790585975024016, |
| "grad_norm": 0.3094455599784851, |
| "learning_rate": 3.8298965831461024e-05, |
| "loss": 2.8234750747680666, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.7800192122958693, |
| "grad_norm": 0.2724841237068176, |
| "learning_rate": 3.798144919798631e-05, |
| "loss": 2.8219326019287108, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.7809798270893372, |
| "grad_norm": 0.2618357837200165, |
| "learning_rate": 3.766506332965976e-05, |
| "loss": 2.822661209106445, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.781940441882805, |
| "grad_norm": 0.23582673072814941, |
| "learning_rate": 3.734981142022117e-05, |
| "loss": 2.8221805572509764, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.7829010566762729, |
| "grad_norm": 0.23418720066547394, |
| "learning_rate": 3.70356966519638e-05, |
| "loss": 2.826119804382324, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.7838616714697406, |
| "grad_norm": 0.24657297134399414, |
| "learning_rate": 3.672272219570199e-05, |
| "loss": 2.8231496810913086, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.7848222862632085, |
| "grad_norm": 0.2549417018890381, |
| "learning_rate": 3.641089121073934e-05, |
| "loss": 2.8232580184936524, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.7857829010566763, |
| "grad_norm": 0.2880878746509552, |
| "learning_rate": 3.610020684483674e-05, |
| "loss": 2.823045349121094, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.7867435158501441, |
| "grad_norm": 0.27378255128860474, |
| "learning_rate": 3.579067223418046e-05, |
| "loss": 2.8220659255981446, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.7877041306436119, |
| "grad_norm": 0.26168304681777954, |
| "learning_rate": 3.548229050335089e-05, |
| "loss": 2.82537727355957, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.7886647454370798, |
| "grad_norm": 0.2955664098262787, |
| "learning_rate": 3.517506476529045e-05, |
| "loss": 2.8250572204589846, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.7896253602305475, |
| "grad_norm": 0.3224260210990906, |
| "learning_rate": 3.486899812127264e-05, |
| "loss": 2.8248443603515625, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.7905859750240154, |
| "grad_norm": 0.24833816289901733, |
| "learning_rate": 3.456409366087054e-05, |
| "loss": 2.8247648239135743, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.7915465898174832, |
| "grad_norm": 0.23027928173542023, |
| "learning_rate": 3.426035446192546e-05, |
| "loss": 2.822571563720703, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.792507204610951, |
| "grad_norm": 0.24701501429080963, |
| "learning_rate": 3.395778359051634e-05, |
| "loss": 2.8267234802246093, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.7934678194044188, |
| "grad_norm": 0.22311267256736755, |
| "learning_rate": 3.365638410092819e-05, |
| "loss": 2.8257022857666017, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.7944284341978867, |
| "grad_norm": 0.23521657288074493, |
| "learning_rate": 3.3356159035621746e-05, |
| "loss": 2.8220043182373047, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.7953890489913544, |
| "grad_norm": 0.2287784218788147, |
| "learning_rate": 3.3057111425202614e-05, |
| "loss": 2.8221038818359374, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.7963496637848223, |
| "grad_norm": 0.20921269059181213, |
| "learning_rate": 3.275924428839043e-05, |
| "loss": 2.8214012145996095, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.7973102785782901, |
| "grad_norm": 0.19233739376068115, |
| "learning_rate": 3.246256063198895e-05, |
| "loss": 2.8214378356933594, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.7982708933717579, |
| "grad_norm": 0.2111852765083313, |
| "learning_rate": 3.216706345085499e-05, |
| "loss": 2.826046943664551, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.7992315081652257, |
| "grad_norm": 0.20778204500675201, |
| "learning_rate": 3.187275572786878e-05, |
| "loss": 2.8189327239990236, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.8001921229586936, |
| "grad_norm": 0.23499462008476257, |
| "learning_rate": 3.15796404339036e-05, |
| "loss": 2.8209259033203127, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.8011527377521613, |
| "grad_norm": 0.23061686754226685, |
| "learning_rate": 3.128772052779569e-05, |
| "loss": 2.8280948638916015, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.8021133525456292, |
| "grad_norm": 0.2535129487514496, |
| "learning_rate": 3.099699895631474e-05, |
| "loss": 2.820085906982422, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.803073967339097, |
| "grad_norm": 0.191250279545784, |
| "learning_rate": 3.0707478654133706e-05, |
| "loss": 2.8241729736328125, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.8040345821325648, |
| "grad_norm": 0.22104866802692413, |
| "learning_rate": 3.041916254379949e-05, |
| "loss": 2.824297332763672, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.8049951969260326, |
| "grad_norm": 0.19869548082351685, |
| "learning_rate": 3.0132053535703342e-05, |
| "loss": 2.8250572204589846, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.8059558117195005, |
| "grad_norm": 0.2149796336889267, |
| "learning_rate": 2.984615452805147e-05, |
| "loss": 2.8241449356079102, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.8069164265129684, |
| "grad_norm": 0.20605628192424774, |
| "learning_rate": 2.9561468406835865e-05, |
| "loss": 2.82540283203125, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.8078770413064361, |
| "grad_norm": 0.22707077860832214, |
| "learning_rate": 2.927799804580495e-05, |
| "loss": 2.823532485961914, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.808837656099904, |
| "grad_norm": 0.2316194772720337, |
| "learning_rate": 2.8995746306434853e-05, |
| "loss": 2.819510078430176, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.8097982708933718, |
| "grad_norm": 0.23179353773593903, |
| "learning_rate": 2.871471603790035e-05, |
| "loss": 2.8233768463134767, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.8107588856868396, |
| "grad_norm": 0.21509668231010437, |
| "learning_rate": 2.8434910077046163e-05, |
| "loss": 2.8241294860839843, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.8117195004803074, |
| "grad_norm": 0.20974694192409515, |
| "learning_rate": 2.8156331248358295e-05, |
| "loss": 2.8189987182617187, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.8126801152737753, |
| "grad_norm": 0.18374113738536835, |
| "learning_rate": 2.787898236393556e-05, |
| "loss": 2.8204113006591798, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.813640730067243, |
| "grad_norm": 0.2046244740486145, |
| "learning_rate": 2.7602866223461044e-05, |
| "loss": 2.82018928527832, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.8146013448607109, |
| "grad_norm": 0.20929695665836334, |
| "learning_rate": 2.7327985614174143e-05, |
| "loss": 2.8256500244140623, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.8155619596541787, |
| "grad_norm": 0.19047677516937256, |
| "learning_rate": 2.7054343310842115e-05, |
| "loss": 2.8188905715942383, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.8165225744476465, |
| "grad_norm": 0.19980789721012115, |
| "learning_rate": 2.6781942075732294e-05, |
| "loss": 2.819545364379883, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.8174831892411143, |
| "grad_norm": 0.22025620937347412, |
| "learning_rate": 2.65107846585841e-05, |
| "loss": 2.8238851547241213, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.8184438040345822, |
| "grad_norm": 0.1966722011566162, |
| "learning_rate": 2.624087379658123e-05, |
| "loss": 2.818410873413086, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.8194044188280499, |
| "grad_norm": 0.24230927228927612, |
| "learning_rate": 2.5972212214324162e-05, |
| "loss": 2.8239215850830077, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.8203650336215178, |
| "grad_norm": 0.208970844745636, |
| "learning_rate": 2.5704802623802595e-05, |
| "loss": 2.821892738342285, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.8213256484149856, |
| "grad_norm": 0.21934658288955688, |
| "learning_rate": 2.5438647724368054e-05, |
| "loss": 2.8246734619140623, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.8222862632084534, |
| "grad_norm": 0.23692256212234497, |
| "learning_rate": 2.5173750202706666e-05, |
| "loss": 2.8202524185180664, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.8232468780019212, |
| "grad_norm": 0.1973477005958557, |
| "learning_rate": 2.491011273281189e-05, |
| "loss": 2.820056915283203, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.8242074927953891, |
| "grad_norm": 0.18771995604038239, |
| "learning_rate": 2.4647737975957954e-05, |
| "loss": 2.824921989440918, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.8251681075888568, |
| "grad_norm": 0.19082146883010864, |
| "learning_rate": 2.4386628580672396e-05, |
| "loss": 2.8206953048706054, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.8261287223823247, |
| "grad_norm": 0.17984676361083984, |
| "learning_rate": 2.4126787182709796e-05, |
| "loss": 2.8229496002197267, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.8270893371757925, |
| "grad_norm": 0.21605776250362396, |
| "learning_rate": 2.3868216405025002e-05, |
| "loss": 2.822156524658203, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.8280499519692603, |
| "grad_norm": 0.19645941257476807, |
| "learning_rate": 2.361091885774652e-05, |
| "loss": 2.821907615661621, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.8290105667627281, |
| "grad_norm": 0.215690478682518, |
| "learning_rate": 2.3354897138150536e-05, |
| "loss": 2.823683166503906, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.829971181556196, |
| "grad_norm": 0.19755929708480835, |
| "learning_rate": 2.3100153830634218e-05, |
| "loss": 2.823313903808594, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.8309317963496637, |
| "grad_norm": 0.19786836206912994, |
| "learning_rate": 2.284669150669001e-05, |
| "loss": 2.820745849609375, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.8318924111431316, |
| "grad_norm": 0.2134370356798172, |
| "learning_rate": 2.259451272487957e-05, |
| "loss": 2.820496368408203, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.8328530259365994, |
| "grad_norm": 0.1759730875492096, |
| "learning_rate": 2.234362003080772e-05, |
| "loss": 2.820623779296875, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.8338136407300673, |
| "grad_norm": 0.18195176124572754, |
| "learning_rate": 2.2094015957097215e-05, |
| "loss": 2.818101692199707, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.834774255523535, |
| "grad_norm": 0.1784997135400772, |
| "learning_rate": 2.1845703023362647e-05, |
| "loss": 2.820517921447754, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.8357348703170029, |
| "grad_norm": 0.1846170425415039, |
| "learning_rate": 2.159868373618544e-05, |
| "loss": 2.8230045318603514, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.8366954851104706, |
| "grad_norm": 0.21063563227653503, |
| "learning_rate": 2.13529605890883e-05, |
| "loss": 2.8197351455688477, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.8376560999039385, |
| "grad_norm": 0.1774480789899826, |
| "learning_rate": 2.110853606251004e-05, |
| "loss": 2.8187665939331055, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.8386167146974063, |
| "grad_norm": 0.18442946672439575, |
| "learning_rate": 2.0865412623780858e-05, |
| "loss": 2.8202911376953126, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.8395773294908742, |
| "grad_norm": 0.16251033544540405, |
| "learning_rate": 2.0623592727096916e-05, |
| "loss": 2.81771240234375, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.840537944284342, |
| "grad_norm": 0.21092604100704193, |
| "learning_rate": 2.0383078813496e-05, |
| "loss": 2.8186887741088866, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.8414985590778098, |
| "grad_norm": 0.16519330441951752, |
| "learning_rate": 2.014387331083268e-05, |
| "loss": 2.819426345825195, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.8424591738712777, |
| "grad_norm": 0.17469525337219238, |
| "learning_rate": 1.990597863375389e-05, |
| "loss": 2.8173801422119142, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.8434197886647454, |
| "grad_norm": 0.17096151411533356, |
| "learning_rate": 1.966939718367444e-05, |
| "loss": 2.818032455444336, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.8443804034582133, |
| "grad_norm": 0.18871955573558807, |
| "learning_rate": 1.9434131348752842e-05, |
| "loss": 2.8212139129638674, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.8453410182516811, |
| "grad_norm": 0.1763574182987213, |
| "learning_rate": 1.920018350386725e-05, |
| "loss": 2.8204654693603515, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.8463016330451489, |
| "grad_norm": 0.17636708915233612, |
| "learning_rate": 1.8967556010591423e-05, |
| "loss": 2.8186906814575194, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.8472622478386167, |
| "grad_norm": 0.17625270783901215, |
| "learning_rate": 1.873625121717089e-05, |
| "loss": 2.822053337097168, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.8482228626320846, |
| "grad_norm": 0.15954363346099854, |
| "learning_rate": 1.850627145849926e-05, |
| "loss": 2.816944885253906, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.8491834774255523, |
| "grad_norm": 0.16209356486797333, |
| "learning_rate": 1.8277619056094684e-05, |
| "loss": 2.817719650268555, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.8501440922190202, |
| "grad_norm": 0.17170670628547668, |
| "learning_rate": 1.805029631807632e-05, |
| "loss": 2.818096160888672, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.851104707012488, |
| "grad_norm": 0.15733708441257477, |
| "learning_rate": 1.7824305539141165e-05, |
| "loss": 2.819830322265625, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.8520653218059558, |
| "grad_norm": 0.1653648316860199, |
| "learning_rate": 1.7599649000540828e-05, |
| "loss": 2.8197336196899414, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.8530259365994236, |
| "grad_norm": 0.1656099557876587, |
| "learning_rate": 1.7376328970058382e-05, |
| "loss": 2.816743278503418, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.8539865513928915, |
| "grad_norm": 0.15552137792110443, |
| "learning_rate": 1.715434770198582e-05, |
| "loss": 2.823103332519531, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.8549471661863592, |
| "grad_norm": 0.17172972857952118, |
| "learning_rate": 1.6933707437100852e-05, |
| "loss": 2.8175386428833007, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.8559077809798271, |
| "grad_norm": 0.162519633769989, |
| "learning_rate": 1.67144104026446e-05, |
| "loss": 2.816008949279785, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.8568683957732949, |
| "grad_norm": 0.1861235499382019, |
| "learning_rate": 1.6496458812299073e-05, |
| "loss": 2.816597557067871, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.8578290105667628, |
| "grad_norm": 0.16897127032279968, |
| "learning_rate": 1.6279854866164586e-05, |
| "loss": 2.8191097259521483, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.8587896253602305, |
| "grad_norm": 0.16436539590358734, |
| "learning_rate": 1.6064600750737995e-05, |
| "loss": 2.8192588806152346, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.8597502401536984, |
| "grad_norm": 0.17348939180374146, |
| "learning_rate": 1.5850698638890093e-05, |
| "loss": 2.817781448364258, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.8607108549471661, |
| "grad_norm": 0.17937251925468445, |
| "learning_rate": 1.563815068984418e-05, |
| "loss": 2.8207448959350585, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.861671469740634, |
| "grad_norm": 0.15728498995304108, |
| "learning_rate": 1.54269590491539e-05, |
| "loss": 2.8215309143066407, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.8626320845341018, |
| "grad_norm": 0.15450921654701233, |
| "learning_rate": 1.521712584868166e-05, |
| "loss": 2.8194122314453125, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.8635926993275697, |
| "grad_norm": 0.15388450026512146, |
| "learning_rate": 1.5008653206577376e-05, |
| "loss": 2.8159854888916014, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.8645533141210374, |
| "grad_norm": 0.1582845151424408, |
| "learning_rate": 1.4801543227256685e-05, |
| "loss": 2.8205434799194338, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.8655139289145053, |
| "grad_norm": 0.15384982526302338, |
| "learning_rate": 1.4595798001379965e-05, |
| "loss": 2.820438003540039, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.866474543707973, |
| "grad_norm": 0.13886255025863647, |
| "learning_rate": 1.4391419605831218e-05, |
| "loss": 2.819916534423828, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.8674351585014409, |
| "grad_norm": 0.15293574333190918, |
| "learning_rate": 1.4188410103696957e-05, |
| "loss": 2.8186580657958986, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.8683957732949087, |
| "grad_norm": 0.15670090913772583, |
| "learning_rate": 1.398677154424559e-05, |
| "loss": 2.8180870056152343, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.8693563880883766, |
| "grad_norm": 0.13321813941001892, |
| "learning_rate": 1.3786505962906475e-05, |
| "loss": 2.816482162475586, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.8703170028818443, |
| "grad_norm": 0.15356488525867462, |
| "learning_rate": 1.3587615381249622e-05, |
| "loss": 2.8203657150268553, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.8712776176753122, |
| "grad_norm": 0.16018545627593994, |
| "learning_rate": 1.3390101806965165e-05, |
| "loss": 2.821044158935547, |
| "step": 4535 |
| }, |
| { |
| "epoch": 0.8722382324687801, |
| "grad_norm": 0.154199481010437, |
| "learning_rate": 1.3193967233843083e-05, |
| "loss": 2.818889617919922, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.8731988472622478, |
| "grad_norm": 0.16034984588623047, |
| "learning_rate": 1.2999213641753164e-05, |
| "loss": 2.8158634185791014, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.8741594620557157, |
| "grad_norm": 0.15023402869701385, |
| "learning_rate": 1.280584299662486e-05, |
| "loss": 2.8189815521240233, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.8751200768491835, |
| "grad_norm": 0.15561360120773315, |
| "learning_rate": 1.2613857250427622e-05, |
| "loss": 2.8163185119628906, |
| "step": 4555 |
| }, |
| { |
| "epoch": 0.8760806916426513, |
| "grad_norm": 0.13934877514839172, |
| "learning_rate": 1.2423258341151088e-05, |
| "loss": 2.8184600830078126, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.8770413064361191, |
| "grad_norm": 0.16308888792991638, |
| "learning_rate": 1.2234048192785539e-05, |
| "loss": 2.8192535400390626, |
| "step": 4565 |
| }, |
| { |
| "epoch": 0.878001921229587, |
| "grad_norm": 0.13949905335903168, |
| "learning_rate": 1.2046228715302509e-05, |
| "loss": 2.8167484283447264, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.8789625360230547, |
| "grad_norm": 0.19002912938594818, |
| "learning_rate": 1.1859801804635471e-05, |
| "loss": 2.818648910522461, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.8799231508165226, |
| "grad_norm": 0.1495082676410675, |
| "learning_rate": 1.167476934266065e-05, |
| "loss": 2.81795654296875, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.8808837656099904, |
| "grad_norm": 0.14693421125411987, |
| "learning_rate": 1.1491133197178177e-05, |
| "loss": 2.8210113525390623, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.8818443804034583, |
| "grad_norm": 0.15263605117797852, |
| "learning_rate": 1.1308895221893088e-05, |
| "loss": 2.8196102142333985, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.882804995196926, |
| "grad_norm": 0.14515769481658936, |
| "learning_rate": 1.1128057256396684e-05, |
| "loss": 2.819786262512207, |
| "step": 4595 |
| }, |
| { |
| "epoch": 0.8837656099903939, |
| "grad_norm": 0.13253174722194672, |
| "learning_rate": 1.0948621126147978e-05, |
| "loss": 2.8158369064331055, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.8847262247838616, |
| "grad_norm": 0.1385597586631775, |
| "learning_rate": 1.0770588642455092e-05, |
| "loss": 2.8145606994628904, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.8856868395773295, |
| "grad_norm": 0.15321281552314758, |
| "learning_rate": 1.0593961602457346e-05, |
| "loss": 2.822213554382324, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.8866474543707973, |
| "grad_norm": 0.1366969645023346, |
| "learning_rate": 1.041874178910666e-05, |
| "loss": 2.8188133239746094, |
| "step": 4615 |
| }, |
| { |
| "epoch": 0.8876080691642652, |
| "grad_norm": 0.1428319811820984, |
| "learning_rate": 1.0244930971149918e-05, |
| "loss": 2.8199222564697264, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.8885686839577329, |
| "grad_norm": 0.13369108736515045, |
| "learning_rate": 1.0072530903110942e-05, |
| "loss": 2.8198825836181642, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.8895292987512008, |
| "grad_norm": 0.14023332297801971, |
| "learning_rate": 9.901543325272753e-06, |
| "loss": 2.8179443359375, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.8904899135446686, |
| "grad_norm": 0.1439591497182846, |
| "learning_rate": 9.731969963660224e-06, |
| "loss": 2.82034969329834, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.8914505283381364, |
| "grad_norm": 0.1384945958852768, |
| "learning_rate": 9.56381253002233e-06, |
| "loss": 2.8165306091308593, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.8924111431316042, |
| "grad_norm": 0.14330501854419708, |
| "learning_rate": 9.397072721815113e-06, |
| "loss": 2.819794464111328, |
| "step": 4645 |
| }, |
| { |
| "epoch": 0.8933717579250721, |
| "grad_norm": 0.14544479548931122, |
| "learning_rate": 9.231752222184496e-06, |
| "loss": 2.8153934478759766, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.8943323727185398, |
| "grad_norm": 0.13283999264240265, |
| "learning_rate": 9.067852699949197e-06, |
| "loss": 2.8222179412841797, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.8952929875120077, |
| "grad_norm": 0.1298431158065796, |
| "learning_rate": 8.905375809584053e-06, |
| "loss": 2.8179580688476564, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.8962536023054755, |
| "grad_norm": 0.13331957161426544, |
| "learning_rate": 8.744323191203129e-06, |
| "loss": 2.8178497314453126, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.8972142170989433, |
| "grad_norm": 0.1336248815059662, |
| "learning_rate": 8.5846964705433e-06, |
| "loss": 2.817870330810547, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.8981748318924111, |
| "grad_norm": 0.13920700550079346, |
| "learning_rate": 8.426497258947813e-06, |
| "loss": 2.8151180267333986, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.899135446685879, |
| "grad_norm": 0.12426065653562546, |
| "learning_rate": 8.269727153349915e-06, |
| "loss": 2.8183738708496096, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.9000960614793467, |
| "grad_norm": 0.13054883480072021, |
| "learning_rate": 8.114387736256966e-06, |
| "loss": 2.818001556396484, |
| "step": 4685 |
| }, |
| { |
| "epoch": 0.9010566762728146, |
| "grad_norm": 0.13032682240009308, |
| "learning_rate": 7.960480575734162e-06, |
| "loss": 2.8181526184082033, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.9020172910662824, |
| "grad_norm": 0.14295165240764618, |
| "learning_rate": 7.808007225388963e-06, |
| "loss": 2.8167598724365233, |
| "step": 4695 |
| }, |
| { |
| "epoch": 0.9029779058597502, |
| "grad_norm": 0.13148348033428192, |
| "learning_rate": 7.656969224355285e-06, |
| "loss": 2.817153549194336, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.9039385206532181, |
| "grad_norm": 0.13584092259407043, |
| "learning_rate": 7.507368097277994e-06, |
| "loss": 2.813400077819824, |
| "step": 4705 |
| }, |
| { |
| "epoch": 0.9048991354466859, |
| "grad_norm": 0.13805082440376282, |
| "learning_rate": 7.359205354297499e-06, |
| "loss": 2.8138927459716796, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.9058597502401537, |
| "grad_norm": 0.1245737373828888, |
| "learning_rate": 7.21248249103451e-06, |
| "loss": 2.8152067184448244, |
| "step": 4715 |
| }, |
| { |
| "epoch": 0.9068203650336215, |
| "grad_norm": 0.12117403000593185, |
| "learning_rate": 7.067200988574983e-06, |
| "loss": 2.8162109375, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.9077809798270894, |
| "grad_norm": 0.12167931348085403, |
| "learning_rate": 6.923362313455094e-06, |
| "loss": 2.811721611022949, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.9087415946205571, |
| "grad_norm": 0.12101319432258606, |
| "learning_rate": 6.780967917646518e-06, |
| "loss": 2.8149484634399413, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.909702209414025, |
| "grad_norm": 0.13186109066009521, |
| "learning_rate": 6.640019238541727e-06, |
| "loss": 2.821930694580078, |
| "step": 4735 |
| }, |
| { |
| "epoch": 0.9106628242074928, |
| "grad_norm": 0.12498224526643753, |
| "learning_rate": 6.5005176989394335e-06, |
| "loss": 2.819742774963379, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.9116234390009607, |
| "grad_norm": 0.12152751535177231, |
| "learning_rate": 6.362464707030334e-06, |
| "loss": 2.818644332885742, |
| "step": 4745 |
| }, |
| { |
| "epoch": 0.9125840537944284, |
| "grad_norm": 0.12503238022327423, |
| "learning_rate": 6.225861656382825e-06, |
| "loss": 2.8183855056762694, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.9135446685878963, |
| "grad_norm": 0.1193387359380722, |
| "learning_rate": 6.090709925928938e-06, |
| "loss": 2.8130598068237305, |
| "step": 4755 |
| }, |
| { |
| "epoch": 0.914505283381364, |
| "grad_norm": 0.12339451164007187, |
| "learning_rate": 5.957010879950446e-06, |
| "loss": 2.81536979675293, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.9154658981748319, |
| "grad_norm": 0.11361610144376755, |
| "learning_rate": 5.824765868065101e-06, |
| "loss": 2.814308929443359, |
| "step": 4765 |
| }, |
| { |
| "epoch": 0.9164265129682997, |
| "grad_norm": 0.11605164408683777, |
| "learning_rate": 5.69397622521291e-06, |
| "loss": 2.8164608001708986, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.9173871277617676, |
| "grad_norm": 0.11599262058734894, |
| "learning_rate": 5.564643271642799e-06, |
| "loss": 2.817517852783203, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.9183477425552353, |
| "grad_norm": 0.11421811580657959, |
| "learning_rate": 5.436768312899226e-06, |
| "loss": 2.8213729858398438, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.9193083573487032, |
| "grad_norm": 0.11812606453895569, |
| "learning_rate": 5.310352639808968e-06, |
| "loss": 2.816654586791992, |
| "step": 4785 |
| }, |
| { |
| "epoch": 0.920268972142171, |
| "grad_norm": 0.12096104770898819, |
| "learning_rate": 5.185397528468155e-06, |
| "loss": 2.8137371063232424, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.9212295869356388, |
| "grad_norm": 0.11127284914255142, |
| "learning_rate": 5.061904240229309e-06, |
| "loss": 2.8210193634033205, |
| "step": 4795 |
| }, |
| { |
| "epoch": 0.9221902017291066, |
| "grad_norm": 0.1057458147406578, |
| "learning_rate": 4.939874021688739e-06, |
| "loss": 2.816180419921875, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.9231508165225745, |
| "grad_norm": 0.10853277891874313, |
| "learning_rate": 4.819308104673769e-06, |
| "loss": 2.8158794403076173, |
| "step": 4805 |
| }, |
| { |
| "epoch": 0.9241114313160422, |
| "grad_norm": 0.11734215915203094, |
| "learning_rate": 4.700207706230513e-06, |
| "loss": 2.818410873413086, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.9250720461095101, |
| "grad_norm": 0.11830545216798782, |
| "learning_rate": 4.582574028611435e-06, |
| "loss": 2.8172992706298827, |
| "step": 4815 |
| }, |
| { |
| "epoch": 0.9260326609029779, |
| "grad_norm": 0.17156197130680084, |
| "learning_rate": 4.466408259263243e-06, |
| "loss": 2.816666030883789, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.9269932756964457, |
| "grad_norm": 0.11448012292385101, |
| "learning_rate": 4.351711570815014e-06, |
| "loss": 2.8137338638305662, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.9279538904899135, |
| "grad_norm": 0.10624872893095016, |
| "learning_rate": 4.238485121066154e-06, |
| "loss": 2.8165781021118166, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.9289145052833814, |
| "grad_norm": 0.12709708511829376, |
| "learning_rate": 4.126730052974908e-06, |
| "loss": 2.814931869506836, |
| "step": 4835 |
| }, |
| { |
| "epoch": 0.9298751200768491, |
| "grad_norm": 0.15824326872825623, |
| "learning_rate": 4.016447494646718e-06, |
| "loss": 2.8185230255126954, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.930835734870317, |
| "grad_norm": 0.12272030860185623, |
| "learning_rate": 3.907638559322817e-06, |
| "loss": 2.8193038940429687, |
| "step": 4845 |
| }, |
| { |
| "epoch": 0.9317963496637848, |
| "grad_norm": 0.14079715311527252, |
| "learning_rate": 3.8003043453691207e-06, |
| "loss": 2.813698959350586, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.9327569644572526, |
| "grad_norm": 0.16456739604473114, |
| "learning_rate": 3.69444593626495e-06, |
| "loss": 2.8172222137451173, |
| "step": 4855 |
| }, |
| { |
| "epoch": 0.9337175792507204, |
| "grad_norm": 0.1126532033085823, |
| "learning_rate": 3.5900644005922465e-06, |
| "loss": 2.8200458526611327, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.9346781940441883, |
| "grad_norm": 0.12338647246360779, |
| "learning_rate": 3.48716079202474e-06, |
| "loss": 2.8146718978881835, |
| "step": 4865 |
| }, |
| { |
| "epoch": 0.9356388088376562, |
| "grad_norm": 0.13380901515483856, |
| "learning_rate": 3.385736149317264e-06, |
| "loss": 2.81722412109375, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.9365994236311239, |
| "grad_norm": 0.12491049617528915, |
| "learning_rate": 3.2857914962953926e-06, |
| "loss": 2.8170299530029297, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.9375600384245918, |
| "grad_norm": 0.14564266800880432, |
| "learning_rate": 3.1873278418449e-06, |
| "loss": 2.814952087402344, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.9385206532180596, |
| "grad_norm": 0.1412232220172882, |
| "learning_rate": 3.090346179901837e-06, |
| "loss": 2.816575622558594, |
| "step": 4885 |
| }, |
| { |
| "epoch": 0.9394812680115274, |
| "grad_norm": 0.12283110618591309, |
| "learning_rate": 2.99484748944227e-06, |
| "loss": 2.816323471069336, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.9404418828049952, |
| "grad_norm": 0.131371408700943, |
| "learning_rate": 2.9008327344724913e-06, |
| "loss": 2.8180400848388674, |
| "step": 4895 |
| }, |
| { |
| "epoch": 0.9414024975984631, |
| "grad_norm": 0.10252773761749268, |
| "learning_rate": 2.808302864019374e-06, |
| "loss": 2.8142473220825197, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.9423631123919308, |
| "grad_norm": 0.12445132434368134, |
| "learning_rate": 2.7172588121205983e-06, |
| "loss": 2.8168582916259766, |
| "step": 4905 |
| }, |
| { |
| "epoch": 0.9433237271853987, |
| "grad_norm": 0.1234014481306076, |
| "learning_rate": 2.62770149781541e-06, |
| "loss": 2.817163848876953, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.9442843419788665, |
| "grad_norm": 0.1050441563129425, |
| "learning_rate": 2.539631825135191e-06, |
| "loss": 2.823550796508789, |
| "step": 4915 |
| }, |
| { |
| "epoch": 0.9452449567723343, |
| "grad_norm": 0.11725042015314102, |
| "learning_rate": 2.4530506830944876e-06, |
| "loss": 2.820196533203125, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.9462055715658021, |
| "grad_norm": 0.11576535552740097, |
| "learning_rate": 2.367958945681897e-06, |
| "loss": 2.814539337158203, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.94716618635927, |
| "grad_norm": 0.10262072086334229, |
| "learning_rate": 2.2843574718513114e-06, |
| "loss": 2.8174243927001954, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.9481268011527377, |
| "grad_norm": 0.10643728822469711, |
| "learning_rate": 2.2022471055132553e-06, |
| "loss": 2.815760040283203, |
| "step": 4935 |
| }, |
| { |
| "epoch": 0.9490874159462056, |
| "grad_norm": 0.09736967086791992, |
| "learning_rate": 2.1216286755263434e-06, |
| "loss": 2.8164119720458984, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.9500480307396734, |
| "grad_norm": 0.1281130164861679, |
| "learning_rate": 2.042502995688905e-06, |
| "loss": 2.8168426513671876, |
| "step": 4945 |
| }, |
| { |
| "epoch": 0.9510086455331412, |
| "grad_norm": 0.10349207371473312, |
| "learning_rate": 1.964870864730789e-06, |
| "loss": 2.8166751861572266, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.951969260326609, |
| "grad_norm": 0.13925877213478088, |
| "learning_rate": 1.8887330663053536e-06, |
| "loss": 2.816518783569336, |
| "step": 4955 |
| }, |
| { |
| "epoch": 0.9529298751200769, |
| "grad_norm": 0.09690409153699875, |
| "learning_rate": 1.8140903689814079e-06, |
| "loss": 2.8186017990112306, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.9538904899135446, |
| "grad_norm": 0.11781689524650574, |
| "learning_rate": 1.740943526235583e-06, |
| "loss": 2.816529083251953, |
| "step": 4965 |
| }, |
| { |
| "epoch": 0.9548511047070125, |
| "grad_norm": 0.14491458237171173, |
| "learning_rate": 1.6692932764447054e-06, |
| "loss": 2.8170598983764648, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.9558117195004803, |
| "grad_norm": 0.09681481868028641, |
| "learning_rate": 1.5991403428783188e-06, |
| "loss": 2.8186452865600584, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.9567723342939481, |
| "grad_norm": 0.10160388797521591, |
| "learning_rate": 1.5304854336913752e-06, |
| "loss": 2.8164340972900392, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.9577329490874159, |
| "grad_norm": 0.09434349089860916, |
| "learning_rate": 1.463329241917105e-06, |
| "loss": 2.8180526733398437, |
| "step": 4985 |
| }, |
| { |
| "epoch": 0.9586935638808838, |
| "grad_norm": 0.10586917400360107, |
| "learning_rate": 1.397672445460024e-06, |
| "loss": 2.813650894165039, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.9596541786743515, |
| "grad_norm": 0.09675531089305878, |
| "learning_rate": 1.333515707089089e-06, |
| "loss": 2.8159751892089844, |
| "step": 4995 |
| }, |
| { |
| "epoch": 0.9606147934678194, |
| "grad_norm": 0.1004442647099495, |
| "learning_rate": 1.2708596744309685e-06, |
| "loss": 2.816268730163574, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.9615754082612872, |
| "grad_norm": 0.09772492200136185, |
| "learning_rate": 1.209704979963616e-06, |
| "loss": 2.8217578887939454, |
| "step": 5005 |
| }, |
| { |
| "epoch": 0.962536023054755, |
| "grad_norm": 0.09763862192630768, |
| "learning_rate": 1.1500522410096912e-06, |
| "loss": 2.8170074462890624, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.9634966378482228, |
| "grad_norm": 0.09506134688854218, |
| "learning_rate": 1.0919020597305649e-06, |
| "loss": 2.8169906616210936, |
| "step": 5015 |
| }, |
| { |
| "epoch": 0.9644572526416907, |
| "grad_norm": 0.09169244021177292, |
| "learning_rate": 1.0352550231200407e-06, |
| "loss": 2.818475341796875, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.9654178674351584, |
| "grad_norm": 0.09423839300870895, |
| "learning_rate": 9.801117029985596e-07, |
| "loss": 2.816048812866211, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.9663784822286263, |
| "grad_norm": 0.0988612100481987, |
| "learning_rate": 9.264726560073377e-07, |
| "loss": 2.8169836044311523, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.9673390970220941, |
| "grad_norm": 0.09173572063446045, |
| "learning_rate": 8.743384236028051e-07, |
| "loss": 2.8156835556030275, |
| "step": 5035 |
| }, |
| { |
| "epoch": 0.968299711815562, |
| "grad_norm": 0.09405640512704849, |
| "learning_rate": 8.237095320511589e-07, |
| "loss": 2.8169689178466797, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.9692603266090298, |
| "grad_norm": 0.09245765209197998, |
| "learning_rate": 7.745864924229351e-07, |
| "loss": 2.8151527404785157, |
| "step": 5045 |
| }, |
| { |
| "epoch": 0.9702209414024976, |
| "grad_norm": 0.09277483075857162, |
| "learning_rate": 7.26969800588012e-07, |
| "loss": 2.8148799896240235, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.9711815561959655, |
| "grad_norm": 0.09106960892677307, |
| "learning_rate": 6.808599372104817e-07, |
| "loss": 2.812705230712891, |
| "step": 5055 |
| }, |
| { |
| "epoch": 0.9721421709894332, |
| "grad_norm": 0.09490223228931427, |
| "learning_rate": 6.362573677438199e-07, |
| "loss": 2.8207538604736326, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.9731027857829011, |
| "grad_norm": 0.0929575189948082, |
| "learning_rate": 5.931625424262731e-07, |
| "loss": 2.817534637451172, |
| "step": 5065 |
| }, |
| { |
| "epoch": 0.9740634005763689, |
| "grad_norm": 0.0907764732837677, |
| "learning_rate": 5.51575896276163e-07, |
| "loss": 2.8162796020507814, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.9750240153698367, |
| "grad_norm": 0.09307243674993515, |
| "learning_rate": 5.11497849087622e-07, |
| "loss": 2.813416862487793, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.9759846301633045, |
| "grad_norm": 0.0937948152422905, |
| "learning_rate": 4.7292880542634805e-07, |
| "loss": 2.8132902145385743, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.9769452449567724, |
| "grad_norm": 0.0905749499797821, |
| "learning_rate": 4.358691546254067e-07, |
| "loss": 2.814370346069336, |
| "step": 5085 |
| }, |
| { |
| "epoch": 0.9779058597502401, |
| "grad_norm": 0.08927202224731445, |
| "learning_rate": 4.0031927078145176e-07, |
| "loss": 2.8167686462402344, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.978866474543708, |
| "grad_norm": 0.09388428926467896, |
| "learning_rate": 3.662795127508111e-07, |
| "loss": 2.818276786804199, |
| "step": 5095 |
| }, |
| { |
| "epoch": 0.9798270893371758, |
| "grad_norm": 0.08715725690126419, |
| "learning_rate": 3.3375022414598994e-07, |
| "loss": 2.8158248901367187, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.9807877041306436, |
| "grad_norm": 0.08722332119941711, |
| "learning_rate": 3.027317333321233e-07, |
| "loss": 2.8168060302734377, |
| "step": 5105 |
| }, |
| { |
| "epoch": 0.9817483189241114, |
| "grad_norm": 0.08853864669799805, |
| "learning_rate": 2.7322435342364556e-07, |
| "loss": 2.814595603942871, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.9827089337175793, |
| "grad_norm": 0.08678946644067764, |
| "learning_rate": 2.452283822812262e-07, |
| "loss": 2.8177654266357424, |
| "step": 5115 |
| }, |
| { |
| "epoch": 0.983669548511047, |
| "grad_norm": 0.09480496495962143, |
| "learning_rate": 2.1874410250863893e-07, |
| "loss": 2.8194339752197264, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.9846301633045149, |
| "grad_norm": 0.08985020220279694, |
| "learning_rate": 1.9377178145003059e-07, |
| "loss": 2.8135372161865235, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.9855907780979827, |
| "grad_norm": 0.08478531241416931, |
| "learning_rate": 1.7031167118708998e-07, |
| "loss": 2.8173141479492188, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.9865513928914506, |
| "grad_norm": 0.08679889887571335, |
| "learning_rate": 1.4836400853666662e-07, |
| "loss": 2.8155088424682617, |
| "step": 5135 |
| }, |
| { |
| "epoch": 0.9875120076849183, |
| "grad_norm": 0.0879487469792366, |
| "learning_rate": 1.2792901504820595e-07, |
| "loss": 2.8137956619262696, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.9884726224783862, |
| "grad_norm": 0.0856156200170517, |
| "learning_rate": 1.0900689700166776e-07, |
| "loss": 2.8191757202148438, |
| "step": 5145 |
| }, |
| { |
| "epoch": 0.989433237271854, |
| "grad_norm": 0.08409229665994644, |
| "learning_rate": 9.159784540531124e-08, |
| "loss": 2.8132045745849608, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.9903938520653218, |
| "grad_norm": 0.08568743616342545, |
| "learning_rate": 7.57020359938798e-08, |
| "loss": 2.814972496032715, |
| "step": 5155 |
| }, |
| { |
| "epoch": 0.9913544668587896, |
| "grad_norm": 0.08696384727954865, |
| "learning_rate": 6.131962922673595e-08, |
| "loss": 2.817178153991699, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.9923150816522575, |
| "grad_norm": 0.08134496212005615, |
| "learning_rate": 4.845077028631239e-08, |
| "loss": 2.818017578125, |
| "step": 5165 |
| }, |
| { |
| "epoch": 0.9932756964457252, |
| "grad_norm": 0.08577294647693634, |
| "learning_rate": 3.709558907659671e-08, |
| "loss": 2.8143449783325196, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.9942363112391931, |
| "grad_norm": 0.08689381182193756, |
| "learning_rate": 2.7254200221848988e-08, |
| "loss": 2.8173112869262695, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.9951969260326609, |
| "grad_norm": 0.0842234268784523, |
| "learning_rate": 1.8926703065436087e-08, |
| "loss": 2.815708541870117, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.9961575408261287, |
| "grad_norm": 0.08280805498361588, |
| "learning_rate": 1.2113181668815808e-08, |
| "loss": 2.8149890899658203, |
| "step": 5185 |
| }, |
| { |
| "epoch": 0.9971181556195965, |
| "grad_norm": 0.08539645373821259, |
| "learning_rate": 6.813704810704201e-09, |
| "loss": 2.8178043365478516, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.9980787704130644, |
| "grad_norm": 0.08579740673303604, |
| "learning_rate": 3.028325986392799e-09, |
| "loss": 2.816665267944336, |
| "step": 5195 |
| }, |
| { |
| "epoch": 0.9990393852065321, |
| "grad_norm": 0.08700989931821823, |
| "learning_rate": 7.570834071823905e-10, |
| "loss": 2.8163612365722654, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.08663682639598846, |
| "learning_rate": 0.0, |
| "loss": 2.8194324493408205, |
| "step": 5205 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 5205, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4701096488778754e+20, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|