| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 31526, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003171985028230667, |
| "grad_norm": 6.952805519104004, |
| "learning_rate": 4.9949248239548315e-05, |
| "loss": 7.5095, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.006343970056461334, |
| "grad_norm": 0.7631180882453918, |
| "learning_rate": 4.9897439150753875e-05, |
| "loss": 0.581, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.009515955084692, |
| "grad_norm": 0.6402416229248047, |
| "learning_rate": 4.98445727336167e-05, |
| "loss": 0.4056, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.012687940112922668, |
| "grad_norm": 0.2075737714767456, |
| "learning_rate": 4.979170631647952e-05, |
| "loss": 0.3261, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.015859925141153332, |
| "grad_norm": 0.3269546329975128, |
| "learning_rate": 4.973883989934234e-05, |
| "loss": 0.2932, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.019031910169384, |
| "grad_norm": 0.42410802841186523, |
| "learning_rate": 4.968597348220517e-05, |
| "loss": 0.3021, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.02220389519761467, |
| "grad_norm": 0.26918208599090576, |
| "learning_rate": 4.963310706506799e-05, |
| "loss": 0.3534, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.025375880225845335, |
| "grad_norm": 0.36581048369407654, |
| "learning_rate": 4.958024064793081e-05, |
| "loss": 0.3136, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.028547865254076002, |
| "grad_norm": 0.3738636076450348, |
| "learning_rate": 4.9527374230793634e-05, |
| "loss": 0.2982, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.031719850282306665, |
| "grad_norm": 0.3204258680343628, |
| "learning_rate": 4.9474507813656455e-05, |
| "loss": 0.3183, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.034891835310537335, |
| "grad_norm": 0.24417226016521454, |
| "learning_rate": 4.942164139651928e-05, |
| "loss": 0.2906, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.038063820338768, |
| "grad_norm": 0.3362484574317932, |
| "learning_rate": 4.93687749793821e-05, |
| "loss": 0.3457, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.04123580536699867, |
| "grad_norm": 0.4605356454849243, |
| "learning_rate": 4.931590856224492e-05, |
| "loss": 0.3072, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.04440779039522934, |
| "grad_norm": 0.5630594491958618, |
| "learning_rate": 4.926304214510775e-05, |
| "loss": 0.3201, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04757977542346, |
| "grad_norm": 0.5788861513137817, |
| "learning_rate": 4.9210175727970564e-05, |
| "loss": 0.2825, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.05075176045169067, |
| "grad_norm": 0.26267775893211365, |
| "learning_rate": 4.9157309310833386e-05, |
| "loss": 0.3336, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.053923745479921334, |
| "grad_norm": 0.34053167700767517, |
| "learning_rate": 4.9104442893696214e-05, |
| "loss": 0.314, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.057095730508152004, |
| "grad_norm": 0.3658903241157532, |
| "learning_rate": 4.9051576476559036e-05, |
| "loss": 0.335, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06026771553638267, |
| "grad_norm": 0.5043643712997437, |
| "learning_rate": 4.899871005942185e-05, |
| "loss": 0.2956, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.06343970056461333, |
| "grad_norm": 0.4024595320224762, |
| "learning_rate": 4.894584364228468e-05, |
| "loss": 0.3452, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.066611685592844, |
| "grad_norm": 0.19914183020591736, |
| "learning_rate": 4.88929772251475e-05, |
| "loss": 0.2788, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.06978367062107467, |
| "grad_norm": 0.3954038619995117, |
| "learning_rate": 4.8840110808010316e-05, |
| "loss": 0.3452, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.07295565564930534, |
| "grad_norm": 0.2236422300338745, |
| "learning_rate": 4.8787244390873145e-05, |
| "loss": 0.292, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.076127640677536, |
| "grad_norm": 0.3291652202606201, |
| "learning_rate": 4.873437797373597e-05, |
| "loss": 0.2955, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.07929962570576667, |
| "grad_norm": 0.46171942353248596, |
| "learning_rate": 4.868151155659879e-05, |
| "loss": 0.3074, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.08247161073399734, |
| "grad_norm": 0.22759978473186493, |
| "learning_rate": 4.862864513946161e-05, |
| "loss": 0.3017, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.085643595762228, |
| "grad_norm": 0.23650604486465454, |
| "learning_rate": 4.857577872232443e-05, |
| "loss": 0.3176, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.08881558079045868, |
| "grad_norm": 0.5810359120368958, |
| "learning_rate": 4.8522912305187254e-05, |
| "loss": 0.3382, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.09198756581868933, |
| "grad_norm": 0.33472833037376404, |
| "learning_rate": 4.8470045888050075e-05, |
| "loss": 0.3031, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.09515955084692, |
| "grad_norm": 0.38241392374038696, |
| "learning_rate": 4.84171794709129e-05, |
| "loss": 0.2811, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.09833153587515067, |
| "grad_norm": 0.29483163356781006, |
| "learning_rate": 4.8364313053775726e-05, |
| "loss": 0.2733, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.10150352090338134, |
| "grad_norm": 0.7687553763389587, |
| "learning_rate": 4.831144663663855e-05, |
| "loss": 0.3021, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.104675505931612, |
| "grad_norm": 0.23088738322257996, |
| "learning_rate": 4.825858021950136e-05, |
| "loss": 0.2768, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.10784749095984267, |
| "grad_norm": 0.4174398183822632, |
| "learning_rate": 4.820571380236419e-05, |
| "loss": 0.2826, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.11101947598807334, |
| "grad_norm": 0.8025326132774353, |
| "learning_rate": 4.815284738522701e-05, |
| "loss": 0.342, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.11419146101630401, |
| "grad_norm": 0.22708263993263245, |
| "learning_rate": 4.809998096808983e-05, |
| "loss": 0.2597, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.11736344604453466, |
| "grad_norm": 0.7445570826530457, |
| "learning_rate": 4.8047114550952656e-05, |
| "loss": 0.3205, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.12053543107276533, |
| "grad_norm": 0.3659399151802063, |
| "learning_rate": 4.799424813381548e-05, |
| "loss": 0.3078, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.123707416100996, |
| "grad_norm": 0.21260209381580353, |
| "learning_rate": 4.79413817166783e-05, |
| "loss": 0.2728, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.12687940112922666, |
| "grad_norm": 0.44993868470191956, |
| "learning_rate": 4.788851529954112e-05, |
| "loss": 0.3012, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.13005138615745734, |
| "grad_norm": 0.47664666175842285, |
| "learning_rate": 4.783564888240394e-05, |
| "loss": 0.3808, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.133223371185688, |
| "grad_norm": 0.2831490933895111, |
| "learning_rate": 4.7782782465266765e-05, |
| "loss": 0.2902, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.13639535621391868, |
| "grad_norm": 0.22913455963134766, |
| "learning_rate": 4.772991604812959e-05, |
| "loss": 0.3305, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.13956734124214934, |
| "grad_norm": 0.3116960823535919, |
| "learning_rate": 4.767704963099241e-05, |
| "loss": 0.259, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.14273932627038, |
| "grad_norm": 0.2142101675271988, |
| "learning_rate": 4.762418321385523e-05, |
| "loss": 0.2946, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.14591131129861068, |
| "grad_norm": 0.31184491515159607, |
| "learning_rate": 4.757131679671806e-05, |
| "loss": 0.3192, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.14908329632684134, |
| "grad_norm": 0.34447625279426575, |
| "learning_rate": 4.7518450379580874e-05, |
| "loss": 0.258, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.152255281355072, |
| "grad_norm": 0.2478509545326233, |
| "learning_rate": 4.74655839624437e-05, |
| "loss": 0.2644, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.15542726638330268, |
| "grad_norm": 0.4509633183479309, |
| "learning_rate": 4.7412717545306524e-05, |
| "loss": 0.3223, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.15859925141153333, |
| "grad_norm": 0.5341806411743164, |
| "learning_rate": 4.735985112816934e-05, |
| "loss": 0.3158, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.16177123643976402, |
| "grad_norm": 0.3863554298877716, |
| "learning_rate": 4.730698471103217e-05, |
| "loss": 0.3017, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.16494322146799467, |
| "grad_norm": 0.5030498504638672, |
| "learning_rate": 4.725411829389499e-05, |
| "loss": 0.3043, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.16811520649622533, |
| "grad_norm": 0.18916811048984528, |
| "learning_rate": 4.720125187675781e-05, |
| "loss": 0.2842, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.171287191524456, |
| "grad_norm": 0.36252668499946594, |
| "learning_rate": 4.714838545962063e-05, |
| "loss": 0.3019, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.17445917655268667, |
| "grad_norm": 0.47955018281936646, |
| "learning_rate": 4.7095519042483454e-05, |
| "loss": 0.2992, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.17763116158091735, |
| "grad_norm": 0.20012222230434418, |
| "learning_rate": 4.7042652625346276e-05, |
| "loss": 0.2525, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.180803146609148, |
| "grad_norm": 0.30269569158554077, |
| "learning_rate": 4.6989786208209105e-05, |
| "loss": 0.2789, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.18397513163737866, |
| "grad_norm": 0.20235884189605713, |
| "learning_rate": 4.693691979107192e-05, |
| "loss": 0.2819, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.18714711666560935, |
| "grad_norm": 0.21970972418785095, |
| "learning_rate": 4.688405337393474e-05, |
| "loss": 0.2649, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.19031910169384, |
| "grad_norm": 0.3686061501502991, |
| "learning_rate": 4.683118695679757e-05, |
| "loss": 0.2724, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.19349108672207066, |
| "grad_norm": 0.5213696360588074, |
| "learning_rate": 4.6778320539660385e-05, |
| "loss": 0.3447, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.19666307175030134, |
| "grad_norm": 0.385406494140625, |
| "learning_rate": 4.6725454122523213e-05, |
| "loss": 0.3139, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.199835056778532, |
| "grad_norm": 0.578931450843811, |
| "learning_rate": 4.6672587705386035e-05, |
| "loss": 0.3217, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.20300704180676268, |
| "grad_norm": 0.39177459478378296, |
| "learning_rate": 4.661972128824886e-05, |
| "loss": 0.2639, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.20617902683499334, |
| "grad_norm": 0.4169202148914337, |
| "learning_rate": 4.656685487111168e-05, |
| "loss": 0.3087, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.209351011863224, |
| "grad_norm": 0.4254414141178131, |
| "learning_rate": 4.65139884539745e-05, |
| "loss": 0.2634, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.21252299689145468, |
| "grad_norm": 0.4798215627670288, |
| "learning_rate": 4.646112203683732e-05, |
| "loss": 0.3065, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.21569498191968534, |
| "grad_norm": 0.5004227161407471, |
| "learning_rate": 4.6408255619700144e-05, |
| "loss": 0.2469, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.21886696694791602, |
| "grad_norm": 0.3792094886302948, |
| "learning_rate": 4.6355389202562966e-05, |
| "loss": 0.3019, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.22203895197614668, |
| "grad_norm": 0.701235294342041, |
| "learning_rate": 4.630252278542579e-05, |
| "loss": 0.369, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.22521093700437733, |
| "grad_norm": 0.3253133296966553, |
| "learning_rate": 4.6249656368288616e-05, |
| "loss": 0.277, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.22838292203260802, |
| "grad_norm": 0.49311327934265137, |
| "learning_rate": 4.619678995115143e-05, |
| "loss": 0.2913, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.23155490706083867, |
| "grad_norm": 0.18701878190040588, |
| "learning_rate": 4.614392353401425e-05, |
| "loss": 0.3095, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.23472689208906933, |
| "grad_norm": 0.6811497807502747, |
| "learning_rate": 4.609105711687708e-05, |
| "loss": 0.2754, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.2378988771173, |
| "grad_norm": 0.16985131800174713, |
| "learning_rate": 4.6038190699739896e-05, |
| "loss": 0.3032, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.24107086214553067, |
| "grad_norm": 0.33355677127838135, |
| "learning_rate": 4.598532428260272e-05, |
| "loss": 0.31, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.24424284717376135, |
| "grad_norm": 0.3561393916606903, |
| "learning_rate": 4.5932457865465546e-05, |
| "loss": 0.3527, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.247414832201992, |
| "grad_norm": 0.5302127599716187, |
| "learning_rate": 4.587959144832837e-05, |
| "loss": 0.2963, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.2505868172302227, |
| "grad_norm": 0.26475265622138977, |
| "learning_rate": 4.582672503119119e-05, |
| "loss": 0.2859, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.2537588022584533, |
| "grad_norm": 0.42244717478752136, |
| "learning_rate": 4.577385861405401e-05, |
| "loss": 0.2915, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.256930787286684, |
| "grad_norm": 0.20778138935565948, |
| "learning_rate": 4.5720992196916833e-05, |
| "loss": 0.2775, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.2601027723149147, |
| "grad_norm": 0.21787141263484955, |
| "learning_rate": 4.5668125779779655e-05, |
| "loss": 0.2971, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.2632747573431453, |
| "grad_norm": 0.19819681346416473, |
| "learning_rate": 4.561525936264248e-05, |
| "loss": 0.3213, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.266446742371376, |
| "grad_norm": 0.23251983523368835, |
| "learning_rate": 4.55623929455053e-05, |
| "loss": 0.2691, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.2696187273996067, |
| "grad_norm": 0.26158884167671204, |
| "learning_rate": 4.550952652836812e-05, |
| "loss": 0.3758, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.27279071242783737, |
| "grad_norm": 0.18944093585014343, |
| "learning_rate": 4.545666011123094e-05, |
| "loss": 0.2989, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.275962697456068, |
| "grad_norm": 0.2313028872013092, |
| "learning_rate": 4.5403793694093764e-05, |
| "loss": 0.3013, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.2791346824842987, |
| "grad_norm": 0.2733168601989746, |
| "learning_rate": 4.535092727695659e-05, |
| "loss": 0.3073, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.28230666751252936, |
| "grad_norm": 0.5805867314338684, |
| "learning_rate": 4.529806085981941e-05, |
| "loss": 0.2595, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.28547865254076, |
| "grad_norm": 0.38282257318496704, |
| "learning_rate": 4.524519444268223e-05, |
| "loss": 0.2867, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2886506375689907, |
| "grad_norm": 0.2380545735359192, |
| "learning_rate": 4.519232802554506e-05, |
| "loss": 0.2848, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.29182262259722136, |
| "grad_norm": 0.36206936836242676, |
| "learning_rate": 4.513946160840788e-05, |
| "loss": 0.3017, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.294994607625452, |
| "grad_norm": 0.2402886301279068, |
| "learning_rate": 4.5086595191270694e-05, |
| "loss": 0.2957, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.29816659265368267, |
| "grad_norm": 0.29904839396476746, |
| "learning_rate": 4.503372877413352e-05, |
| "loss": 0.3743, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.30133857768191336, |
| "grad_norm": 0.43882688879966736, |
| "learning_rate": 4.4980862356996345e-05, |
| "loss": 0.3218, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.304510562710144, |
| "grad_norm": 0.3032098412513733, |
| "learning_rate": 4.4927995939859166e-05, |
| "loss": 0.3373, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.30768254773837467, |
| "grad_norm": 0.5561183094978333, |
| "learning_rate": 4.487512952272199e-05, |
| "loss": 0.3756, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.31085453276660535, |
| "grad_norm": 0.2683407962322235, |
| "learning_rate": 4.482226310558481e-05, |
| "loss": 0.3041, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.31402651779483604, |
| "grad_norm": 0.44373977184295654, |
| "learning_rate": 4.476939668844763e-05, |
| "loss": 0.3054, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.31719850282306666, |
| "grad_norm": 0.23822714388370514, |
| "learning_rate": 4.4716530271310453e-05, |
| "loss": 0.2684, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.32037048785129735, |
| "grad_norm": 0.37610000371932983, |
| "learning_rate": 4.4663663854173275e-05, |
| "loss": 0.3111, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.32354247287952803, |
| "grad_norm": 0.2191406935453415, |
| "learning_rate": 4.4610797437036104e-05, |
| "loss": 0.3164, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.32671445790775866, |
| "grad_norm": 0.31773659586906433, |
| "learning_rate": 4.4557931019898925e-05, |
| "loss": 0.2996, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.32988644293598934, |
| "grad_norm": 0.3252709209918976, |
| "learning_rate": 4.450506460276174e-05, |
| "loss": 0.3224, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.33305842796422, |
| "grad_norm": 0.34400445222854614, |
| "learning_rate": 4.445219818562457e-05, |
| "loss": 0.2975, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.33623041299245066, |
| "grad_norm": 0.16536681354045868, |
| "learning_rate": 4.439933176848739e-05, |
| "loss": 0.2726, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.33940239802068134, |
| "grad_norm": 0.23107750713825226, |
| "learning_rate": 4.4346465351350206e-05, |
| "loss": 0.2814, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.342574383048912, |
| "grad_norm": 0.22565191984176636, |
| "learning_rate": 4.4293598934213034e-05, |
| "loss": 0.2877, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.34574636807714265, |
| "grad_norm": 0.25360986590385437, |
| "learning_rate": 4.4240732517075856e-05, |
| "loss": 0.3075, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.34891835310537334, |
| "grad_norm": 0.42394259572029114, |
| "learning_rate": 4.418786609993868e-05, |
| "loss": 0.2732, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.352090338133604, |
| "grad_norm": 0.5393642783164978, |
| "learning_rate": 4.41349996828015e-05, |
| "loss": 0.342, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.3552623231618347, |
| "grad_norm": 0.4016542136669159, |
| "learning_rate": 4.408213326566432e-05, |
| "loss": 0.2756, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.35843430819006533, |
| "grad_norm": 0.2234315276145935, |
| "learning_rate": 4.402926684852714e-05, |
| "loss": 0.3487, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.361606293218296, |
| "grad_norm": 0.2084522843360901, |
| "learning_rate": 4.3976400431389965e-05, |
| "loss": 0.357, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.3647782782465267, |
| "grad_norm": 0.2758818566799164, |
| "learning_rate": 4.3923534014252786e-05, |
| "loss": 0.3605, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.3679502632747573, |
| "grad_norm": 0.20652857422828674, |
| "learning_rate": 4.387066759711561e-05, |
| "loss": 0.3164, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.371122248302988, |
| "grad_norm": 0.3151554763317108, |
| "learning_rate": 4.381780117997844e-05, |
| "loss": 0.3447, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.3742942333312187, |
| "grad_norm": 0.3141522705554962, |
| "learning_rate": 4.376493476284125e-05, |
| "loss": 0.2973, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.3774662183594493, |
| "grad_norm": 0.47755780816078186, |
| "learning_rate": 4.371206834570408e-05, |
| "loss": 0.3411, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.38063820338768, |
| "grad_norm": 0.2301286906003952, |
| "learning_rate": 4.36592019285669e-05, |
| "loss": 0.2651, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.3838101884159107, |
| "grad_norm": 0.2510074973106384, |
| "learning_rate": 4.360633551142972e-05, |
| "loss": 0.2988, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.3869821734441413, |
| "grad_norm": 0.26201292872428894, |
| "learning_rate": 4.3553469094292545e-05, |
| "loss": 0.262, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.390154158472372, |
| "grad_norm": 0.1688852608203888, |
| "learning_rate": 4.350060267715537e-05, |
| "loss": 0.3006, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.3933261435006027, |
| "grad_norm": 0.475284218788147, |
| "learning_rate": 4.344773626001819e-05, |
| "loss": 0.3003, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.39649812852883337, |
| "grad_norm": 0.4884473383426666, |
| "learning_rate": 4.339486984288101e-05, |
| "loss": 0.2861, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.399670113557064, |
| "grad_norm": 0.2931898534297943, |
| "learning_rate": 4.334200342574383e-05, |
| "loss": 0.2841, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.4028420985852947, |
| "grad_norm": 0.26861268281936646, |
| "learning_rate": 4.3289137008606654e-05, |
| "loss": 0.3191, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.40601408361352537, |
| "grad_norm": 0.4085983335971832, |
| "learning_rate": 4.3236270591469476e-05, |
| "loss": 0.3429, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.409186068641756, |
| "grad_norm": 0.5681502819061279, |
| "learning_rate": 4.31834041743323e-05, |
| "loss": 0.2732, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.4123580536699867, |
| "grad_norm": 0.17655836045742035, |
| "learning_rate": 4.313053775719512e-05, |
| "loss": 0.2403, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.41553003869821736, |
| "grad_norm": 0.25269463658332825, |
| "learning_rate": 4.307767134005795e-05, |
| "loss": 0.3231, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.418702023726448, |
| "grad_norm": 0.26235464215278625, |
| "learning_rate": 4.302480492292076e-05, |
| "loss": 0.2952, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.4218740087546787, |
| "grad_norm": 0.3551720380783081, |
| "learning_rate": 4.2971938505783585e-05, |
| "loss": 0.2934, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.42504599378290936, |
| "grad_norm": 0.19850347936153412, |
| "learning_rate": 4.291907208864641e-05, |
| "loss": 0.2669, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.42821797881114, |
| "grad_norm": 0.6945760250091553, |
| "learning_rate": 4.2866205671509235e-05, |
| "loss": 0.3636, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.43138996383937067, |
| "grad_norm": 0.1777346432209015, |
| "learning_rate": 4.281333925437206e-05, |
| "loss": 0.3069, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.43456194886760136, |
| "grad_norm": 0.4449566900730133, |
| "learning_rate": 4.276047283723488e-05, |
| "loss": 0.3688, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.43773393389583204, |
| "grad_norm": 0.2210356742143631, |
| "learning_rate": 4.27076064200977e-05, |
| "loss": 0.2659, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.44090591892406267, |
| "grad_norm": 0.265536367893219, |
| "learning_rate": 4.265474000296052e-05, |
| "loss": 0.3184, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.44407790395229335, |
| "grad_norm": 0.2273561656475067, |
| "learning_rate": 4.2601873585823344e-05, |
| "loss": 0.2729, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.44724988898052404, |
| "grad_norm": 0.2259570211172104, |
| "learning_rate": 4.2549007168686165e-05, |
| "loss": 0.2893, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.45042187400875466, |
| "grad_norm": 0.2315446436405182, |
| "learning_rate": 4.2496140751548994e-05, |
| "loss": 0.295, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.45359385903698535, |
| "grad_norm": 0.26081445813179016, |
| "learning_rate": 4.244327433441181e-05, |
| "loss": 0.3424, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.45676584406521603, |
| "grad_norm": 0.33118560910224915, |
| "learning_rate": 4.239040791727463e-05, |
| "loss": 0.3039, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.45993782909344666, |
| "grad_norm": 0.34620553255081177, |
| "learning_rate": 4.233754150013746e-05, |
| "loss": 0.313, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.46310981412167734, |
| "grad_norm": 0.2923032343387604, |
| "learning_rate": 4.2284675083000274e-05, |
| "loss": 0.285, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.466281799149908, |
| "grad_norm": 0.27615100145339966, |
| "learning_rate": 4.2231808665863096e-05, |
| "loss": 0.3385, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.46945378417813866, |
| "grad_norm": 0.3606735169887543, |
| "learning_rate": 4.2178942248725924e-05, |
| "loss": 0.3031, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.47262576920636934, |
| "grad_norm": 0.2961825132369995, |
| "learning_rate": 4.2126075831588746e-05, |
| "loss": 0.3671, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.4757977542346, |
| "grad_norm": 0.1403179168701172, |
| "learning_rate": 4.207320941445156e-05, |
| "loss": 0.2606, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.4789697392628307, |
| "grad_norm": 0.2120542675256729, |
| "learning_rate": 4.202034299731439e-05, |
| "loss": 0.2723, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.48214172429106134, |
| "grad_norm": 0.42080938816070557, |
| "learning_rate": 4.196747658017721e-05, |
| "loss": 0.3048, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.485313709319292, |
| "grad_norm": 0.2501380443572998, |
| "learning_rate": 4.191461016304003e-05, |
| "loss": 0.3641, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.4884856943475227, |
| "grad_norm": 0.2869213819503784, |
| "learning_rate": 4.1861743745902855e-05, |
| "loss": 0.3156, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.49165767937575333, |
| "grad_norm": 0.5819279551506042, |
| "learning_rate": 4.180887732876568e-05, |
| "loss": 0.3228, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.494829664403984, |
| "grad_norm": 0.3455282151699066, |
| "learning_rate": 4.17560109116285e-05, |
| "loss": 0.2957, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.4980016494322147, |
| "grad_norm": 0.14816895127296448, |
| "learning_rate": 4.170314449449132e-05, |
| "loss": 0.3039, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.5011736344604454, |
| "grad_norm": 0.5370512008666992, |
| "learning_rate": 4.165027807735414e-05, |
| "loss": 0.3572, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.5043456194886761, |
| "grad_norm": 0.3175135850906372, |
| "learning_rate": 4.159741166021697e-05, |
| "loss": 0.2527, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.5075176045169066, |
| "grad_norm": 0.24236038327217102, |
| "learning_rate": 4.1544545243079785e-05, |
| "loss": 0.3078, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.5106895895451373, |
| "grad_norm": 0.38427793979644775, |
| "learning_rate": 4.149167882594261e-05, |
| "loss": 0.313, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.513861574573368, |
| "grad_norm": 0.3454573154449463, |
| "learning_rate": 4.1438812408805436e-05, |
| "loss": 0.312, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.5170335596015987, |
| "grad_norm": 0.23383672535419464, |
| "learning_rate": 4.138594599166826e-05, |
| "loss": 0.3082, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.5202055446298294, |
| "grad_norm": 0.25619155168533325, |
| "learning_rate": 4.133307957453107e-05, |
| "loss": 0.315, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.5233775296580601, |
| "grad_norm": 0.17233146727085114, |
| "learning_rate": 4.12802131573939e-05, |
| "loss": 0.2746, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.5265495146862906, |
| "grad_norm": 0.305403470993042, |
| "learning_rate": 4.122734674025672e-05, |
| "loss": 0.3822, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.5297214997145213, |
| "grad_norm": 0.1978190392255783, |
| "learning_rate": 4.117448032311954e-05, |
| "loss": 0.3654, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.532893484742752, |
| "grad_norm": 0.1691414713859558, |
| "learning_rate": 4.1121613905982366e-05, |
| "loss": 0.2663, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.5360654697709827, |
| "grad_norm": 0.30035603046417236, |
| "learning_rate": 4.106874748884519e-05, |
| "loss": 0.3726, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.5392374547992134, |
| "grad_norm": 0.2909483015537262, |
| "learning_rate": 4.101588107170801e-05, |
| "loss": 0.2711, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.542409439827444, |
| "grad_norm": 0.1674415022134781, |
| "learning_rate": 4.096301465457083e-05, |
| "loss": 0.2736, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.5455814248556747, |
| "grad_norm": 0.33168473839759827, |
| "learning_rate": 4.091014823743365e-05, |
| "loss": 0.3428, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.5487534098839053, |
| "grad_norm": 0.389967143535614, |
| "learning_rate": 4.0857281820296475e-05, |
| "loss": 0.2841, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.551925394912136, |
| "grad_norm": 0.2906075716018677, |
| "learning_rate": 4.0804415403159303e-05, |
| "loss": 0.2447, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.5550973799403667, |
| "grad_norm": 0.5243480205535889, |
| "learning_rate": 4.075154898602212e-05, |
| "loss": 0.2743, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.5582693649685974, |
| "grad_norm": 0.3285157084465027, |
| "learning_rate": 4.069868256888495e-05, |
| "loss": 0.2498, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.561441349996828, |
| "grad_norm": 0.7118728756904602, |
| "learning_rate": 4.064581615174777e-05, |
| "loss": 0.2666, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.5646133350250587, |
| "grad_norm": 0.25792092084884644, |
| "learning_rate": 4.0592949734610584e-05, |
| "loss": 0.2903, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.5677853200532893, |
| "grad_norm": 0.6102173924446106, |
| "learning_rate": 4.054008331747341e-05, |
| "loss": 0.2847, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.57095730508152, |
| "grad_norm": 0.16453662514686584, |
| "learning_rate": 4.0487216900336234e-05, |
| "loss": 0.2895, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5741292901097507, |
| "grad_norm": 0.4610302448272705, |
| "learning_rate": 4.0434350483199056e-05, |
| "loss": 0.3077, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.5773012751379814, |
| "grad_norm": 0.2923647165298462, |
| "learning_rate": 4.038148406606188e-05, |
| "loss": 0.2588, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.580473260166212, |
| "grad_norm": 0.25572457909584045, |
| "learning_rate": 4.03286176489247e-05, |
| "loss": 0.2902, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.5836452451944427, |
| "grad_norm": 0.28391504287719727, |
| "learning_rate": 4.027575123178752e-05, |
| "loss": 0.2972, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.5868172302226734, |
| "grad_norm": 0.23603619635105133, |
| "learning_rate": 4.022288481465034e-05, |
| "loss": 0.295, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.589989215250904, |
| "grad_norm": 0.23293623328208923, |
| "learning_rate": 4.0170018397513164e-05, |
| "loss": 0.2754, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.5931612002791347, |
| "grad_norm": 0.38438886404037476, |
| "learning_rate": 4.0117151980375986e-05, |
| "loss": 0.3086, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.5963331853073653, |
| "grad_norm": 0.2958177626132965, |
| "learning_rate": 4.0064285563238815e-05, |
| "loss": 0.2789, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.599505170335596, |
| "grad_norm": 0.19555646181106567, |
| "learning_rate": 4.001141914610163e-05, |
| "loss": 0.2437, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.6026771553638267, |
| "grad_norm": 0.24617354571819305, |
| "learning_rate": 3.995855272896445e-05, |
| "loss": 0.2823, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.6058491403920574, |
| "grad_norm": 0.2656566798686981, |
| "learning_rate": 3.990568631182728e-05, |
| "loss": 0.2593, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.609021125420288, |
| "grad_norm": 0.17703703045845032, |
| "learning_rate": 3.9852819894690095e-05, |
| "loss": 0.2781, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.6121931104485187, |
| "grad_norm": 0.28496983647346497, |
| "learning_rate": 3.9799953477552923e-05, |
| "loss": 0.2852, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.6153650954767493, |
| "grad_norm": 0.16364213824272156, |
| "learning_rate": 3.9747087060415745e-05, |
| "loss": 0.3279, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.61853708050498, |
| "grad_norm": 0.5835040211677551, |
| "learning_rate": 3.969422064327857e-05, |
| "loss": 0.3108, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.6217090655332107, |
| "grad_norm": 0.2625332474708557, |
| "learning_rate": 3.964135422614139e-05, |
| "loss": 0.2944, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.6248810505614414, |
| "grad_norm": 0.18694092333316803, |
| "learning_rate": 3.958848780900421e-05, |
| "loss": 0.2827, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.6280530355896721, |
| "grad_norm": 0.26124364137649536, |
| "learning_rate": 3.953562139186703e-05, |
| "loss": 0.2805, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.6312250206179026, |
| "grad_norm": 0.2587612271308899, |
| "learning_rate": 3.9482754974729854e-05, |
| "loss": 0.3242, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.6343970056461333, |
| "grad_norm": 0.2706884443759918, |
| "learning_rate": 3.9429888557592676e-05, |
| "loss": 0.268, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.637568990674364, |
| "grad_norm": 0.23814411461353302, |
| "learning_rate": 3.93770221404555e-05, |
| "loss": 0.3485, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.6407409757025947, |
| "grad_norm": 0.5621163249015808, |
| "learning_rate": 3.9324155723318326e-05, |
| "loss": 0.3154, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.6439129607308254, |
| "grad_norm": 0.42355868220329285, |
| "learning_rate": 3.927128930618114e-05, |
| "loss": 0.2868, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.6470849457590561, |
| "grad_norm": 0.2288525253534317, |
| "learning_rate": 3.921842288904396e-05, |
| "loss": 0.2522, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.6502569307872866, |
| "grad_norm": 0.22445383667945862, |
| "learning_rate": 3.916555647190679e-05, |
| "loss": 0.3396, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.6534289158155173, |
| "grad_norm": 0.6962974667549133, |
| "learning_rate": 3.9112690054769606e-05, |
| "loss": 0.3361, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.656600900843748, |
| "grad_norm": 0.28549084067344666, |
| "learning_rate": 3.905982363763243e-05, |
| "loss": 0.2817, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.6597728858719787, |
| "grad_norm": 0.23823893070220947, |
| "learning_rate": 3.9006957220495256e-05, |
| "loss": 0.2634, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.6629448709002094, |
| "grad_norm": 0.2657338082790375, |
| "learning_rate": 3.895409080335808e-05, |
| "loss": 0.2855, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.66611685592844, |
| "grad_norm": 0.20627088844776154, |
| "learning_rate": 3.89012243862209e-05, |
| "loss": 0.2857, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.6692888409566707, |
| "grad_norm": 0.3207322061061859, |
| "learning_rate": 3.884941529742647e-05, |
| "loss": 0.317, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.6724608259849013, |
| "grad_norm": 0.28210651874542236, |
| "learning_rate": 3.879654888028928e-05, |
| "loss": 0.3595, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.675632811013132, |
| "grad_norm": 0.1988239288330078, |
| "learning_rate": 3.874368246315211e-05, |
| "loss": 0.267, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.6788047960413627, |
| "grad_norm": 0.7708704471588135, |
| "learning_rate": 3.869081604601493e-05, |
| "loss": 0.3021, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.6819767810695934, |
| "grad_norm": 0.2626688778400421, |
| "learning_rate": 3.8637949628877754e-05, |
| "loss": 0.2574, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.685148766097824, |
| "grad_norm": 0.20237919688224792, |
| "learning_rate": 3.8585083211740576e-05, |
| "loss": 0.2658, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.6883207511260547, |
| "grad_norm": 0.2662367820739746, |
| "learning_rate": 3.85322167946034e-05, |
| "loss": 0.3054, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.6914927361542853, |
| "grad_norm": 0.3348242938518524, |
| "learning_rate": 3.847935037746622e-05, |
| "loss": 0.3265, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.694664721182516, |
| "grad_norm": 0.44233012199401855, |
| "learning_rate": 3.842648396032904e-05, |
| "loss": 0.2712, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.6978367062107467, |
| "grad_norm": 0.33227744698524475, |
| "learning_rate": 3.837361754319186e-05, |
| "loss": 0.2904, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.7010086912389774, |
| "grad_norm": 0.4147779047489166, |
| "learning_rate": 3.832075112605469e-05, |
| "loss": 0.3009, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.704180676267208, |
| "grad_norm": 0.39537376165390015, |
| "learning_rate": 3.8267884708917506e-05, |
| "loss": 0.3238, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.7073526612954387, |
| "grad_norm": 0.18787072598934174, |
| "learning_rate": 3.821501829178033e-05, |
| "loss": 0.2494, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.7105246463236694, |
| "grad_norm": 0.20560680329799652, |
| "learning_rate": 3.8162151874643156e-05, |
| "loss": 0.2715, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.7136966313519, |
| "grad_norm": 0.29710862040519714, |
| "learning_rate": 3.810928545750598e-05, |
| "loss": 0.3487, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.7168686163801307, |
| "grad_norm": 0.30526480078697205, |
| "learning_rate": 3.805641904036879e-05, |
| "loss": 0.2885, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.7200406014083613, |
| "grad_norm": 0.2582074701786041, |
| "learning_rate": 3.800355262323162e-05, |
| "loss": 0.336, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.723212586436592, |
| "grad_norm": 0.3673989176750183, |
| "learning_rate": 3.795068620609444e-05, |
| "loss": 0.4197, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.7263845714648227, |
| "grad_norm": 0.19386839866638184, |
| "learning_rate": 3.7897819788957265e-05, |
| "loss": 0.3385, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.7295565564930534, |
| "grad_norm": 0.21169255673885345, |
| "learning_rate": 3.784495337182009e-05, |
| "loss": 0.305, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.732728541521284, |
| "grad_norm": 0.541127622127533, |
| "learning_rate": 3.779208695468291e-05, |
| "loss": 0.2942, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.7359005265495147, |
| "grad_norm": 0.4768331050872803, |
| "learning_rate": 3.773922053754573e-05, |
| "loss": 0.3122, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.7390725115777453, |
| "grad_norm": 0.39062756299972534, |
| "learning_rate": 3.768635412040855e-05, |
| "loss": 0.2905, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.742244496605976, |
| "grad_norm": 0.2553999722003937, |
| "learning_rate": 3.7633487703271374e-05, |
| "loss": 0.2705, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.7454164816342067, |
| "grad_norm": 0.3118399381637573, |
| "learning_rate": 3.7580621286134196e-05, |
| "loss": 0.263, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.7485884666624374, |
| "grad_norm": 0.1847628504037857, |
| "learning_rate": 3.7527754868997024e-05, |
| "loss": 0.2748, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.7517604516906681, |
| "grad_norm": 0.20181454718112946, |
| "learning_rate": 3.747488845185984e-05, |
| "loss": 0.2801, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.7549324367188986, |
| "grad_norm": 0.4498727321624756, |
| "learning_rate": 3.742202203472267e-05, |
| "loss": 0.3182, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.7581044217471293, |
| "grad_norm": 0.37652915716171265, |
| "learning_rate": 3.736915561758549e-05, |
| "loss": 0.2782, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.76127640677536, |
| "grad_norm": 0.2723052203655243, |
| "learning_rate": 3.7316289200448304e-05, |
| "loss": 0.3187, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.7644483918035907, |
| "grad_norm": 0.16256879270076752, |
| "learning_rate": 3.726342278331113e-05, |
| "loss": 0.2866, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.7676203768318214, |
| "grad_norm": 0.25574353337287903, |
| "learning_rate": 3.7210556366173955e-05, |
| "loss": 0.2848, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.7707923618600521, |
| "grad_norm": 0.18727587163448334, |
| "learning_rate": 3.7157689949036776e-05, |
| "loss": 0.3105, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.7739643468882826, |
| "grad_norm": 0.25161731243133545, |
| "learning_rate": 3.71048235318996e-05, |
| "loss": 0.2911, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.7771363319165133, |
| "grad_norm": 0.43750718235969543, |
| "learning_rate": 3.705195711476242e-05, |
| "loss": 0.2981, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.780308316944744, |
| "grad_norm": 0.27956822514533997, |
| "learning_rate": 3.699909069762524e-05, |
| "loss": 0.3155, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.7834803019729747, |
| "grad_norm": 0.1870819479227066, |
| "learning_rate": 3.694622428048806e-05, |
| "loss": 0.2683, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.7866522870012054, |
| "grad_norm": 0.44053515791893005, |
| "learning_rate": 3.6893357863350885e-05, |
| "loss": 0.3244, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.7898242720294361, |
| "grad_norm": 0.4747866988182068, |
| "learning_rate": 3.684049144621371e-05, |
| "loss": 0.2675, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.7929962570576667, |
| "grad_norm": 0.2212987244129181, |
| "learning_rate": 3.6787625029076535e-05, |
| "loss": 0.3245, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.7961682420858973, |
| "grad_norm": 0.16488397121429443, |
| "learning_rate": 3.673475861193935e-05, |
| "loss": 0.2579, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.799340227114128, |
| "grad_norm": 0.15184266865253448, |
| "learning_rate": 3.668189219480217e-05, |
| "loss": 0.3139, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.8025122121423587, |
| "grad_norm": 0.179268479347229, |
| "learning_rate": 3.6629025777665e-05, |
| "loss": 0.2941, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.8056841971705894, |
| "grad_norm": 0.24617774784564972, |
| "learning_rate": 3.6576159360527816e-05, |
| "loss": 0.2934, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.80885618219882, |
| "grad_norm": 0.3757403790950775, |
| "learning_rate": 3.6523292943390644e-05, |
| "loss": 0.2917, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.8120281672270507, |
| "grad_norm": 0.360689640045166, |
| "learning_rate": 3.6471483854596204e-05, |
| "loss": 0.278, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.8152001522552813, |
| "grad_norm": 0.38574671745300293, |
| "learning_rate": 3.641861743745903e-05, |
| "loss": 0.2706, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.818372137283512, |
| "grad_norm": 0.45887231826782227, |
| "learning_rate": 3.6365751020321855e-05, |
| "loss": 0.3111, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.8215441223117427, |
| "grad_norm": 0.22820314764976501, |
| "learning_rate": 3.6312884603184676e-05, |
| "loss": 0.3343, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.8247161073399734, |
| "grad_norm": 0.18171218037605286, |
| "learning_rate": 3.62600181860475e-05, |
| "loss": 0.2827, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.827888092368204, |
| "grad_norm": 0.2788825035095215, |
| "learning_rate": 3.620715176891032e-05, |
| "loss": 0.331, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.8310600773964347, |
| "grad_norm": 0.31379690766334534, |
| "learning_rate": 3.615428535177314e-05, |
| "loss": 0.3031, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.8342320624246654, |
| "grad_norm": 0.3020433187484741, |
| "learning_rate": 3.610141893463596e-05, |
| "loss": 0.27, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.837404047452896, |
| "grad_norm": 0.2358977198600769, |
| "learning_rate": 3.6048552517498785e-05, |
| "loss": 0.3495, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.8405760324811267, |
| "grad_norm": 0.2896983325481415, |
| "learning_rate": 3.599568610036161e-05, |
| "loss": 0.2766, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.8437480175093574, |
| "grad_norm": 0.30271226167678833, |
| "learning_rate": 3.5942819683224435e-05, |
| "loss": 0.3418, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.846920002537588, |
| "grad_norm": 0.22971239686012268, |
| "learning_rate": 3.588995326608725e-05, |
| "loss": 0.2905, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.8500919875658187, |
| "grad_norm": 0.22787493467330933, |
| "learning_rate": 3.583708684895007e-05, |
| "loss": 0.2906, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.8532639725940494, |
| "grad_norm": 0.3081256151199341, |
| "learning_rate": 3.57842204318129e-05, |
| "loss": 0.3264, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.85643595762228, |
| "grad_norm": 0.46066999435424805, |
| "learning_rate": 3.5731354014675716e-05, |
| "loss": 0.2963, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.8596079426505107, |
| "grad_norm": 0.467032253742218, |
| "learning_rate": 3.567848759753854e-05, |
| "loss": 0.3505, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.8627799276787413, |
| "grad_norm": 0.35964497923851013, |
| "learning_rate": 3.5625621180401366e-05, |
| "loss": 0.2843, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.865951912706972, |
| "grad_norm": 0.3182917833328247, |
| "learning_rate": 3.557275476326419e-05, |
| "loss": 0.3199, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.8691238977352027, |
| "grad_norm": 0.547640323638916, |
| "learning_rate": 3.551988834612701e-05, |
| "loss": 0.2675, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.8722958827634334, |
| "grad_norm": 0.5345727801322937, |
| "learning_rate": 3.546702192898983e-05, |
| "loss": 0.3218, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.8754678677916641, |
| "grad_norm": 0.23531897366046906, |
| "learning_rate": 3.541415551185265e-05, |
| "loss": 0.3132, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.8786398528198947, |
| "grad_norm": 0.38224852085113525, |
| "learning_rate": 3.5361289094715475e-05, |
| "loss": 0.3107, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.8818118378481253, |
| "grad_norm": 0.21280410885810852, |
| "learning_rate": 3.5308422677578296e-05, |
| "loss": 0.3213, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.884983822876356, |
| "grad_norm": 0.23802965879440308, |
| "learning_rate": 3.525555626044112e-05, |
| "loss": 0.3249, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.8881558079045867, |
| "grad_norm": 0.23534643650054932, |
| "learning_rate": 3.5202689843303947e-05, |
| "loss": 0.2846, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.8913277929328174, |
| "grad_norm": 0.2755154073238373, |
| "learning_rate": 3.514982342616676e-05, |
| "loss": 0.2596, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.8944997779610481, |
| "grad_norm": 0.20472615957260132, |
| "learning_rate": 3.509695700902958e-05, |
| "loss": 0.2655, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.8976717629892786, |
| "grad_norm": 0.280692994594574, |
| "learning_rate": 3.504409059189241e-05, |
| "loss": 0.3129, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.9008437480175093, |
| "grad_norm": 0.382570743560791, |
| "learning_rate": 3.4991224174755234e-05, |
| "loss": 0.3144, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.90401573304574, |
| "grad_norm": 0.2799607813358307, |
| "learning_rate": 3.493835775761805e-05, |
| "loss": 0.3225, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.9071877180739707, |
| "grad_norm": 0.2509687840938568, |
| "learning_rate": 3.488549134048088e-05, |
| "loss": 0.2808, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.9103597031022014, |
| "grad_norm": 0.6318449378013611, |
| "learning_rate": 3.48326249233437e-05, |
| "loss": 0.3053, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.9135316881304321, |
| "grad_norm": 0.16883951425552368, |
| "learning_rate": 3.4779758506206514e-05, |
| "loss": 0.2634, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.9167036731586627, |
| "grad_norm": 0.1657867729663849, |
| "learning_rate": 3.472689208906934e-05, |
| "loss": 0.3301, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.9198756581868933, |
| "grad_norm": 0.17061150074005127, |
| "learning_rate": 3.4674025671932164e-05, |
| "loss": 0.2835, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.923047643215124, |
| "grad_norm": 0.4344567656517029, |
| "learning_rate": 3.4621159254794986e-05, |
| "loss": 0.312, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.9262196282433547, |
| "grad_norm": 0.2930458188056946, |
| "learning_rate": 3.456829283765781e-05, |
| "loss": 0.2989, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.9293916132715854, |
| "grad_norm": 0.2887861728668213, |
| "learning_rate": 3.451542642052063e-05, |
| "loss": 0.3375, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.932563598299816, |
| "grad_norm": 0.22968149185180664, |
| "learning_rate": 3.446256000338345e-05, |
| "loss": 0.3065, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.9357355833280467, |
| "grad_norm": 0.2681732773780823, |
| "learning_rate": 3.440969358624627e-05, |
| "loss": 0.3132, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.9389075683562773, |
| "grad_norm": 0.3073856234550476, |
| "learning_rate": 3.4356827169109095e-05, |
| "loss": 0.3223, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.942079553384508, |
| "grad_norm": 0.18574346601963043, |
| "learning_rate": 3.430396075197192e-05, |
| "loss": 0.3083, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.9452515384127387, |
| "grad_norm": 0.44194427132606506, |
| "learning_rate": 3.4251094334834745e-05, |
| "loss": 0.2727, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.9484235234409694, |
| "grad_norm": 0.29627904295921326, |
| "learning_rate": 3.419822791769756e-05, |
| "loss": 0.3319, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.9515955084692, |
| "grad_norm": 0.3156539499759674, |
| "learning_rate": 3.414536150056039e-05, |
| "loss": 0.2866, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.9547674934974307, |
| "grad_norm": 0.18663552403450012, |
| "learning_rate": 3.409249508342321e-05, |
| "loss": 0.2708, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.9579394785256614, |
| "grad_norm": 0.29560723900794983, |
| "learning_rate": 3.4039628666286025e-05, |
| "loss": 0.2739, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.961111463553892, |
| "grad_norm": 0.3896738290786743, |
| "learning_rate": 3.3986762249148854e-05, |
| "loss": 0.3098, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.9642834485821227, |
| "grad_norm": 0.23948702216148376, |
| "learning_rate": 3.3933895832011675e-05, |
| "loss": 0.2292, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.9674554336103534, |
| "grad_norm": 0.36551278829574585, |
| "learning_rate": 3.38810294148745e-05, |
| "loss": 0.303, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.970627418638584, |
| "grad_norm": 0.16233482956886292, |
| "learning_rate": 3.382816299773732e-05, |
| "loss": 0.3182, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.9737994036668147, |
| "grad_norm": 0.29786011576652527, |
| "learning_rate": 3.377529658060014e-05, |
| "loss": 0.2966, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.9769713886950454, |
| "grad_norm": 0.18349693715572357, |
| "learning_rate": 3.372243016346296e-05, |
| "loss": 0.3126, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.980143373723276, |
| "grad_norm": 0.13039042055606842, |
| "learning_rate": 3.3669563746325784e-05, |
| "loss": 0.2645, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.9833153587515067, |
| "grad_norm": 0.19823278486728668, |
| "learning_rate": 3.3616697329188606e-05, |
| "loss": 0.2843, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.9864873437797373, |
| "grad_norm": 0.2074085921049118, |
| "learning_rate": 3.356383091205143e-05, |
| "loss": 0.2859, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.989659328807968, |
| "grad_norm": 0.6243526935577393, |
| "learning_rate": 3.3510964494914256e-05, |
| "loss": 0.3081, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.9928313138361987, |
| "grad_norm": 0.19868969917297363, |
| "learning_rate": 3.345809807777707e-05, |
| "loss": 0.2767, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.9960032988644294, |
| "grad_norm": 0.4235476553440094, |
| "learning_rate": 3.34052316606399e-05, |
| "loss": 0.2831, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.9991752838926601, |
| "grad_norm": 0.29368528723716736, |
| "learning_rate": 3.335236524350272e-05, |
| "loss": 0.3094, |
| "step": 15750 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.3151220977306366, |
| "eval_runtime": 48.1084, |
| "eval_samples_per_second": 34.194, |
| "eval_steps_per_second": 17.107, |
| "step": 15763 |
| }, |
| { |
| "epoch": 1.0023472689208908, |
| "grad_norm": 0.34481731057167053, |
| "learning_rate": 3.329949882636554e-05, |
| "loss": 0.2416, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.0055192539491213, |
| "grad_norm": 0.34973275661468506, |
| "learning_rate": 3.3246632409228365e-05, |
| "loss": 0.2596, |
| "step": 15850 |
| }, |
| { |
| "epoch": 1.0086912389773521, |
| "grad_norm": 0.3936697840690613, |
| "learning_rate": 3.3193765992091187e-05, |
| "loss": 0.3223, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.0118632240055827, |
| "grad_norm": 0.2801978588104248, |
| "learning_rate": 3.314089957495401e-05, |
| "loss": 0.2896, |
| "step": 15950 |
| }, |
| { |
| "epoch": 1.0150352090338133, |
| "grad_norm": 0.1787472814321518, |
| "learning_rate": 3.308803315781683e-05, |
| "loss": 0.3416, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.018207194062044, |
| "grad_norm": 0.13754060864448547, |
| "learning_rate": 3.303516674067965e-05, |
| "loss": 0.282, |
| "step": 16050 |
| }, |
| { |
| "epoch": 1.0213791790902746, |
| "grad_norm": 0.24444366991519928, |
| "learning_rate": 3.2982300323542474e-05, |
| "loss": 0.2829, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.0245511641185054, |
| "grad_norm": 0.46902337670326233, |
| "learning_rate": 3.29294339064053e-05, |
| "loss": 0.3322, |
| "step": 16150 |
| }, |
| { |
| "epoch": 1.027723149146736, |
| "grad_norm": 0.17102986574172974, |
| "learning_rate": 3.287656748926812e-05, |
| "loss": 0.2946, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.0308951341749666, |
| "grad_norm": 0.5195295214653015, |
| "learning_rate": 3.282370107213094e-05, |
| "loss": 0.2921, |
| "step": 16250 |
| }, |
| { |
| "epoch": 1.0340671192031974, |
| "grad_norm": 0.23874568939208984, |
| "learning_rate": 3.277083465499377e-05, |
| "loss": 0.2764, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.037239104231428, |
| "grad_norm": 0.2507326304912567, |
| "learning_rate": 3.271796823785658e-05, |
| "loss": 0.2752, |
| "step": 16350 |
| }, |
| { |
| "epoch": 1.0404110892596588, |
| "grad_norm": 0.386338472366333, |
| "learning_rate": 3.2665101820719404e-05, |
| "loss": 0.3205, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.0435830742878893, |
| "grad_norm": 0.2907971441745758, |
| "learning_rate": 3.261223540358223e-05, |
| "loss": 0.2699, |
| "step": 16450 |
| }, |
| { |
| "epoch": 1.0467550593161201, |
| "grad_norm": 0.3498822748661041, |
| "learning_rate": 3.2559368986445054e-05, |
| "loss": 0.3075, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.0499270443443507, |
| "grad_norm": 0.1719454526901245, |
| "learning_rate": 3.2506502569307876e-05, |
| "loss": 0.2604, |
| "step": 16550 |
| }, |
| { |
| "epoch": 1.0530990293725813, |
| "grad_norm": 0.2626461684703827, |
| "learning_rate": 3.24536361521707e-05, |
| "loss": 0.308, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.056271014400812, |
| "grad_norm": 0.26986241340637207, |
| "learning_rate": 3.240076973503352e-05, |
| "loss": 0.2816, |
| "step": 16650 |
| }, |
| { |
| "epoch": 1.0594429994290426, |
| "grad_norm": 0.2187446653842926, |
| "learning_rate": 3.234790331789634e-05, |
| "loss": 0.2749, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.0626149844572734, |
| "grad_norm": 0.2777579128742218, |
| "learning_rate": 3.229503690075916e-05, |
| "loss": 0.3258, |
| "step": 16750 |
| }, |
| { |
| "epoch": 1.065786969485504, |
| "grad_norm": 0.29376596212387085, |
| "learning_rate": 3.2242170483621985e-05, |
| "loss": 0.3239, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.0689589545137348, |
| "grad_norm": 0.25620236992836, |
| "learning_rate": 3.218930406648481e-05, |
| "loss": 0.2619, |
| "step": 16850 |
| }, |
| { |
| "epoch": 1.0721309395419654, |
| "grad_norm": 0.30874136090278625, |
| "learning_rate": 3.213643764934763e-05, |
| "loss": 0.2822, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.075302924570196, |
| "grad_norm": 0.3903138041496277, |
| "learning_rate": 3.208357123221045e-05, |
| "loss": 0.2874, |
| "step": 16950 |
| }, |
| { |
| "epoch": 1.0784749095984267, |
| "grad_norm": 0.22503992915153503, |
| "learning_rate": 3.203070481507328e-05, |
| "loss": 0.2311, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.0816468946266573, |
| "grad_norm": 0.17416654527187347, |
| "learning_rate": 3.1977838397936093e-05, |
| "loss": 0.2739, |
| "step": 17050 |
| }, |
| { |
| "epoch": 1.084818879654888, |
| "grad_norm": 0.2830020487308502, |
| "learning_rate": 3.1924971980798915e-05, |
| "loss": 0.3134, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.0879908646831187, |
| "grad_norm": 0.3325769305229187, |
| "learning_rate": 3.1872105563661744e-05, |
| "loss": 0.2577, |
| "step": 17150 |
| }, |
| { |
| "epoch": 1.0911628497113495, |
| "grad_norm": 0.2352118343114853, |
| "learning_rate": 3.1819239146524566e-05, |
| "loss": 0.2406, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.09433483473958, |
| "grad_norm": 0.27199751138687134, |
| "learning_rate": 3.176637272938738e-05, |
| "loss": 0.3044, |
| "step": 17250 |
| }, |
| { |
| "epoch": 1.0975068197678106, |
| "grad_norm": 0.28306007385253906, |
| "learning_rate": 3.171350631225021e-05, |
| "loss": 0.2881, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.1006788047960414, |
| "grad_norm": 0.22405964136123657, |
| "learning_rate": 3.166063989511303e-05, |
| "loss": 0.2771, |
| "step": 17350 |
| }, |
| { |
| "epoch": 1.103850789824272, |
| "grad_norm": 0.5038449764251709, |
| "learning_rate": 3.160777347797585e-05, |
| "loss": 0.2575, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.1070227748525028, |
| "grad_norm": 0.23774085938930511, |
| "learning_rate": 3.155596438918142e-05, |
| "loss": 0.3377, |
| "step": 17450 |
| }, |
| { |
| "epoch": 1.1101947598807334, |
| "grad_norm": 0.3367967903614044, |
| "learning_rate": 3.1503097972044234e-05, |
| "loss": 0.2936, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.113366744908964, |
| "grad_norm": 0.7372679710388184, |
| "learning_rate": 3.145023155490706e-05, |
| "loss": 0.285, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.1165387299371947, |
| "grad_norm": 0.23422600328922272, |
| "learning_rate": 3.1397365137769885e-05, |
| "loss": 0.2332, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.1197107149654253, |
| "grad_norm": 0.2483871728181839, |
| "learning_rate": 3.1344498720632706e-05, |
| "loss": 0.3047, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.122882699993656, |
| "grad_norm": 0.3678695261478424, |
| "learning_rate": 3.129163230349553e-05, |
| "loss": 0.2943, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.1260546850218867, |
| "grad_norm": 0.3198718726634979, |
| "learning_rate": 3.123876588635835e-05, |
| "loss": 0.2797, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.1292266700501175, |
| "grad_norm": 0.17824482917785645, |
| "learning_rate": 3.118589946922117e-05, |
| "loss": 0.2806, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.132398655078348, |
| "grad_norm": 0.20436514914035797, |
| "learning_rate": 3.1133033052083993e-05, |
| "loss": 0.3234, |
| "step": 17850 |
| }, |
| { |
| "epoch": 1.1355706401065788, |
| "grad_norm": 0.28306111693382263, |
| "learning_rate": 3.1080166634946815e-05, |
| "loss": 0.2955, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.1387426251348094, |
| "grad_norm": 0.2912297546863556, |
| "learning_rate": 3.1027300217809644e-05, |
| "loss": 0.2773, |
| "step": 17950 |
| }, |
| { |
| "epoch": 1.14191461016304, |
| "grad_norm": 0.41615915298461914, |
| "learning_rate": 3.0974433800672465e-05, |
| "loss": 0.2689, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.1450865951912708, |
| "grad_norm": 0.2598041594028473, |
| "learning_rate": 3.092156738353528e-05, |
| "loss": 0.269, |
| "step": 18050 |
| }, |
| { |
| "epoch": 1.1482585802195013, |
| "grad_norm": 0.19208338856697083, |
| "learning_rate": 3.086870096639811e-05, |
| "loss": 0.3644, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.1514305652477321, |
| "grad_norm": 0.36915165185928345, |
| "learning_rate": 3.081583454926093e-05, |
| "loss": 0.3754, |
| "step": 18150 |
| }, |
| { |
| "epoch": 1.1546025502759627, |
| "grad_norm": 0.2906833589076996, |
| "learning_rate": 3.076296813212375e-05, |
| "loss": 0.3004, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.1577745353041933, |
| "grad_norm": 0.27490586042404175, |
| "learning_rate": 3.0710101714986574e-05, |
| "loss": 0.2476, |
| "step": 18250 |
| }, |
| { |
| "epoch": 1.160946520332424, |
| "grad_norm": 0.2721092998981476, |
| "learning_rate": 3.0657235297849396e-05, |
| "loss": 0.2994, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.1641185053606546, |
| "grad_norm": 0.5216304063796997, |
| "learning_rate": 3.060436888071222e-05, |
| "loss": 0.2848, |
| "step": 18350 |
| }, |
| { |
| "epoch": 1.1672904903888854, |
| "grad_norm": 0.2627362012863159, |
| "learning_rate": 3.055150246357504e-05, |
| "loss": 0.2537, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.170462475417116, |
| "grad_norm": 0.7663968205451965, |
| "learning_rate": 3.049863604643786e-05, |
| "loss": 0.2894, |
| "step": 18450 |
| }, |
| { |
| "epoch": 1.1736344604453466, |
| "grad_norm": 0.2766590714454651, |
| "learning_rate": 3.0445769629300686e-05, |
| "loss": 0.3203, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.1768064454735774, |
| "grad_norm": 0.37423521280288696, |
| "learning_rate": 3.0392903212163508e-05, |
| "loss": 0.2978, |
| "step": 18550 |
| }, |
| { |
| "epoch": 1.179978430501808, |
| "grad_norm": 0.3937060534954071, |
| "learning_rate": 3.0340036795026326e-05, |
| "loss": 0.3268, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.1831504155300387, |
| "grad_norm": 0.3597530722618103, |
| "learning_rate": 3.028717037788915e-05, |
| "loss": 0.2737, |
| "step": 18650 |
| }, |
| { |
| "epoch": 1.1863224005582693, |
| "grad_norm": 0.3743630349636078, |
| "learning_rate": 3.0234303960751977e-05, |
| "loss": 0.302, |
| "step": 18700 |
| }, |
| { |
| "epoch": 1.1894943855865001, |
| "grad_norm": 0.2796330749988556, |
| "learning_rate": 3.0181437543614795e-05, |
| "loss": 0.2937, |
| "step": 18750 |
| }, |
| { |
| "epoch": 1.1926663706147307, |
| "grad_norm": 0.2742915749549866, |
| "learning_rate": 3.0128571126477617e-05, |
| "loss": 0.3305, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.1958383556429615, |
| "grad_norm": 0.29744336009025574, |
| "learning_rate": 3.0075704709340442e-05, |
| "loss": 0.2896, |
| "step": 18850 |
| }, |
| { |
| "epoch": 1.199010340671192, |
| "grad_norm": 0.2520214319229126, |
| "learning_rate": 3.0022838292203264e-05, |
| "loss": 0.2732, |
| "step": 18900 |
| }, |
| { |
| "epoch": 1.2021823256994226, |
| "grad_norm": 0.2396412491798401, |
| "learning_rate": 2.9969971875066082e-05, |
| "loss": 0.3194, |
| "step": 18950 |
| }, |
| { |
| "epoch": 1.2053543107276534, |
| "grad_norm": 0.42488738894462585, |
| "learning_rate": 2.9917105457928907e-05, |
| "loss": 0.2837, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.208526295755884, |
| "grad_norm": 0.21764253079891205, |
| "learning_rate": 2.986423904079173e-05, |
| "loss": 0.2955, |
| "step": 19050 |
| }, |
| { |
| "epoch": 1.2116982807841148, |
| "grad_norm": 0.4629133641719818, |
| "learning_rate": 2.9811372623654547e-05, |
| "loss": 0.2466, |
| "step": 19100 |
| }, |
| { |
| "epoch": 1.2148702658123454, |
| "grad_norm": 0.2966591417789459, |
| "learning_rate": 2.9758506206517372e-05, |
| "loss": 0.3033, |
| "step": 19150 |
| }, |
| { |
| "epoch": 1.218042250840576, |
| "grad_norm": 0.42917561531066895, |
| "learning_rate": 2.9705639789380198e-05, |
| "loss": 0.3201, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.2212142358688067, |
| "grad_norm": 0.24894292652606964, |
| "learning_rate": 2.965277337224302e-05, |
| "loss": 0.2464, |
| "step": 19250 |
| }, |
| { |
| "epoch": 1.2243862208970373, |
| "grad_norm": 0.40641218423843384, |
| "learning_rate": 2.9599906955105838e-05, |
| "loss": 0.2771, |
| "step": 19300 |
| }, |
| { |
| "epoch": 1.227558205925268, |
| "grad_norm": 0.36876288056373596, |
| "learning_rate": 2.9547040537968663e-05, |
| "loss": 0.2744, |
| "step": 19350 |
| }, |
| { |
| "epoch": 1.2307301909534987, |
| "grad_norm": 0.6306925415992737, |
| "learning_rate": 2.9494174120831485e-05, |
| "loss": 0.2859, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.2339021759817292, |
| "grad_norm": 0.17763349413871765, |
| "learning_rate": 2.9441307703694303e-05, |
| "loss": 0.3406, |
| "step": 19450 |
| }, |
| { |
| "epoch": 1.23707416100996, |
| "grad_norm": 0.3222569525241852, |
| "learning_rate": 2.9388441286557128e-05, |
| "loss": 0.286, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.2402461460381906, |
| "grad_norm": 0.22196908295154572, |
| "learning_rate": 2.9335574869419953e-05, |
| "loss": 0.2585, |
| "step": 19550 |
| }, |
| { |
| "epoch": 1.2434181310664214, |
| "grad_norm": 0.29623332619667053, |
| "learning_rate": 2.9282708452282775e-05, |
| "loss": 0.292, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.246590116094652, |
| "grad_norm": 0.2855692207813263, |
| "learning_rate": 2.9229842035145593e-05, |
| "loss": 0.2804, |
| "step": 19650 |
| }, |
| { |
| "epoch": 1.2497621011228828, |
| "grad_norm": 0.3012256920337677, |
| "learning_rate": 2.917697561800842e-05, |
| "loss": 0.2678, |
| "step": 19700 |
| }, |
| { |
| "epoch": 1.2529340861511133, |
| "grad_norm": 0.43054285645484924, |
| "learning_rate": 2.912410920087124e-05, |
| "loss": 0.3117, |
| "step": 19750 |
| }, |
| { |
| "epoch": 1.2561060711793441, |
| "grad_norm": 0.2894386351108551, |
| "learning_rate": 2.907124278373406e-05, |
| "loss": 0.2846, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.2592780562075747, |
| "grad_norm": 0.7129951119422913, |
| "learning_rate": 2.9018376366596884e-05, |
| "loss": 0.3188, |
| "step": 19850 |
| }, |
| { |
| "epoch": 1.2624500412358053, |
| "grad_norm": 0.2086195945739746, |
| "learning_rate": 2.8965509949459705e-05, |
| "loss": 0.267, |
| "step": 19900 |
| }, |
| { |
| "epoch": 1.265622026264036, |
| "grad_norm": 0.15617908537387848, |
| "learning_rate": 2.891264353232253e-05, |
| "loss": 0.2682, |
| "step": 19950 |
| }, |
| { |
| "epoch": 1.2687940112922667, |
| "grad_norm": 0.22346539795398712, |
| "learning_rate": 2.885977711518535e-05, |
| "loss": 0.3291, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.2719659963204974, |
| "grad_norm": 0.20272932946681976, |
| "learning_rate": 2.8806910698048174e-05, |
| "loss": 0.2848, |
| "step": 20050 |
| }, |
| { |
| "epoch": 1.275137981348728, |
| "grad_norm": 0.5262783169746399, |
| "learning_rate": 2.8754044280910996e-05, |
| "loss": 0.3224, |
| "step": 20100 |
| }, |
| { |
| "epoch": 1.2783099663769586, |
| "grad_norm": 0.28421077132225037, |
| "learning_rate": 2.870117786377382e-05, |
| "loss": 0.2628, |
| "step": 20150 |
| }, |
| { |
| "epoch": 1.2814819514051894, |
| "grad_norm": 0.29966121912002563, |
| "learning_rate": 2.864831144663664e-05, |
| "loss": 0.3174, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.28465393643342, |
| "grad_norm": 0.41417014598846436, |
| "learning_rate": 2.859544502949946e-05, |
| "loss": 0.2971, |
| "step": 20250 |
| }, |
| { |
| "epoch": 1.2878259214616508, |
| "grad_norm": 0.2396809309720993, |
| "learning_rate": 2.8542578612362286e-05, |
| "loss": 0.2744, |
| "step": 20300 |
| }, |
| { |
| "epoch": 1.2909979064898813, |
| "grad_norm": 0.29226428270339966, |
| "learning_rate": 2.8489712195225105e-05, |
| "loss": 0.3256, |
| "step": 20350 |
| }, |
| { |
| "epoch": 1.294169891518112, |
| "grad_norm": 0.27770760655403137, |
| "learning_rate": 2.843684577808793e-05, |
| "loss": 0.3271, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.2973418765463427, |
| "grad_norm": 0.34558218717575073, |
| "learning_rate": 2.838397936095075e-05, |
| "loss": 0.2438, |
| "step": 20450 |
| }, |
| { |
| "epoch": 1.3005138615745735, |
| "grad_norm": 0.1685953289270401, |
| "learning_rate": 2.8331112943813577e-05, |
| "loss": 0.2701, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.303685846602804, |
| "grad_norm": 0.2469525784254074, |
| "learning_rate": 2.8278246526676395e-05, |
| "loss": 0.2622, |
| "step": 20550 |
| }, |
| { |
| "epoch": 1.3068578316310346, |
| "grad_norm": 0.17005576193332672, |
| "learning_rate": 2.8225380109539217e-05, |
| "loss": 0.2992, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.3100298166592654, |
| "grad_norm": 0.30128028988838196, |
| "learning_rate": 2.8172513692402042e-05, |
| "loss": 0.3352, |
| "step": 20650 |
| }, |
| { |
| "epoch": 1.313201801687496, |
| "grad_norm": 0.6207164525985718, |
| "learning_rate": 2.811964727526486e-05, |
| "loss": 0.2747, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.3163737867157268, |
| "grad_norm": 0.21769997477531433, |
| "learning_rate": 2.8066780858127685e-05, |
| "loss": 0.2948, |
| "step": 20750 |
| }, |
| { |
| "epoch": 1.3195457717439574, |
| "grad_norm": 0.4176817238330841, |
| "learning_rate": 2.801497176933325e-05, |
| "loss": 0.2682, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.322717756772188, |
| "grad_norm": 0.2459891140460968, |
| "learning_rate": 2.796210535219607e-05, |
| "loss": 0.2669, |
| "step": 20850 |
| }, |
| { |
| "epoch": 1.3258897418004187, |
| "grad_norm": 0.3456882834434509, |
| "learning_rate": 2.7909238935058896e-05, |
| "loss": 0.3307, |
| "step": 20900 |
| }, |
| { |
| "epoch": 1.3290617268286493, |
| "grad_norm": 0.2911352813243866, |
| "learning_rate": 2.785637251792172e-05, |
| "loss": 0.2304, |
| "step": 20950 |
| }, |
| { |
| "epoch": 1.33223371185688, |
| "grad_norm": 0.620798647403717, |
| "learning_rate": 2.780350610078454e-05, |
| "loss": 0.3607, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.3354056968851107, |
| "grad_norm": 0.22824439406394958, |
| "learning_rate": 2.775063968364736e-05, |
| "loss": 0.3092, |
| "step": 21050 |
| }, |
| { |
| "epoch": 1.3385776819133413, |
| "grad_norm": 0.22648726403713226, |
| "learning_rate": 2.7697773266510186e-05, |
| "loss": 0.3063, |
| "step": 21100 |
| }, |
| { |
| "epoch": 1.341749666941572, |
| "grad_norm": 0.698406457901001, |
| "learning_rate": 2.7644906849373004e-05, |
| "loss": 0.2693, |
| "step": 21150 |
| }, |
| { |
| "epoch": 1.3449216519698026, |
| "grad_norm": 0.351519912481308, |
| "learning_rate": 2.7592040432235826e-05, |
| "loss": 0.2796, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.3480936369980334, |
| "grad_norm": 0.346582293510437, |
| "learning_rate": 2.753917401509865e-05, |
| "loss": 0.3109, |
| "step": 21250 |
| }, |
| { |
| "epoch": 1.351265622026264, |
| "grad_norm": 0.22138628363609314, |
| "learning_rate": 2.7486307597961477e-05, |
| "loss": 0.2739, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.3544376070544948, |
| "grad_norm": 0.24700744450092316, |
| "learning_rate": 2.743344118082429e-05, |
| "loss": 0.2661, |
| "step": 21350 |
| }, |
| { |
| "epoch": 1.3576095920827254, |
| "grad_norm": 0.5413603186607361, |
| "learning_rate": 2.7380574763687117e-05, |
| "loss": 0.2772, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.3607815771109562, |
| "grad_norm": 0.36633920669555664, |
| "learning_rate": 2.7327708346549942e-05, |
| "loss": 0.2918, |
| "step": 21450 |
| }, |
| { |
| "epoch": 1.3639535621391867, |
| "grad_norm": 0.4693305194377899, |
| "learning_rate": 2.727484192941276e-05, |
| "loss": 0.3215, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.3671255471674173, |
| "grad_norm": 0.5809823870658875, |
| "learning_rate": 2.7221975512275582e-05, |
| "loss": 0.2846, |
| "step": 21550 |
| }, |
| { |
| "epoch": 1.370297532195648, |
| "grad_norm": 0.5905492305755615, |
| "learning_rate": 2.7169109095138407e-05, |
| "loss": 0.3345, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.3734695172238787, |
| "grad_norm": 0.29343274235725403, |
| "learning_rate": 2.711624267800123e-05, |
| "loss": 0.2596, |
| "step": 21650 |
| }, |
| { |
| "epoch": 1.3766415022521095, |
| "grad_norm": 0.4964137375354767, |
| "learning_rate": 2.7063376260864047e-05, |
| "loss": 0.3203, |
| "step": 21700 |
| }, |
| { |
| "epoch": 1.37981348728034, |
| "grad_norm": 0.43368765711784363, |
| "learning_rate": 2.7010509843726872e-05, |
| "loss": 0.2402, |
| "step": 21750 |
| }, |
| { |
| "epoch": 1.3829854723085706, |
| "grad_norm": 0.6017479300498962, |
| "learning_rate": 2.6957643426589697e-05, |
| "loss": 0.3013, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.3861574573368014, |
| "grad_norm": 0.29252320528030396, |
| "learning_rate": 2.6904777009452516e-05, |
| "loss": 0.3873, |
| "step": 21850 |
| }, |
| { |
| "epoch": 1.389329442365032, |
| "grad_norm": 0.309181809425354, |
| "learning_rate": 2.6851910592315337e-05, |
| "loss": 0.2836, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.3925014273932628, |
| "grad_norm": 0.43791621923446655, |
| "learning_rate": 2.6799044175178163e-05, |
| "loss": 0.3525, |
| "step": 21950 |
| }, |
| { |
| "epoch": 1.3956734124214933, |
| "grad_norm": 0.2919745147228241, |
| "learning_rate": 2.6746177758040984e-05, |
| "loss": 0.2974, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.398845397449724, |
| "grad_norm": 0.3903926908969879, |
| "learning_rate": 2.6693311340903803e-05, |
| "loss": 0.2992, |
| "step": 22050 |
| }, |
| { |
| "epoch": 1.4020173824779547, |
| "grad_norm": 0.3180385231971741, |
| "learning_rate": 2.6640444923766628e-05, |
| "loss": 0.2865, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.4051893675061855, |
| "grad_norm": 0.36164039373397827, |
| "learning_rate": 2.6587578506629453e-05, |
| "loss": 0.2636, |
| "step": 22150 |
| }, |
| { |
| "epoch": 1.408361352534416, |
| "grad_norm": 0.6932289004325867, |
| "learning_rate": 2.6534712089492268e-05, |
| "loss": 0.2933, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.4115333375626467, |
| "grad_norm": 0.21415837109088898, |
| "learning_rate": 2.6481845672355093e-05, |
| "loss": 0.2722, |
| "step": 22250 |
| }, |
| { |
| "epoch": 1.4147053225908774, |
| "grad_norm": 0.27378618717193604, |
| "learning_rate": 2.6428979255217918e-05, |
| "loss": 0.3441, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.417877307619108, |
| "grad_norm": 0.2832282483577728, |
| "learning_rate": 2.637611283808074e-05, |
| "loss": 0.2735, |
| "step": 22350 |
| }, |
| { |
| "epoch": 1.4210492926473388, |
| "grad_norm": 0.3578130006790161, |
| "learning_rate": 2.632324642094356e-05, |
| "loss": 0.283, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.4242212776755694, |
| "grad_norm": 0.49478858709335327, |
| "learning_rate": 2.6270380003806383e-05, |
| "loss": 0.3103, |
| "step": 22450 |
| }, |
| { |
| "epoch": 1.4273932627038, |
| "grad_norm": 0.4549751579761505, |
| "learning_rate": 2.6217513586669205e-05, |
| "loss": 0.2965, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.4305652477320308, |
| "grad_norm": 0.24857546389102936, |
| "learning_rate": 2.6164647169532024e-05, |
| "loss": 0.3054, |
| "step": 22550 |
| }, |
| { |
| "epoch": 1.4337372327602613, |
| "grad_norm": 0.35119330883026123, |
| "learning_rate": 2.611178075239485e-05, |
| "loss": 0.2559, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.4369092177884921, |
| "grad_norm": 0.41354435682296753, |
| "learning_rate": 2.6058914335257674e-05, |
| "loss": 0.2945, |
| "step": 22650 |
| }, |
| { |
| "epoch": 1.4400812028167227, |
| "grad_norm": 0.22190292179584503, |
| "learning_rate": 2.6006047918120496e-05, |
| "loss": 0.2905, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.4432531878449533, |
| "grad_norm": 0.1551959365606308, |
| "learning_rate": 2.5953181500983314e-05, |
| "loss": 0.2435, |
| "step": 22750 |
| }, |
| { |
| "epoch": 1.446425172873184, |
| "grad_norm": 0.7655497193336487, |
| "learning_rate": 2.590031508384614e-05, |
| "loss": 0.3264, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.4495971579014146, |
| "grad_norm": 0.5172088742256165, |
| "learning_rate": 2.584744866670896e-05, |
| "loss": 0.2764, |
| "step": 22850 |
| }, |
| { |
| "epoch": 1.4527691429296454, |
| "grad_norm": 0.24481894075870514, |
| "learning_rate": 2.5794582249571786e-05, |
| "loss": 0.3019, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.455941127957876, |
| "grad_norm": 0.5899595618247986, |
| "learning_rate": 2.5741715832434604e-05, |
| "loss": 0.3086, |
| "step": 22950 |
| }, |
| { |
| "epoch": 1.4591131129861066, |
| "grad_norm": 0.28628554940223694, |
| "learning_rate": 2.568884941529743e-05, |
| "loss": 0.2766, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.4622850980143374, |
| "grad_norm": 0.3063284158706665, |
| "learning_rate": 2.563598299816025e-05, |
| "loss": 0.2854, |
| "step": 23050 |
| }, |
| { |
| "epoch": 1.4654570830425682, |
| "grad_norm": 0.23942221701145172, |
| "learning_rate": 2.558311658102307e-05, |
| "loss": 0.2577, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.4686290680707987, |
| "grad_norm": 0.17220012843608856, |
| "learning_rate": 2.5530250163885895e-05, |
| "loss": 0.2941, |
| "step": 23150 |
| }, |
| { |
| "epoch": 1.4718010530990293, |
| "grad_norm": 0.6850319504737854, |
| "learning_rate": 2.5477383746748716e-05, |
| "loss": 0.274, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1.47497303812726, |
| "grad_norm": 0.34509897232055664, |
| "learning_rate": 2.542451732961154e-05, |
| "loss": 0.3397, |
| "step": 23250 |
| }, |
| { |
| "epoch": 1.4781450231554907, |
| "grad_norm": 0.5132359266281128, |
| "learning_rate": 2.537165091247436e-05, |
| "loss": 0.274, |
| "step": 23300 |
| }, |
| { |
| "epoch": 1.4813170081837215, |
| "grad_norm": 0.34320104122161865, |
| "learning_rate": 2.5318784495337182e-05, |
| "loss": 0.2837, |
| "step": 23350 |
| }, |
| { |
| "epoch": 1.484488993211952, |
| "grad_norm": 0.3969442844390869, |
| "learning_rate": 2.5265918078200007e-05, |
| "loss": 0.3049, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.4876609782401826, |
| "grad_norm": 0.25766682624816895, |
| "learning_rate": 2.5213051661062825e-05, |
| "loss": 0.3315, |
| "step": 23450 |
| }, |
| { |
| "epoch": 1.4908329632684134, |
| "grad_norm": 0.1973307579755783, |
| "learning_rate": 2.516018524392565e-05, |
| "loss": 0.3203, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.494004948296644, |
| "grad_norm": 0.5415976643562317, |
| "learning_rate": 2.5107318826788472e-05, |
| "loss": 0.3044, |
| "step": 23550 |
| }, |
| { |
| "epoch": 1.4971769333248748, |
| "grad_norm": 0.3590141236782074, |
| "learning_rate": 2.5054452409651297e-05, |
| "loss": 0.2836, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1.5003489183531054, |
| "grad_norm": 0.3506677448749542, |
| "learning_rate": 2.5001585992514116e-05, |
| "loss": 0.262, |
| "step": 23650 |
| }, |
| { |
| "epoch": 1.503520903381336, |
| "grad_norm": 0.26054617762565613, |
| "learning_rate": 2.4948719575376937e-05, |
| "loss": 0.2966, |
| "step": 23700 |
| }, |
| { |
| "epoch": 1.5066928884095667, |
| "grad_norm": 0.18887007236480713, |
| "learning_rate": 2.489585315823976e-05, |
| "loss": 0.2243, |
| "step": 23750 |
| }, |
| { |
| "epoch": 1.5098648734377975, |
| "grad_norm": 0.4273685812950134, |
| "learning_rate": 2.4842986741102584e-05, |
| "loss": 0.2944, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1.513036858466028, |
| "grad_norm": 0.27183377742767334, |
| "learning_rate": 2.4790120323965406e-05, |
| "loss": 0.2445, |
| "step": 23850 |
| }, |
| { |
| "epoch": 1.5162088434942587, |
| "grad_norm": 0.28723788261413574, |
| "learning_rate": 2.4737253906828228e-05, |
| "loss": 0.2884, |
| "step": 23900 |
| }, |
| { |
| "epoch": 1.5193808285224892, |
| "grad_norm": 0.3014012575149536, |
| "learning_rate": 2.468544481803379e-05, |
| "loss": 0.2676, |
| "step": 23950 |
| }, |
| { |
| "epoch": 1.52255281355072, |
| "grad_norm": 0.4594823122024536, |
| "learning_rate": 2.4632578400896616e-05, |
| "loss": 0.2874, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.5257247985789508, |
| "grad_norm": 0.17278143763542175, |
| "learning_rate": 2.4579711983759438e-05, |
| "loss": 0.2959, |
| "step": 24050 |
| }, |
| { |
| "epoch": 1.5288967836071814, |
| "grad_norm": 0.2626342177391052, |
| "learning_rate": 2.452684556662226e-05, |
| "loss": 0.3108, |
| "step": 24100 |
| }, |
| { |
| "epoch": 1.532068768635412, |
| "grad_norm": 0.42795270681381226, |
| "learning_rate": 2.447397914948508e-05, |
| "loss": 0.3011, |
| "step": 24150 |
| }, |
| { |
| "epoch": 1.5352407536636425, |
| "grad_norm": 0.331232488155365, |
| "learning_rate": 2.4421112732347903e-05, |
| "loss": 0.3191, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1.5384127386918733, |
| "grad_norm": 0.5219537019729614, |
| "learning_rate": 2.436824631521073e-05, |
| "loss": 0.3303, |
| "step": 24250 |
| }, |
| { |
| "epoch": 1.5415847237201041, |
| "grad_norm": 0.2795136272907257, |
| "learning_rate": 2.4315379898073547e-05, |
| "loss": 0.2567, |
| "step": 24300 |
| }, |
| { |
| "epoch": 1.5447567087483347, |
| "grad_norm": 0.20256808400154114, |
| "learning_rate": 2.4262513480936372e-05, |
| "loss": 0.3279, |
| "step": 24350 |
| }, |
| { |
| "epoch": 1.5479286937765653, |
| "grad_norm": 0.6803138852119446, |
| "learning_rate": 2.4209647063799194e-05, |
| "loss": 0.2965, |
| "step": 24400 |
| }, |
| { |
| "epoch": 1.551100678804796, |
| "grad_norm": 0.3239715099334717, |
| "learning_rate": 2.4156780646662016e-05, |
| "loss": 0.2702, |
| "step": 24450 |
| }, |
| { |
| "epoch": 1.5542726638330269, |
| "grad_norm": 0.26858869194984436, |
| "learning_rate": 2.4103914229524837e-05, |
| "loss": 0.2552, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.5574446488612574, |
| "grad_norm": 0.4535232186317444, |
| "learning_rate": 2.405104781238766e-05, |
| "loss": 0.3275, |
| "step": 24550 |
| }, |
| { |
| "epoch": 1.560616633889488, |
| "grad_norm": 0.4622326195240021, |
| "learning_rate": 2.3998181395250484e-05, |
| "loss": 0.2784, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1.5637886189177186, |
| "grad_norm": 0.26528090238571167, |
| "learning_rate": 2.3945314978113303e-05, |
| "loss": 0.3231, |
| "step": 24650 |
| }, |
| { |
| "epoch": 1.5669606039459494, |
| "grad_norm": 0.20564743876457214, |
| "learning_rate": 2.3892448560976128e-05, |
| "loss": 0.2765, |
| "step": 24700 |
| }, |
| { |
| "epoch": 1.5701325889741802, |
| "grad_norm": 0.45677173137664795, |
| "learning_rate": 2.383958214383895e-05, |
| "loss": 0.3207, |
| "step": 24750 |
| }, |
| { |
| "epoch": 1.5733045740024107, |
| "grad_norm": 0.3483443558216095, |
| "learning_rate": 2.378671572670177e-05, |
| "loss": 0.2698, |
| "step": 24800 |
| }, |
| { |
| "epoch": 1.5764765590306413, |
| "grad_norm": 0.44952449202537537, |
| "learning_rate": 2.3733849309564593e-05, |
| "loss": 0.2678, |
| "step": 24850 |
| }, |
| { |
| "epoch": 1.579648544058872, |
| "grad_norm": 0.32127121090888977, |
| "learning_rate": 2.3680982892427418e-05, |
| "loss": 0.2802, |
| "step": 24900 |
| }, |
| { |
| "epoch": 1.5828205290871027, |
| "grad_norm": 0.44746747612953186, |
| "learning_rate": 2.3628116475290236e-05, |
| "loss": 0.24, |
| "step": 24950 |
| }, |
| { |
| "epoch": 1.5859925141153335, |
| "grad_norm": 0.8728600740432739, |
| "learning_rate": 2.3575250058153058e-05, |
| "loss": 0.2903, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.589164499143564, |
| "grad_norm": 0.4112453758716583, |
| "learning_rate": 2.3522383641015883e-05, |
| "loss": 0.2781, |
| "step": 25050 |
| }, |
| { |
| "epoch": 1.5923364841717946, |
| "grad_norm": 0.5081580877304077, |
| "learning_rate": 2.3469517223878705e-05, |
| "loss": 0.2689, |
| "step": 25100 |
| }, |
| { |
| "epoch": 1.5955084692000254, |
| "grad_norm": 0.42527180910110474, |
| "learning_rate": 2.3416650806741527e-05, |
| "loss": 0.2657, |
| "step": 25150 |
| }, |
| { |
| "epoch": 1.598680454228256, |
| "grad_norm": 0.30899807810783386, |
| "learning_rate": 2.336378438960435e-05, |
| "loss": 0.2723, |
| "step": 25200 |
| }, |
| { |
| "epoch": 1.6018524392564868, |
| "grad_norm": 0.276732861995697, |
| "learning_rate": 2.3310917972467174e-05, |
| "loss": 0.2771, |
| "step": 25250 |
| }, |
| { |
| "epoch": 1.6050244242847174, |
| "grad_norm": 0.34849727153778076, |
| "learning_rate": 2.3258051555329992e-05, |
| "loss": 0.2287, |
| "step": 25300 |
| }, |
| { |
| "epoch": 1.608196409312948, |
| "grad_norm": 0.30580082535743713, |
| "learning_rate": 2.3205185138192814e-05, |
| "loss": 0.2579, |
| "step": 25350 |
| }, |
| { |
| "epoch": 1.6113683943411787, |
| "grad_norm": 0.21243813633918762, |
| "learning_rate": 2.315231872105564e-05, |
| "loss": 0.2729, |
| "step": 25400 |
| }, |
| { |
| "epoch": 1.6145403793694095, |
| "grad_norm": 0.3976793885231018, |
| "learning_rate": 2.309945230391846e-05, |
| "loss": 0.3249, |
| "step": 25450 |
| }, |
| { |
| "epoch": 1.61771236439764, |
| "grad_norm": 0.3687296211719513, |
| "learning_rate": 2.3046585886781282e-05, |
| "loss": 0.2978, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.6208843494258707, |
| "grad_norm": 0.6395165324211121, |
| "learning_rate": 2.2993719469644104e-05, |
| "loss": 0.2604, |
| "step": 25550 |
| }, |
| { |
| "epoch": 1.6240563344541012, |
| "grad_norm": 0.31490978598594666, |
| "learning_rate": 2.294085305250693e-05, |
| "loss": 0.3259, |
| "step": 25600 |
| }, |
| { |
| "epoch": 1.627228319482332, |
| "grad_norm": 0.3745858669281006, |
| "learning_rate": 2.2887986635369748e-05, |
| "loss": 0.2989, |
| "step": 25650 |
| }, |
| { |
| "epoch": 1.6304003045105628, |
| "grad_norm": 0.2982928156852722, |
| "learning_rate": 2.2835120218232573e-05, |
| "loss": 0.2532, |
| "step": 25700 |
| }, |
| { |
| "epoch": 1.6335722895387934, |
| "grad_norm": 0.28931793570518494, |
| "learning_rate": 2.2782253801095395e-05, |
| "loss": 0.2652, |
| "step": 25750 |
| }, |
| { |
| "epoch": 1.636744274567024, |
| "grad_norm": 0.27391621470451355, |
| "learning_rate": 2.2729387383958213e-05, |
| "loss": 0.2633, |
| "step": 25800 |
| }, |
| { |
| "epoch": 1.6399162595952546, |
| "grad_norm": 0.32411888241767883, |
| "learning_rate": 2.2676520966821038e-05, |
| "loss": 0.258, |
| "step": 25850 |
| }, |
| { |
| "epoch": 1.6430882446234854, |
| "grad_norm": 0.12455958873033524, |
| "learning_rate": 2.262365454968386e-05, |
| "loss": 0.2943, |
| "step": 25900 |
| }, |
| { |
| "epoch": 1.6462602296517161, |
| "grad_norm": 0.4207943081855774, |
| "learning_rate": 2.257078813254668e-05, |
| "loss": 0.3091, |
| "step": 25950 |
| }, |
| { |
| "epoch": 1.6494322146799467, |
| "grad_norm": 0.23534472286701202, |
| "learning_rate": 2.2517921715409503e-05, |
| "loss": 0.2906, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.6526041997081773, |
| "grad_norm": 0.42088547348976135, |
| "learning_rate": 2.246505529827233e-05, |
| "loss": 0.3157, |
| "step": 26050 |
| }, |
| { |
| "epoch": 1.655776184736408, |
| "grad_norm": 0.18945495784282684, |
| "learning_rate": 2.241218888113515e-05, |
| "loss": 0.2658, |
| "step": 26100 |
| }, |
| { |
| "epoch": 1.6589481697646387, |
| "grad_norm": 0.19314059615135193, |
| "learning_rate": 2.235932246399797e-05, |
| "loss": 0.303, |
| "step": 26150 |
| }, |
| { |
| "epoch": 1.6621201547928695, |
| "grad_norm": 0.6903896927833557, |
| "learning_rate": 2.2307513375203535e-05, |
| "loss": 0.315, |
| "step": 26200 |
| }, |
| { |
| "epoch": 1.6652921398211, |
| "grad_norm": 0.4477178752422333, |
| "learning_rate": 2.2254646958066357e-05, |
| "loss": 0.2688, |
| "step": 26250 |
| }, |
| { |
| "epoch": 1.6684641248493306, |
| "grad_norm": 0.3560877740383148, |
| "learning_rate": 2.2201780540929182e-05, |
| "loss": 0.2728, |
| "step": 26300 |
| }, |
| { |
| "epoch": 1.6716361098775614, |
| "grad_norm": 0.29471373558044434, |
| "learning_rate": 2.2148914123792004e-05, |
| "loss": 0.2232, |
| "step": 26350 |
| }, |
| { |
| "epoch": 1.6748080949057922, |
| "grad_norm": 0.3013167381286621, |
| "learning_rate": 2.2096047706654826e-05, |
| "loss": 0.3367, |
| "step": 26400 |
| }, |
| { |
| "epoch": 1.6779800799340228, |
| "grad_norm": 0.30516213178634644, |
| "learning_rate": 2.2043181289517648e-05, |
| "loss": 0.2794, |
| "step": 26450 |
| }, |
| { |
| "epoch": 1.6811520649622533, |
| "grad_norm": 0.30091190338134766, |
| "learning_rate": 2.1990314872380473e-05, |
| "loss": 0.2783, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.684324049990484, |
| "grad_norm": 0.5126471519470215, |
| "learning_rate": 2.193744845524329e-05, |
| "loss": 0.3011, |
| "step": 26550 |
| }, |
| { |
| "epoch": 1.6874960350187147, |
| "grad_norm": 0.3148995041847229, |
| "learning_rate": 2.1884582038106113e-05, |
| "loss": 0.2673, |
| "step": 26600 |
| }, |
| { |
| "epoch": 1.6906680200469455, |
| "grad_norm": 0.5630244016647339, |
| "learning_rate": 2.1831715620968938e-05, |
| "loss": 0.3006, |
| "step": 26650 |
| }, |
| { |
| "epoch": 1.693840005075176, |
| "grad_norm": 0.23608249425888062, |
| "learning_rate": 2.177884920383176e-05, |
| "loss": 0.2932, |
| "step": 26700 |
| }, |
| { |
| "epoch": 1.6970119901034066, |
| "grad_norm": 0.5582406520843506, |
| "learning_rate": 2.172598278669458e-05, |
| "loss": 0.2894, |
| "step": 26750 |
| }, |
| { |
| "epoch": 1.7001839751316372, |
| "grad_norm": 0.6372901797294617, |
| "learning_rate": 2.1673116369557403e-05, |
| "loss": 0.2811, |
| "step": 26800 |
| }, |
| { |
| "epoch": 1.703355960159868, |
| "grad_norm": 0.45520493388175964, |
| "learning_rate": 2.162024995242023e-05, |
| "loss": 0.2169, |
| "step": 26850 |
| }, |
| { |
| "epoch": 1.7065279451880988, |
| "grad_norm": 0.3398955762386322, |
| "learning_rate": 2.1567383535283047e-05, |
| "loss": 0.2982, |
| "step": 26900 |
| }, |
| { |
| "epoch": 1.7096999302163294, |
| "grad_norm": 0.8106810450553894, |
| "learning_rate": 2.151451711814587e-05, |
| "loss": 0.3525, |
| "step": 26950 |
| }, |
| { |
| "epoch": 1.71287191524456, |
| "grad_norm": 0.2512779235839844, |
| "learning_rate": 2.1461650701008694e-05, |
| "loss": 0.2731, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.7160439002727907, |
| "grad_norm": 0.4722442924976349, |
| "learning_rate": 2.1408784283871512e-05, |
| "loss": 0.2725, |
| "step": 27050 |
| }, |
| { |
| "epoch": 1.7192158853010215, |
| "grad_norm": 0.27375251054763794, |
| "learning_rate": 2.1355917866734337e-05, |
| "loss": 0.3077, |
| "step": 27100 |
| }, |
| { |
| "epoch": 1.7223878703292521, |
| "grad_norm": 0.9621772170066833, |
| "learning_rate": 2.130305144959716e-05, |
| "loss": 0.3281, |
| "step": 27150 |
| }, |
| { |
| "epoch": 1.7255598553574827, |
| "grad_norm": 0.285846084356308, |
| "learning_rate": 2.125018503245998e-05, |
| "loss": 0.301, |
| "step": 27200 |
| }, |
| { |
| "epoch": 1.7287318403857133, |
| "grad_norm": 0.40346983075141907, |
| "learning_rate": 2.1197318615322802e-05, |
| "loss": 0.2653, |
| "step": 27250 |
| }, |
| { |
| "epoch": 1.731903825413944, |
| "grad_norm": 0.4507441818714142, |
| "learning_rate": 2.1144452198185627e-05, |
| "loss": 0.2625, |
| "step": 27300 |
| }, |
| { |
| "epoch": 1.7350758104421748, |
| "grad_norm": 0.4275396168231964, |
| "learning_rate": 2.109158578104845e-05, |
| "loss": 0.2581, |
| "step": 27350 |
| }, |
| { |
| "epoch": 1.7382477954704054, |
| "grad_norm": 0.38401782512664795, |
| "learning_rate": 2.1038719363911268e-05, |
| "loss": 0.2486, |
| "step": 27400 |
| }, |
| { |
| "epoch": 1.741419780498636, |
| "grad_norm": 0.22168871760368347, |
| "learning_rate": 2.0985852946774093e-05, |
| "loss": 0.2556, |
| "step": 27450 |
| }, |
| { |
| "epoch": 1.7445917655268666, |
| "grad_norm": 0.3157811462879181, |
| "learning_rate": 2.0932986529636914e-05, |
| "loss": 0.3277, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.7477637505550974, |
| "grad_norm": 0.37447062134742737, |
| "learning_rate": 2.0880120112499736e-05, |
| "loss": 0.3922, |
| "step": 27550 |
| }, |
| { |
| "epoch": 1.7509357355833282, |
| "grad_norm": 0.2262888103723526, |
| "learning_rate": 2.0827253695362558e-05, |
| "loss": 0.2663, |
| "step": 27600 |
| }, |
| { |
| "epoch": 1.7541077206115587, |
| "grad_norm": 0.2502616047859192, |
| "learning_rate": 2.0774387278225383e-05, |
| "loss": 0.2922, |
| "step": 27650 |
| }, |
| { |
| "epoch": 1.7572797056397893, |
| "grad_norm": 0.5477867126464844, |
| "learning_rate": 2.0721520861088205e-05, |
| "loss": 0.3556, |
| "step": 27700 |
| }, |
| { |
| "epoch": 1.76045169066802, |
| "grad_norm": 0.31725648045539856, |
| "learning_rate": 2.0668654443951023e-05, |
| "loss": 0.2973, |
| "step": 27750 |
| }, |
| { |
| "epoch": 1.7636236756962507, |
| "grad_norm": 0.5742406249046326, |
| "learning_rate": 2.061578802681385e-05, |
| "loss": 0.2641, |
| "step": 27800 |
| }, |
| { |
| "epoch": 1.7667956607244815, |
| "grad_norm": 0.17584888637065887, |
| "learning_rate": 2.056292160967667e-05, |
| "loss": 0.2907, |
| "step": 27850 |
| }, |
| { |
| "epoch": 1.769967645752712, |
| "grad_norm": 0.18802449107170105, |
| "learning_rate": 2.0510055192539492e-05, |
| "loss": 0.3145, |
| "step": 27900 |
| }, |
| { |
| "epoch": 1.7731396307809426, |
| "grad_norm": 0.1602659672498703, |
| "learning_rate": 2.0457188775402314e-05, |
| "loss": 0.3164, |
| "step": 27950 |
| }, |
| { |
| "epoch": 1.7763116158091734, |
| "grad_norm": 0.19660255312919617, |
| "learning_rate": 2.040432235826514e-05, |
| "loss": 0.3053, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.7794836008374042, |
| "grad_norm": 0.20148231089115143, |
| "learning_rate": 2.0351455941127957e-05, |
| "loss": 0.2577, |
| "step": 28050 |
| }, |
| { |
| "epoch": 1.7826555858656348, |
| "grad_norm": 0.68003910779953, |
| "learning_rate": 2.029858952399078e-05, |
| "loss": 0.2914, |
| "step": 28100 |
| }, |
| { |
| "epoch": 1.7858275708938653, |
| "grad_norm": 0.2885502278804779, |
| "learning_rate": 2.0245723106853604e-05, |
| "loss": 0.2864, |
| "step": 28150 |
| }, |
| { |
| "epoch": 1.788999555922096, |
| "grad_norm": 0.29255005717277527, |
| "learning_rate": 2.0192856689716426e-05, |
| "loss": 0.2482, |
| "step": 28200 |
| }, |
| { |
| "epoch": 1.7921715409503267, |
| "grad_norm": 0.24247625470161438, |
| "learning_rate": 2.0139990272579247e-05, |
| "loss": 0.2796, |
| "step": 28250 |
| }, |
| { |
| "epoch": 1.7953435259785575, |
| "grad_norm": 0.5008931159973145, |
| "learning_rate": 2.008712385544207e-05, |
| "loss": 0.3548, |
| "step": 28300 |
| }, |
| { |
| "epoch": 1.798515511006788, |
| "grad_norm": 0.43906369805336, |
| "learning_rate": 2.0034257438304894e-05, |
| "loss": 0.2499, |
| "step": 28350 |
| }, |
| { |
| "epoch": 1.8016874960350187, |
| "grad_norm": 0.37931495904922485, |
| "learning_rate": 1.9981391021167713e-05, |
| "loss": 0.2815, |
| "step": 28400 |
| }, |
| { |
| "epoch": 1.8048594810632492, |
| "grad_norm": 0.6427097320556641, |
| "learning_rate": 1.992958193237328e-05, |
| "loss": 0.3206, |
| "step": 28450 |
| }, |
| { |
| "epoch": 1.80803146609148, |
| "grad_norm": 0.2767207622528076, |
| "learning_rate": 1.98767155152361e-05, |
| "loss": 0.2855, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.8112034511197108, |
| "grad_norm": 0.3229456841945648, |
| "learning_rate": 1.9823849098098923e-05, |
| "loss": 0.2656, |
| "step": 28550 |
| }, |
| { |
| "epoch": 1.8143754361479414, |
| "grad_norm": 0.22634799778461456, |
| "learning_rate": 1.9770982680961748e-05, |
| "loss": 0.3229, |
| "step": 28600 |
| }, |
| { |
| "epoch": 1.817547421176172, |
| "grad_norm": 0.35182350873947144, |
| "learning_rate": 1.9718116263824567e-05, |
| "loss": 0.2949, |
| "step": 28650 |
| }, |
| { |
| "epoch": 1.8207194062044028, |
| "grad_norm": 0.325244277715683, |
| "learning_rate": 1.9665249846687392e-05, |
| "loss": 0.33, |
| "step": 28700 |
| }, |
| { |
| "epoch": 1.8238913912326333, |
| "grad_norm": 0.5972079634666443, |
| "learning_rate": 1.9612383429550214e-05, |
| "loss": 0.2836, |
| "step": 28750 |
| }, |
| { |
| "epoch": 1.8270633762608641, |
| "grad_norm": 0.3747629225254059, |
| "learning_rate": 1.9559517012413035e-05, |
| "loss": 0.2406, |
| "step": 28800 |
| }, |
| { |
| "epoch": 1.8302353612890947, |
| "grad_norm": 0.8294934630393982, |
| "learning_rate": 1.9506650595275857e-05, |
| "loss": 0.2768, |
| "step": 28850 |
| }, |
| { |
| "epoch": 1.8334073463173253, |
| "grad_norm": 0.8470065593719482, |
| "learning_rate": 1.9453784178138682e-05, |
| "loss": 0.3305, |
| "step": 28900 |
| }, |
| { |
| "epoch": 1.836579331345556, |
| "grad_norm": 0.23448146879673004, |
| "learning_rate": 1.9400917761001504e-05, |
| "loss": 0.2622, |
| "step": 28950 |
| }, |
| { |
| "epoch": 1.8397513163737869, |
| "grad_norm": 0.4023194909095764, |
| "learning_rate": 1.9348051343864322e-05, |
| "loss": 0.2734, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.8429233014020174, |
| "grad_norm": 0.24675685167312622, |
| "learning_rate": 1.9295184926727147e-05, |
| "loss": 0.3257, |
| "step": 29050 |
| }, |
| { |
| "epoch": 1.846095286430248, |
| "grad_norm": 0.2382373958826065, |
| "learning_rate": 1.924231850958997e-05, |
| "loss": 0.2646, |
| "step": 29100 |
| }, |
| { |
| "epoch": 1.8492672714584786, |
| "grad_norm": 0.3856213688850403, |
| "learning_rate": 1.918945209245279e-05, |
| "loss": 0.3609, |
| "step": 29150 |
| }, |
| { |
| "epoch": 1.8524392564867094, |
| "grad_norm": 0.46289342641830444, |
| "learning_rate": 1.9136585675315613e-05, |
| "loss": 0.302, |
| "step": 29200 |
| }, |
| { |
| "epoch": 1.8556112415149402, |
| "grad_norm": 0.2916334569454193, |
| "learning_rate": 1.9083719258178438e-05, |
| "loss": 0.3081, |
| "step": 29250 |
| }, |
| { |
| "epoch": 1.8587832265431707, |
| "grad_norm": 0.30459076166152954, |
| "learning_rate": 1.903085284104126e-05, |
| "loss": 0.3154, |
| "step": 29300 |
| }, |
| { |
| "epoch": 1.8619552115714013, |
| "grad_norm": 0.21553024649620056, |
| "learning_rate": 1.8977986423904078e-05, |
| "loss": 0.2745, |
| "step": 29350 |
| }, |
| { |
| "epoch": 1.8651271965996319, |
| "grad_norm": 0.56458580493927, |
| "learning_rate": 1.8925120006766903e-05, |
| "loss": 0.2684, |
| "step": 29400 |
| }, |
| { |
| "epoch": 1.8682991816278627, |
| "grad_norm": 0.2930966913700104, |
| "learning_rate": 1.8872253589629725e-05, |
| "loss": 0.2746, |
| "step": 29450 |
| }, |
| { |
| "epoch": 1.8714711666560935, |
| "grad_norm": 0.24118304252624512, |
| "learning_rate": 1.8819387172492547e-05, |
| "loss": 0.311, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.874643151684324, |
| "grad_norm": 0.5832043886184692, |
| "learning_rate": 1.8766520755355368e-05, |
| "loss": 0.2716, |
| "step": 29550 |
| }, |
| { |
| "epoch": 1.8778151367125546, |
| "grad_norm": 0.4469052255153656, |
| "learning_rate": 1.8713654338218193e-05, |
| "loss": 0.276, |
| "step": 29600 |
| }, |
| { |
| "epoch": 1.8809871217407854, |
| "grad_norm": 0.24563166499137878, |
| "learning_rate": 1.8660787921081012e-05, |
| "loss": 0.2437, |
| "step": 29650 |
| }, |
| { |
| "epoch": 1.8841591067690162, |
| "grad_norm": 0.25196200609207153, |
| "learning_rate": 1.8607921503943833e-05, |
| "loss": 0.2682, |
| "step": 29700 |
| }, |
| { |
| "epoch": 1.8873310917972468, |
| "grad_norm": 0.37745949625968933, |
| "learning_rate": 1.855505508680666e-05, |
| "loss": 0.2565, |
| "step": 29750 |
| }, |
| { |
| "epoch": 1.8905030768254774, |
| "grad_norm": 0.48578011989593506, |
| "learning_rate": 1.850218866966948e-05, |
| "loss": 0.3559, |
| "step": 29800 |
| }, |
| { |
| "epoch": 1.893675061853708, |
| "grad_norm": 0.7121931910514832, |
| "learning_rate": 1.8449322252532302e-05, |
| "loss": 0.3278, |
| "step": 29850 |
| }, |
| { |
| "epoch": 1.8968470468819387, |
| "grad_norm": 0.3295869827270508, |
| "learning_rate": 1.8396455835395124e-05, |
| "loss": 0.2745, |
| "step": 29900 |
| }, |
| { |
| "epoch": 1.9000190319101695, |
| "grad_norm": 0.3305869698524475, |
| "learning_rate": 1.834358941825795e-05, |
| "loss": 0.2895, |
| "step": 29950 |
| }, |
| { |
| "epoch": 1.9031910169384, |
| "grad_norm": 0.16952826082706451, |
| "learning_rate": 1.8290723001120767e-05, |
| "loss": 0.3058, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.9063630019666307, |
| "grad_norm": 0.5475337505340576, |
| "learning_rate": 1.8237856583983593e-05, |
| "loss": 0.261, |
| "step": 30050 |
| }, |
| { |
| "epoch": 1.9095349869948612, |
| "grad_norm": 0.29783618450164795, |
| "learning_rate": 1.8184990166846414e-05, |
| "loss": 0.2663, |
| "step": 30100 |
| }, |
| { |
| "epoch": 1.912706972023092, |
| "grad_norm": 0.22428205609321594, |
| "learning_rate": 1.8132123749709236e-05, |
| "loss": 0.2741, |
| "step": 30150 |
| }, |
| { |
| "epoch": 1.9158789570513228, |
| "grad_norm": 0.29052209854125977, |
| "learning_rate": 1.8079257332572058e-05, |
| "loss": 0.2971, |
| "step": 30200 |
| }, |
| { |
| "epoch": 1.9190509420795534, |
| "grad_norm": 0.49918898940086365, |
| "learning_rate": 1.802639091543488e-05, |
| "loss": 0.3309, |
| "step": 30250 |
| }, |
| { |
| "epoch": 1.922222927107784, |
| "grad_norm": 0.38725921511650085, |
| "learning_rate": 1.7973524498297705e-05, |
| "loss": 0.3191, |
| "step": 30300 |
| }, |
| { |
| "epoch": 1.9253949121360148, |
| "grad_norm": 0.5741166472434998, |
| "learning_rate": 1.7920658081160523e-05, |
| "loss": 0.3136, |
| "step": 30350 |
| }, |
| { |
| "epoch": 1.9285668971642453, |
| "grad_norm": 0.3201332986354828, |
| "learning_rate": 1.7867791664023348e-05, |
| "loss": 0.3066, |
| "step": 30400 |
| }, |
| { |
| "epoch": 1.9317388821924761, |
| "grad_norm": 0.5069090723991394, |
| "learning_rate": 1.781492524688617e-05, |
| "loss": 0.2804, |
| "step": 30450 |
| }, |
| { |
| "epoch": 1.9349108672207067, |
| "grad_norm": 0.18295565247535706, |
| "learning_rate": 1.7762058829748988e-05, |
| "loss": 0.2942, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.9380828522489373, |
| "grad_norm": 0.26717889308929443, |
| "learning_rate": 1.7709192412611813e-05, |
| "loss": 0.2938, |
| "step": 30550 |
| }, |
| { |
| "epoch": 1.941254837277168, |
| "grad_norm": 0.2297358214855194, |
| "learning_rate": 1.7656325995474635e-05, |
| "loss": 0.2656, |
| "step": 30600 |
| }, |
| { |
| "epoch": 1.9444268223053989, |
| "grad_norm": 0.46368107199668884, |
| "learning_rate": 1.7603459578337457e-05, |
| "loss": 0.3266, |
| "step": 30650 |
| }, |
| { |
| "epoch": 1.9475988073336294, |
| "grad_norm": 0.4937555491924286, |
| "learning_rate": 1.7551650489543024e-05, |
| "loss": 0.2992, |
| "step": 30700 |
| }, |
| { |
| "epoch": 1.95077079236186, |
| "grad_norm": 0.7138214111328125, |
| "learning_rate": 1.7498784072405846e-05, |
| "loss": 0.3092, |
| "step": 30750 |
| }, |
| { |
| "epoch": 1.9539427773900906, |
| "grad_norm": 0.26548969745635986, |
| "learning_rate": 1.7445917655268667e-05, |
| "loss": 0.2582, |
| "step": 30800 |
| }, |
| { |
| "epoch": 1.9571147624183214, |
| "grad_norm": 0.29331421852111816, |
| "learning_rate": 1.7393051238131492e-05, |
| "loss": 0.2892, |
| "step": 30850 |
| }, |
| { |
| "epoch": 1.9602867474465522, |
| "grad_norm": 0.29395994544029236, |
| "learning_rate": 1.734018482099431e-05, |
| "loss": 0.2705, |
| "step": 30900 |
| }, |
| { |
| "epoch": 1.9634587324747828, |
| "grad_norm": 0.25719061493873596, |
| "learning_rate": 1.7287318403857133e-05, |
| "loss": 0.2811, |
| "step": 30950 |
| }, |
| { |
| "epoch": 1.9666307175030133, |
| "grad_norm": 0.5885137915611267, |
| "learning_rate": 1.7234451986719958e-05, |
| "loss": 0.2834, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.969802702531244, |
| "grad_norm": 0.43175041675567627, |
| "learning_rate": 1.718158556958278e-05, |
| "loss": 0.2683, |
| "step": 31050 |
| }, |
| { |
| "epoch": 1.9729746875594747, |
| "grad_norm": 0.25399765372276306, |
| "learning_rate": 1.71287191524456e-05, |
| "loss": 0.2932, |
| "step": 31100 |
| }, |
| { |
| "epoch": 1.9761466725877055, |
| "grad_norm": 0.3162282407283783, |
| "learning_rate": 1.7075852735308423e-05, |
| "loss": 0.2753, |
| "step": 31150 |
| }, |
| { |
| "epoch": 1.979318657615936, |
| "grad_norm": 0.5122566223144531, |
| "learning_rate": 1.7022986318171248e-05, |
| "loss": 0.271, |
| "step": 31200 |
| }, |
| { |
| "epoch": 1.9824906426441666, |
| "grad_norm": 0.22120784223079681, |
| "learning_rate": 1.6970119901034066e-05, |
| "loss": 0.3167, |
| "step": 31250 |
| }, |
| { |
| "epoch": 1.9856626276723974, |
| "grad_norm": 0.7437451481819153, |
| "learning_rate": 1.6917253483896888e-05, |
| "loss": 0.3324, |
| "step": 31300 |
| }, |
| { |
| "epoch": 1.988834612700628, |
| "grad_norm": 0.43868857622146606, |
| "learning_rate": 1.6864387066759713e-05, |
| "loss": 0.3211, |
| "step": 31350 |
| }, |
| { |
| "epoch": 1.9920065977288588, |
| "grad_norm": 0.5011254549026489, |
| "learning_rate": 1.6811520649622535e-05, |
| "loss": 0.3138, |
| "step": 31400 |
| }, |
| { |
| "epoch": 1.9951785827570894, |
| "grad_norm": 0.476948618888855, |
| "learning_rate": 1.6758654232485357e-05, |
| "loss": 0.3014, |
| "step": 31450 |
| }, |
| { |
| "epoch": 1.99835056778532, |
| "grad_norm": 0.258881151676178, |
| "learning_rate": 1.670578781534818e-05, |
| "loss": 0.2907, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.31407585740089417, |
| "eval_runtime": 47.9633, |
| "eval_samples_per_second": 34.297, |
| "eval_steps_per_second": 17.159, |
| "step": 31526 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 47289, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.289941398028288e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |