| { |
| "best_global_step": 16135, |
| "best_metric": 0.7960859271865419, |
| "best_model_checkpoint": "/content/drive/MyDrive/\uac10\uc815\ubd84\ub958/data/emotion_model/checkpoint-16135", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 16135, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015494267121165169, |
| "grad_norm": 1.9321871995925903, |
| "learning_rate": 4.5553145336225596e-07, |
| "loss": 1.7919, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.030988534242330338, |
| "grad_norm": 1.7712626457214355, |
| "learning_rate": 9.203594669972111e-07, |
| "loss": 1.7865, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04648280136349551, |
| "grad_norm": 1.9185744524002075, |
| "learning_rate": 1.385187480632166e-06, |
| "loss": 1.7864, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.061977068484660676, |
| "grad_norm": 1.9046003818511963, |
| "learning_rate": 1.8500154942671213e-06, |
| "loss": 1.7864, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07747133560582585, |
| "grad_norm": 2.0914034843444824, |
| "learning_rate": 2.3148435079020763e-06, |
| "loss": 1.7813, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09296560272699102, |
| "grad_norm": 2.088219165802002, |
| "learning_rate": 2.7796715215370313e-06, |
| "loss": 1.7773, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10845986984815618, |
| "grad_norm": 2.1377577781677246, |
| "learning_rate": 3.2444995351719864e-06, |
| "loss": 1.7587, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12395413696932135, |
| "grad_norm": 2.2140750885009766, |
| "learning_rate": 3.7093275488069414e-06, |
| "loss": 1.7388, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13944840409048653, |
| "grad_norm": 2.1278295516967773, |
| "learning_rate": 4.174155562441896e-06, |
| "loss": 1.6861, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1549426712116517, |
| "grad_norm": 4.734658241271973, |
| "learning_rate": 4.638983576076852e-06, |
| "loss": 1.6267, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17043693833281687, |
| "grad_norm": 4.140384197235107, |
| "learning_rate": 5.103811589711806e-06, |
| "loss": 1.5732, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18593120545398203, |
| "grad_norm": 2.705493688583374, |
| "learning_rate": 5.568639603346762e-06, |
| "loss": 1.5438, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2014254725751472, |
| "grad_norm": 5.3791422843933105, |
| "learning_rate": 6.0334676169817164e-06, |
| "loss": 1.4644, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.21691973969631237, |
| "grad_norm": 2.8989250659942627, |
| "learning_rate": 6.498295630616672e-06, |
| "loss": 1.4743, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.23241400681747754, |
| "grad_norm": 3.40291166305542, |
| "learning_rate": 6.963123644251627e-06, |
| "loss": 1.4187, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2479082739386427, |
| "grad_norm": 5.748068809509277, |
| "learning_rate": 7.427951657886583e-06, |
| "loss": 1.3533, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.26340254105980787, |
| "grad_norm": 5.777422904968262, |
| "learning_rate": 7.892779671521537e-06, |
| "loss": 1.3099, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.27889680818097307, |
| "grad_norm": 4.8046112060546875, |
| "learning_rate": 8.357607685156493e-06, |
| "loss": 1.2723, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2943910753021382, |
| "grad_norm": 5.549858570098877, |
| "learning_rate": 8.822435698791447e-06, |
| "loss": 1.2325, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3098853424233034, |
| "grad_norm": 6.851742744445801, |
| "learning_rate": 9.287263712426402e-06, |
| "loss": 1.1884, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32537960954446854, |
| "grad_norm": 5.2497735023498535, |
| "learning_rate": 9.752091726061357e-06, |
| "loss": 1.2224, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.34087387666563373, |
| "grad_norm": 7.023674488067627, |
| "learning_rate": 1.0216919739696313e-05, |
| "loss": 1.177, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3563681437867989, |
| "grad_norm": 4.888996124267578, |
| "learning_rate": 1.0681747753331269e-05, |
| "loss": 1.1577, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.37186241090796407, |
| "grad_norm": 6.2133660316467285, |
| "learning_rate": 1.1146575766966222e-05, |
| "loss": 1.1726, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3873566780291292, |
| "grad_norm": 6.936697483062744, |
| "learning_rate": 1.1611403780601178e-05, |
| "loss": 1.1001, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4028509451502944, |
| "grad_norm": 8.526293754577637, |
| "learning_rate": 1.2076231794236133e-05, |
| "loss": 1.0752, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.41834521227145954, |
| "grad_norm": 5.5933756828308105, |
| "learning_rate": 1.254105980787109e-05, |
| "loss": 1.0809, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.43383947939262474, |
| "grad_norm": 6.998812675476074, |
| "learning_rate": 1.3005887821506042e-05, |
| "loss": 1.0566, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4493337465137899, |
| "grad_norm": 7.077617645263672, |
| "learning_rate": 1.3470715835140998e-05, |
| "loss": 1.0993, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4648280136349551, |
| "grad_norm": 8.715201377868652, |
| "learning_rate": 1.3935543848775953e-05, |
| "loss": 1.0375, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4803222807561202, |
| "grad_norm": 6.017217636108398, |
| "learning_rate": 1.440037186241091e-05, |
| "loss": 1.0388, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4958165478772854, |
| "grad_norm": 5.349973201751709, |
| "learning_rate": 1.4865199876045862e-05, |
| "loss": 1.0354, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5113108149984505, |
| "grad_norm": 12.728338241577148, |
| "learning_rate": 1.533002788968082e-05, |
| "loss": 1.0314, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5268050821196157, |
| "grad_norm": 5.962468147277832, |
| "learning_rate": 1.5794855903315773e-05, |
| "loss": 1.03, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5422993492407809, |
| "grad_norm": 5.971400260925293, |
| "learning_rate": 1.6259683916950726e-05, |
| "loss": 1.0541, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5577936163619461, |
| "grad_norm": 6.260463714599609, |
| "learning_rate": 1.6724511930585682e-05, |
| "loss": 0.9831, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5732878834831112, |
| "grad_norm": 7.8115010261535645, |
| "learning_rate": 1.718933994422064e-05, |
| "loss": 0.966, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5887821506042764, |
| "grad_norm": 5.005403995513916, |
| "learning_rate": 1.7654167957855595e-05, |
| "loss": 0.9592, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6042764177254416, |
| "grad_norm": 7.7732157707214355, |
| "learning_rate": 1.8118995971490548e-05, |
| "loss": 0.9766, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6197706848466068, |
| "grad_norm": 7.265392303466797, |
| "learning_rate": 1.8583823985125504e-05, |
| "loss": 1.0171, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6352649519677719, |
| "grad_norm": 15.946109771728516, |
| "learning_rate": 1.904865199876046e-05, |
| "loss": 0.9824, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6507592190889371, |
| "grad_norm": 7.261445999145508, |
| "learning_rate": 1.9513480012395417e-05, |
| "loss": 0.9699, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6662534862101023, |
| "grad_norm": 8.201744079589844, |
| "learning_rate": 1.997830802603037e-05, |
| "loss": 0.9957, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6817477533312675, |
| "grad_norm": 6.183067798614502, |
| "learning_rate": 2.0443136039665322e-05, |
| "loss": 0.8554, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6972420204524326, |
| "grad_norm": 7.481590270996094, |
| "learning_rate": 2.090796405330028e-05, |
| "loss": 0.9929, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7127362875735977, |
| "grad_norm": 7.3274030685424805, |
| "learning_rate": 2.1372792066935235e-05, |
| "loss": 0.9438, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7282305546947629, |
| "grad_norm": 11.69247055053711, |
| "learning_rate": 2.183762008057019e-05, |
| "loss": 0.9767, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7437248218159281, |
| "grad_norm": 7.929721832275391, |
| "learning_rate": 2.2302448094205144e-05, |
| "loss": 1.0036, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7592190889370932, |
| "grad_norm": 9.753717422485352, |
| "learning_rate": 2.27672761078401e-05, |
| "loss": 0.9726, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.7747133560582584, |
| "grad_norm": 7.797086715698242, |
| "learning_rate": 2.3232104121475057e-05, |
| "loss": 0.9322, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7902076231794236, |
| "grad_norm": 6.927332878112793, |
| "learning_rate": 2.369693213511001e-05, |
| "loss": 0.9378, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8057018903005888, |
| "grad_norm": 3.726092576980591, |
| "learning_rate": 2.4161760148744962e-05, |
| "loss": 0.958, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.821196157421754, |
| "grad_norm": 5.661774635314941, |
| "learning_rate": 2.462658816237992e-05, |
| "loss": 0.9651, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8366904245429191, |
| "grad_norm": 6.513345718383789, |
| "learning_rate": 2.5091416176014875e-05, |
| "loss": 0.971, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8521846916640843, |
| "grad_norm": 6.713255405426025, |
| "learning_rate": 2.555624418964983e-05, |
| "loss": 0.8616, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.8676789587852495, |
| "grad_norm": 8.527266502380371, |
| "learning_rate": 2.6021072203284784e-05, |
| "loss": 0.9413, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8831732259064147, |
| "grad_norm": 6.599502086639404, |
| "learning_rate": 2.648590021691974e-05, |
| "loss": 0.9985, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.8986674930275798, |
| "grad_norm": 4.0680155754089355, |
| "learning_rate": 2.6950728230554697e-05, |
| "loss": 0.9415, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.914161760148745, |
| "grad_norm": 5.083493232727051, |
| "learning_rate": 2.741555624418965e-05, |
| "loss": 0.9805, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9296560272699101, |
| "grad_norm": 4.0469069480896, |
| "learning_rate": 2.7880384257824606e-05, |
| "loss": 0.9547, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9451502943910753, |
| "grad_norm": 6.075752258300781, |
| "learning_rate": 2.834521227145956e-05, |
| "loss": 0.9623, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.9606445615122404, |
| "grad_norm": 6.5252299308776855, |
| "learning_rate": 2.8810040285094515e-05, |
| "loss": 0.9838, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.9761388286334056, |
| "grad_norm": 6.530562877655029, |
| "learning_rate": 2.927486829872947e-05, |
| "loss": 0.911, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.9916330957545708, |
| "grad_norm": 8.217161178588867, |
| "learning_rate": 2.9739696312364428e-05, |
| "loss": 0.9457, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7087787983737389, |
| "eval_f1": 0.7069046924545164, |
| "eval_loss": 0.8573769330978394, |
| "eval_runtime": 25.5149, |
| "eval_samples_per_second": 260.279, |
| "eval_steps_per_second": 16.304, |
| "step": 3227 |
| }, |
| { |
| "epoch": 1.007127362875736, |
| "grad_norm": 4.367455005645752, |
| "learning_rate": 2.997727507488896e-05, |
| "loss": 0.9143, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.022621629996901, |
| "grad_norm": 4.636725902557373, |
| "learning_rate": 2.9925627517818407e-05, |
| "loss": 0.9304, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.0381158971180664, |
| "grad_norm": 4.251437664031982, |
| "learning_rate": 2.9873979960747857e-05, |
| "loss": 0.8549, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.0536101642392315, |
| "grad_norm": 6.648655414581299, |
| "learning_rate": 2.9822332403677307e-05, |
| "loss": 0.8698, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.0691044313603966, |
| "grad_norm": 7.102205276489258, |
| "learning_rate": 2.9770684846606757e-05, |
| "loss": 0.8597, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.0845986984815619, |
| "grad_norm": 10.821270942687988, |
| "learning_rate": 2.9719037289536206e-05, |
| "loss": 0.8457, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.100092965602727, |
| "grad_norm": 6.111588001251221, |
| "learning_rate": 2.9667389732465652e-05, |
| "loss": 0.8973, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.1155872327238923, |
| "grad_norm": 9.016953468322754, |
| "learning_rate": 2.9615742175395106e-05, |
| "loss": 0.8228, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.1310814998450573, |
| "grad_norm": 7.717069625854492, |
| "learning_rate": 2.9564094618324555e-05, |
| "loss": 0.8103, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.1465757669662224, |
| "grad_norm": 7.848579406738281, |
| "learning_rate": 2.9512447061254005e-05, |
| "loss": 0.8097, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.1620700340873877, |
| "grad_norm": 4.738124847412109, |
| "learning_rate": 2.9460799504183455e-05, |
| "loss": 0.9105, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.1775643012085528, |
| "grad_norm": 5.289875507354736, |
| "learning_rate": 2.94091519471129e-05, |
| "loss": 0.8876, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.1930585683297181, |
| "grad_norm": 6.445308685302734, |
| "learning_rate": 2.935750439004235e-05, |
| "loss": 0.8377, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.2085528354508832, |
| "grad_norm": 4.725327968597412, |
| "learning_rate": 2.93058568329718e-05, |
| "loss": 0.9332, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.2240471025720483, |
| "grad_norm": 53.85081481933594, |
| "learning_rate": 2.925420927590125e-05, |
| "loss": 0.8882, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.2395413696932136, |
| "grad_norm": 5.677978515625, |
| "learning_rate": 2.9202561718830703e-05, |
| "loss": 0.8481, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.2550356368143787, |
| "grad_norm": 3.941765785217285, |
| "learning_rate": 2.915091416176015e-05, |
| "loss": 0.835, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.2705299039355438, |
| "grad_norm": 8.099725723266602, |
| "learning_rate": 2.90992666046896e-05, |
| "loss": 0.8322, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.286024171056709, |
| "grad_norm": 6.59591007232666, |
| "learning_rate": 2.904761904761905e-05, |
| "loss": 0.8809, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.3015184381778742, |
| "grad_norm": 7.200226306915283, |
| "learning_rate": 2.8995971490548498e-05, |
| "loss": 0.8609, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.3170127052990392, |
| "grad_norm": 4.902937412261963, |
| "learning_rate": 2.8944323933477948e-05, |
| "loss": 0.8633, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.3325069724202045, |
| "grad_norm": 5.792146682739258, |
| "learning_rate": 2.8892676376407394e-05, |
| "loss": 0.8684, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.3480012395413696, |
| "grad_norm": 4.636809349060059, |
| "learning_rate": 2.8841028819336844e-05, |
| "loss": 0.8245, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.363495506662535, |
| "grad_norm": 5.28842306137085, |
| "learning_rate": 2.8789381262266297e-05, |
| "loss": 0.7859, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.3789897737837, |
| "grad_norm": 4.259128570556641, |
| "learning_rate": 2.8737733705195747e-05, |
| "loss": 0.8228, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.394484040904865, |
| "grad_norm": 7.914375305175781, |
| "learning_rate": 2.8686086148125196e-05, |
| "loss": 0.9448, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.4099783080260304, |
| "grad_norm": 7.636547088623047, |
| "learning_rate": 2.8634438591054643e-05, |
| "loss": 0.8781, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.4254725751471955, |
| "grad_norm": 8.681707382202148, |
| "learning_rate": 2.8582791033984092e-05, |
| "loss": 0.8367, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.4409668422683608, |
| "grad_norm": 7.864759922027588, |
| "learning_rate": 2.8531143476913542e-05, |
| "loss": 0.9088, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.4564611093895259, |
| "grad_norm": 4.892348289489746, |
| "learning_rate": 2.8479495919842992e-05, |
| "loss": 0.8993, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.471955376510691, |
| "grad_norm": 23.208873748779297, |
| "learning_rate": 2.842784836277244e-05, |
| "loss": 0.8542, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.4874496436318563, |
| "grad_norm": 8.983469009399414, |
| "learning_rate": 2.837620080570189e-05, |
| "loss": 0.949, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5029439107530214, |
| "grad_norm": 10.706644058227539, |
| "learning_rate": 2.832455324863134e-05, |
| "loss": 0.9008, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.5184381778741867, |
| "grad_norm": 4.685935020446777, |
| "learning_rate": 2.827290569156079e-05, |
| "loss": 0.8613, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.5339324449953518, |
| "grad_norm": 5.286406993865967, |
| "learning_rate": 2.822125813449024e-05, |
| "loss": 0.8929, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.5494267121165168, |
| "grad_norm": 4.907707691192627, |
| "learning_rate": 2.816961057741969e-05, |
| "loss": 0.8321, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.564920979237682, |
| "grad_norm": 6.398087501525879, |
| "learning_rate": 2.8117963020349136e-05, |
| "loss": 0.8626, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.5804152463588472, |
| "grad_norm": 5.323617458343506, |
| "learning_rate": 2.8066315463278586e-05, |
| "loss": 0.8324, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.5959095134800125, |
| "grad_norm": 4.136271953582764, |
| "learning_rate": 2.8014667906208035e-05, |
| "loss": 0.879, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.6114037806011776, |
| "grad_norm": 6.873619556427002, |
| "learning_rate": 2.796302034913749e-05, |
| "loss": 0.9043, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.6268980477223427, |
| "grad_norm": 7.138693809509277, |
| "learning_rate": 2.7911372792066938e-05, |
| "loss": 0.8183, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.6423923148435078, |
| "grad_norm": 6.483767032623291, |
| "learning_rate": 2.7859725234996384e-05, |
| "loss": 0.8867, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.657886581964673, |
| "grad_norm": 3.2249104976654053, |
| "learning_rate": 2.7808077677925834e-05, |
| "loss": 0.8097, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.6733808490858384, |
| "grad_norm": 6.961575984954834, |
| "learning_rate": 2.7756430120855284e-05, |
| "loss": 0.8364, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.6888751162070035, |
| "grad_norm": 7.0920000076293945, |
| "learning_rate": 2.7704782563784733e-05, |
| "loss": 0.8283, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.7043693833281686, |
| "grad_norm": 5.436604976654053, |
| "learning_rate": 2.7653135006714183e-05, |
| "loss": 0.8786, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.7198636504493336, |
| "grad_norm": 4.0141282081604, |
| "learning_rate": 2.760148744964363e-05, |
| "loss": 0.8452, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.735357917570499, |
| "grad_norm": 5.783074378967285, |
| "learning_rate": 2.7549839892573083e-05, |
| "loss": 0.8168, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.750852184691664, |
| "grad_norm": 7.773756504058838, |
| "learning_rate": 2.7498192335502532e-05, |
| "loss": 0.8598, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.7663464518128293, |
| "grad_norm": 5.375339984893799, |
| "learning_rate": 2.7446544778431982e-05, |
| "loss": 0.8366, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.7818407189339944, |
| "grad_norm": 4.240859031677246, |
| "learning_rate": 2.739489722136143e-05, |
| "loss": 0.8136, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.7973349860551595, |
| "grad_norm": 6.107599258422852, |
| "learning_rate": 2.734324966429088e-05, |
| "loss": 0.8074, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.8128292531763246, |
| "grad_norm": 6.027589797973633, |
| "learning_rate": 2.7291602107220328e-05, |
| "loss": 0.7808, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.82832352029749, |
| "grad_norm": 4.829204559326172, |
| "learning_rate": 2.7239954550149777e-05, |
| "loss": 0.8473, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.8438177874186552, |
| "grad_norm": 5.385358810424805, |
| "learning_rate": 2.7188306993079227e-05, |
| "loss": 0.844, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.8593120545398203, |
| "grad_norm": 5.991063594818115, |
| "learning_rate": 2.713665943600868e-05, |
| "loss": 0.8667, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.8748063216609854, |
| "grad_norm": 4.269604682922363, |
| "learning_rate": 2.708501187893813e-05, |
| "loss": 0.8987, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.8903005887821505, |
| "grad_norm": 6.90878438949585, |
| "learning_rate": 2.7033364321867576e-05, |
| "loss": 0.8517, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.9057948559033158, |
| "grad_norm": 8.742233276367188, |
| "learning_rate": 2.6981716764797026e-05, |
| "loss": 0.8729, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.921289123024481, |
| "grad_norm": 9.10084342956543, |
| "learning_rate": 2.6930069207726475e-05, |
| "loss": 0.8803, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.9367833901456462, |
| "grad_norm": 4.210537433624268, |
| "learning_rate": 2.6878421650655925e-05, |
| "loss": 0.7938, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.9522776572668112, |
| "grad_norm": 6.604791641235352, |
| "learning_rate": 2.6826774093585375e-05, |
| "loss": 0.7958, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.9677719243879763, |
| "grad_norm": 6.213857173919678, |
| "learning_rate": 2.677512653651482e-05, |
| "loss": 0.8463, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.9832661915091416, |
| "grad_norm": 4.303800582885742, |
| "learning_rate": 2.6723478979444274e-05, |
| "loss": 0.7909, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.998760458630307, |
| "grad_norm": 4.933095932006836, |
| "learning_rate": 2.6671831422373724e-05, |
| "loss": 0.7888, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7590724288510766, |
| "eval_f1": 0.7587245577707713, |
| "eval_loss": 0.7010347247123718, |
| "eval_runtime": 25.4199, |
| "eval_samples_per_second": 261.252, |
| "eval_steps_per_second": 16.365, |
| "step": 6454 |
| }, |
| { |
| "epoch": 2.014254725751472, |
| "grad_norm": 4.0570244789123535, |
| "learning_rate": 2.6620183865303173e-05, |
| "loss": 0.7236, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.029748992872637, |
| "grad_norm": 5.307652473449707, |
| "learning_rate": 2.6568536308232623e-05, |
| "loss": 0.7213, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.045243259993802, |
| "grad_norm": 5.398072719573975, |
| "learning_rate": 2.651688875116207e-05, |
| "loss": 0.6839, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.0607375271149673, |
| "grad_norm": 5.296418190002441, |
| "learning_rate": 2.646524119409152e-05, |
| "loss": 0.6856, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.076231794236133, |
| "grad_norm": 4.173377990722656, |
| "learning_rate": 2.641359363702097e-05, |
| "loss": 0.7109, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.091726061357298, |
| "grad_norm": 5.590676784515381, |
| "learning_rate": 2.636194607995042e-05, |
| "loss": 0.6814, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.107220328478463, |
| "grad_norm": 8.112780570983887, |
| "learning_rate": 2.631029852287987e-05, |
| "loss": 0.7302, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.122714595599628, |
| "grad_norm": 6.514364242553711, |
| "learning_rate": 2.6258650965809318e-05, |
| "loss": 0.6917, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.138208862720793, |
| "grad_norm": 8.156841278076172, |
| "learning_rate": 2.6207003408738767e-05, |
| "loss": 0.6568, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.1537031298419587, |
| "grad_norm": 7.641481876373291, |
| "learning_rate": 2.6155355851668217e-05, |
| "loss": 0.6132, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.1691973969631237, |
| "grad_norm": 6.33613395690918, |
| "learning_rate": 2.6103708294597667e-05, |
| "loss": 0.6393, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.184691664084289, |
| "grad_norm": 4.2916436195373535, |
| "learning_rate": 2.6052060737527116e-05, |
| "loss": 0.6709, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.200185931205454, |
| "grad_norm": 4.763488292694092, |
| "learning_rate": 2.6000413180456563e-05, |
| "loss": 0.6919, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.215680198326619, |
| "grad_norm": 8.614394187927246, |
| "learning_rate": 2.5948765623386012e-05, |
| "loss": 0.6501, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.2311744654477845, |
| "grad_norm": 9.684426307678223, |
| "learning_rate": 2.5897118066315465e-05, |
| "loss": 0.6947, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.2466687325689496, |
| "grad_norm": 6.210818767547607, |
| "learning_rate": 2.5845470509244915e-05, |
| "loss": 0.6873, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.2621629996901147, |
| "grad_norm": 6.774372577667236, |
| "learning_rate": 2.5793822952174365e-05, |
| "loss": 0.7195, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.27765726681128, |
| "grad_norm": 6.014688491821289, |
| "learning_rate": 2.574217539510381e-05, |
| "loss": 0.6298, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.293151533932445, |
| "grad_norm": 14.994784355163574, |
| "learning_rate": 2.569052783803326e-05, |
| "loss": 0.7403, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.3086458010536104, |
| "grad_norm": 6.315488815307617, |
| "learning_rate": 2.563888028096271e-05, |
| "loss": 0.6679, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.3241400681747755, |
| "grad_norm": 8.482314109802246, |
| "learning_rate": 2.558723272389216e-05, |
| "loss": 0.7173, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.3396343352959406, |
| "grad_norm": 10.161298751831055, |
| "learning_rate": 2.553558516682161e-05, |
| "loss": 0.732, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.3551286024171056, |
| "grad_norm": 6.758267402648926, |
| "learning_rate": 2.548393760975106e-05, |
| "loss": 0.6192, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.3706228695382707, |
| "grad_norm": 4.528532981872559, |
| "learning_rate": 2.543229005268051e-05, |
| "loss": 0.7614, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.3861171366594363, |
| "grad_norm": 6.397975921630859, |
| "learning_rate": 2.538064249560996e-05, |
| "loss": 0.6951, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.4016114037806013, |
| "grad_norm": 5.440258979797363, |
| "learning_rate": 2.532899493853941e-05, |
| "loss": 0.7188, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.4171056709017664, |
| "grad_norm": 2.4531173706054688, |
| "learning_rate": 2.5277347381468858e-05, |
| "loss": 0.6347, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.4325999380229315, |
| "grad_norm": 15.269991874694824, |
| "learning_rate": 2.5225699824398304e-05, |
| "loss": 0.6601, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.4480942051440966, |
| "grad_norm": 6.438554286956787, |
| "learning_rate": 2.5174052267327754e-05, |
| "loss": 0.698, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.4635884722652617, |
| "grad_norm": 8.922213554382324, |
| "learning_rate": 2.5122404710257204e-05, |
| "loss": 0.6958, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.479082739386427, |
| "grad_norm": 6.724533557891846, |
| "learning_rate": 2.5070757153186657e-05, |
| "loss": 0.7131, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.4945770065075923, |
| "grad_norm": 5.617169380187988, |
| "learning_rate": 2.5019109596116107e-05, |
| "loss": 0.7711, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.5100712736287574, |
| "grad_norm": 6.441185474395752, |
| "learning_rate": 2.4967462039045553e-05, |
| "loss": 0.6612, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.5255655407499225, |
| "grad_norm": 6.033916473388672, |
| "learning_rate": 2.4915814481975003e-05, |
| "loss": 0.698, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.5410598078710875, |
| "grad_norm": 6.174665451049805, |
| "learning_rate": 2.4864166924904452e-05, |
| "loss": 0.6968, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.5565540749922526, |
| "grad_norm": 20.01167869567871, |
| "learning_rate": 2.4812519367833902e-05, |
| "loss": 0.6456, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.572048342113418, |
| "grad_norm": 10.404682159423828, |
| "learning_rate": 2.476087181076335e-05, |
| "loss": 0.6808, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.5875426092345832, |
| "grad_norm": 5.160488128662109, |
| "learning_rate": 2.47092242536928e-05, |
| "loss": 0.6913, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.6030368763557483, |
| "grad_norm": 6.452591896057129, |
| "learning_rate": 2.465757669662225e-05, |
| "loss": 0.6594, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.6185311434769134, |
| "grad_norm": 12.436300277709961, |
| "learning_rate": 2.46059291395517e-05, |
| "loss": 0.7255, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.6340254105980785, |
| "grad_norm": 6.132791042327881, |
| "learning_rate": 2.455428158248115e-05, |
| "loss": 0.6753, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.649519677719244, |
| "grad_norm": 10.712909698486328, |
| "learning_rate": 2.45026340254106e-05, |
| "loss": 0.6445, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.665013944840409, |
| "grad_norm": 12.122429847717285, |
| "learning_rate": 2.445098646834005e-05, |
| "loss": 0.6424, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.680508211961574, |
| "grad_norm": 8.575897216796875, |
| "learning_rate": 2.4399338911269496e-05, |
| "loss": 0.7242, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.6960024790827393, |
| "grad_norm": 8.740906715393066, |
| "learning_rate": 2.4347691354198946e-05, |
| "loss": 0.6949, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.7114967462039044, |
| "grad_norm": 4.871994972229004, |
| "learning_rate": 2.4296043797128395e-05, |
| "loss": 0.787, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.72699101332507, |
| "grad_norm": 6.642944812774658, |
| "learning_rate": 2.424439624005785e-05, |
| "loss": 0.6925, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.742485280446235, |
| "grad_norm": 12.149236679077148, |
| "learning_rate": 2.4192748682987298e-05, |
| "loss": 0.6972, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.7579795475674, |
| "grad_norm": 8.100613594055176, |
| "learning_rate": 2.4141101125916744e-05, |
| "loss": 0.7358, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.773473814688565, |
| "grad_norm": 12.28987979888916, |
| "learning_rate": 2.4089453568846194e-05, |
| "loss": 0.7176, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.78896808180973, |
| "grad_norm": 9.355488777160645, |
| "learning_rate": 2.4037806011775644e-05, |
| "loss": 0.6856, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.8044623489308957, |
| "grad_norm": 11.875406265258789, |
| "learning_rate": 2.3986158454705093e-05, |
| "loss": 0.6501, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.819956616052061, |
| "grad_norm": 8.061235427856445, |
| "learning_rate": 2.3934510897634543e-05, |
| "loss": 0.6823, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.835450883173226, |
| "grad_norm": 7.949320316314697, |
| "learning_rate": 2.388286334056399e-05, |
| "loss": 0.6764, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.850945150294391, |
| "grad_norm": 5.9249587059021, |
| "learning_rate": 2.3831215783493442e-05, |
| "loss": 0.6511, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.866439417415556, |
| "grad_norm": 8.400185585021973, |
| "learning_rate": 2.3779568226422892e-05, |
| "loss": 0.6515, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.8819336845367216, |
| "grad_norm": 11.487894058227539, |
| "learning_rate": 2.3727920669352342e-05, |
| "loss": 0.6719, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.8974279516578867, |
| "grad_norm": 8.317901611328125, |
| "learning_rate": 2.367627311228179e-05, |
| "loss": 0.6697, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.9129222187790518, |
| "grad_norm": 9.878332138061523, |
| "learning_rate": 2.3624625555211238e-05, |
| "loss": 0.6801, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.928416485900217, |
| "grad_norm": 8.855628967285156, |
| "learning_rate": 2.3572977998140687e-05, |
| "loss": 0.6445, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.943910753021382, |
| "grad_norm": 5.350094318389893, |
| "learning_rate": 2.3521330441070137e-05, |
| "loss": 0.6891, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.9594050201425475, |
| "grad_norm": 8.540812492370605, |
| "learning_rate": 2.3469682883999587e-05, |
| "loss": 0.6556, |
| "step": 9550 |
| }, |
| { |
| "epoch": 2.9748992872637126, |
| "grad_norm": 4.337664604187012, |
| "learning_rate": 2.341803532692904e-05, |
| "loss": 0.7013, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.9903935543848776, |
| "grad_norm": 7.002617359161377, |
| "learning_rate": 2.3366387769858486e-05, |
| "loss": 0.6518, |
| "step": 9650 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.777292576419214, |
| "eval_f1": 0.775972842719567, |
| "eval_loss": 0.69657963514328, |
| "eval_runtime": 25.4856, |
| "eval_samples_per_second": 260.579, |
| "eval_steps_per_second": 16.323, |
| "step": 9681 |
| }, |
| { |
| "epoch": 3.0058878215060427, |
| "grad_norm": 9.228548049926758, |
| "learning_rate": 2.3314740212787936e-05, |
| "loss": 0.5385, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.021382088627208, |
| "grad_norm": 4.332932472229004, |
| "learning_rate": 2.3263092655717385e-05, |
| "loss": 0.5513, |
| "step": 9750 |
| }, |
| { |
| "epoch": 3.036876355748373, |
| "grad_norm": 6.478864669799805, |
| "learning_rate": 2.3211445098646835e-05, |
| "loss": 0.4542, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.0523706228695384, |
| "grad_norm": 14.028499603271484, |
| "learning_rate": 2.3159797541576285e-05, |
| "loss": 0.4549, |
| "step": 9850 |
| }, |
| { |
| "epoch": 3.0678648899907035, |
| "grad_norm": 5.590787887573242, |
| "learning_rate": 2.310814998450573e-05, |
| "loss": 0.4624, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.0833591571118686, |
| "grad_norm": 5.623167514801025, |
| "learning_rate": 2.305650242743518e-05, |
| "loss": 0.4479, |
| "step": 9950 |
| }, |
| { |
| "epoch": 3.0988534242330337, |
| "grad_norm": 10.343826293945312, |
| "learning_rate": 2.3004854870364634e-05, |
| "loss": 0.5079, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.1143476913541988, |
| "grad_norm": 2.780686616897583, |
| "learning_rate": 2.2953207313294084e-05, |
| "loss": 0.43, |
| "step": 10050 |
| }, |
| { |
| "epoch": 3.1298419584753643, |
| "grad_norm": 10.917914390563965, |
| "learning_rate": 2.2901559756223533e-05, |
| "loss": 0.4932, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.1453362255965294, |
| "grad_norm": 14.870561599731445, |
| "learning_rate": 2.284991219915298e-05, |
| "loss": 0.483, |
| "step": 10150 |
| }, |
| { |
| "epoch": 3.1608304927176945, |
| "grad_norm": 15.64564323425293, |
| "learning_rate": 2.279826464208243e-05, |
| "loss": 0.5047, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.1763247598388595, |
| "grad_norm": 8.148391723632812, |
| "learning_rate": 2.274661708501188e-05, |
| "loss": 0.498, |
| "step": 10250 |
| }, |
| { |
| "epoch": 3.1918190269600246, |
| "grad_norm": 9.916448593139648, |
| "learning_rate": 2.269496952794133e-05, |
| "loss": 0.4896, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.20731329408119, |
| "grad_norm": 10.014134407043457, |
| "learning_rate": 2.2643321970870778e-05, |
| "loss": 0.4572, |
| "step": 10350 |
| }, |
| { |
| "epoch": 3.2228075612023552, |
| "grad_norm": 9.647527694702148, |
| "learning_rate": 2.2591674413800228e-05, |
| "loss": 0.4965, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.2383018283235203, |
| "grad_norm": 11.77087116241455, |
| "learning_rate": 2.2540026856729678e-05, |
| "loss": 0.512, |
| "step": 10450 |
| }, |
| { |
| "epoch": 3.2537960954446854, |
| "grad_norm": 3.3613386154174805, |
| "learning_rate": 2.2488379299659127e-05, |
| "loss": 0.5522, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.2692903625658505, |
| "grad_norm": 17.92693519592285, |
| "learning_rate": 2.2436731742588577e-05, |
| "loss": 0.4915, |
| "step": 10550 |
| }, |
| { |
| "epoch": 3.2847846296870156, |
| "grad_norm": 8.389365196228027, |
| "learning_rate": 2.2385084185518027e-05, |
| "loss": 0.5343, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.300278896808181, |
| "grad_norm": 9.849445343017578, |
| "learning_rate": 2.2333436628447473e-05, |
| "loss": 0.4925, |
| "step": 10650 |
| }, |
| { |
| "epoch": 3.315773163929346, |
| "grad_norm": 7.494227886199951, |
| "learning_rate": 2.2281789071376923e-05, |
| "loss": 0.5242, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.3312674310505113, |
| "grad_norm": 12.774617195129395, |
| "learning_rate": 2.2230141514306372e-05, |
| "loss": 0.522, |
| "step": 10750 |
| }, |
| { |
| "epoch": 3.3467616981716763, |
| "grad_norm": 4.167229175567627, |
| "learning_rate": 2.2178493957235822e-05, |
| "loss": 0.4852, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.3622559652928414, |
| "grad_norm": 7.823596000671387, |
| "learning_rate": 2.2126846400165275e-05, |
| "loss": 0.521, |
| "step": 10850 |
| }, |
| { |
| "epoch": 3.377750232414007, |
| "grad_norm": 9.712186813354492, |
| "learning_rate": 2.2075198843094725e-05, |
| "loss": 0.4931, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.393244499535172, |
| "grad_norm": 9.726935386657715, |
| "learning_rate": 2.202355128602417e-05, |
| "loss": 0.531, |
| "step": 10950 |
| }, |
| { |
| "epoch": 3.408738766656337, |
| "grad_norm": 8.613348007202148, |
| "learning_rate": 2.197190372895362e-05, |
| "loss": 0.4902, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.424233033777502, |
| "grad_norm": 17.698650360107422, |
| "learning_rate": 2.192025617188307e-05, |
| "loss": 0.4967, |
| "step": 11050 |
| }, |
| { |
| "epoch": 3.4397273008986673, |
| "grad_norm": 13.304680824279785, |
| "learning_rate": 2.186860861481252e-05, |
| "loss": 0.4998, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.455221568019833, |
| "grad_norm": 9.090615272521973, |
| "learning_rate": 2.181696105774197e-05, |
| "loss": 0.4797, |
| "step": 11150 |
| }, |
| { |
| "epoch": 3.470715835140998, |
| "grad_norm": 6.544071197509766, |
| "learning_rate": 2.176531350067142e-05, |
| "loss": 0.5405, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.486210102262163, |
| "grad_norm": 10.908158302307129, |
| "learning_rate": 2.171366594360087e-05, |
| "loss": 0.4663, |
| "step": 11250 |
| }, |
| { |
| "epoch": 3.501704369383328, |
| "grad_norm": 9.044700622558594, |
| "learning_rate": 2.166201838653032e-05, |
| "loss": 0.4755, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.517198636504493, |
| "grad_norm": 7.633232116699219, |
| "learning_rate": 2.161037082945977e-05, |
| "loss": 0.4182, |
| "step": 11350 |
| }, |
| { |
| "epoch": 3.5326929036256587, |
| "grad_norm": 5.32473087310791, |
| "learning_rate": 2.1558723272389218e-05, |
| "loss": 0.4987, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.5481871707468238, |
| "grad_norm": 9.8456392288208, |
| "learning_rate": 2.1507075715318664e-05, |
| "loss": 0.587, |
| "step": 11450 |
| }, |
| { |
| "epoch": 3.563681437867989, |
| "grad_norm": 12.52115535736084, |
| "learning_rate": 2.1455428158248114e-05, |
| "loss": 0.5331, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.579175704989154, |
| "grad_norm": 18.225566864013672, |
| "learning_rate": 2.1403780601177564e-05, |
| "loss": 0.4794, |
| "step": 11550 |
| }, |
| { |
| "epoch": 3.594669972110319, |
| "grad_norm": 8.749368667602539, |
| "learning_rate": 2.1352133044107013e-05, |
| "loss": 0.4967, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.6101642392314846, |
| "grad_norm": 8.760223388671875, |
| "learning_rate": 2.1300485487036466e-05, |
| "loss": 0.4963, |
| "step": 11650 |
| }, |
| { |
| "epoch": 3.6256585063526496, |
| "grad_norm": 15.518270492553711, |
| "learning_rate": 2.1248837929965913e-05, |
| "loss": 0.4456, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.6411527734738147, |
| "grad_norm": 9.451664924621582, |
| "learning_rate": 2.1197190372895362e-05, |
| "loss": 0.5235, |
| "step": 11750 |
| }, |
| { |
| "epoch": 3.65664704059498, |
| "grad_norm": 17.736055374145508, |
| "learning_rate": 2.1145542815824812e-05, |
| "loss": 0.4876, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.672141307716145, |
| "grad_norm": 24.323490142822266, |
| "learning_rate": 2.1093895258754262e-05, |
| "loss": 0.483, |
| "step": 11850 |
| }, |
| { |
| "epoch": 3.6876355748373104, |
| "grad_norm": 15.389254570007324, |
| "learning_rate": 2.104224770168371e-05, |
| "loss": 0.5127, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.7031298419584755, |
| "grad_norm": 11.283272743225098, |
| "learning_rate": 2.0990600144613158e-05, |
| "loss": 0.5282, |
| "step": 11950 |
| }, |
| { |
| "epoch": 3.7186241090796406, |
| "grad_norm": 11.002310752868652, |
| "learning_rate": 2.0938952587542607e-05, |
| "loss": 0.5459, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.7341183762008057, |
| "grad_norm": 6.972140312194824, |
| "learning_rate": 2.088730503047206e-05, |
| "loss": 0.536, |
| "step": 12050 |
| }, |
| { |
| "epoch": 3.7496126433219708, |
| "grad_norm": 4.202858924865723, |
| "learning_rate": 2.083565747340151e-05, |
| "loss": 0.5736, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.7651069104431363, |
| "grad_norm": 15.748515129089355, |
| "learning_rate": 2.078400991633096e-05, |
| "loss": 0.4715, |
| "step": 12150 |
| }, |
| { |
| "epoch": 3.7806011775643014, |
| "grad_norm": 6.696774482727051, |
| "learning_rate": 2.0732362359260406e-05, |
| "loss": 0.5545, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.7960954446854664, |
| "grad_norm": 7.366288661956787, |
| "learning_rate": 2.0680714802189856e-05, |
| "loss": 0.5736, |
| "step": 12250 |
| }, |
| { |
| "epoch": 3.8115897118066315, |
| "grad_norm": 13.58438777923584, |
| "learning_rate": 2.0629067245119306e-05, |
| "loss": 0.4255, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.8270839789277966, |
| "grad_norm": 9.109688758850098, |
| "learning_rate": 2.0577419688048755e-05, |
| "loss": 0.4565, |
| "step": 12350 |
| }, |
| { |
| "epoch": 3.842578246048962, |
| "grad_norm": 11.448044776916504, |
| "learning_rate": 2.0525772130978205e-05, |
| "loss": 0.5117, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.858072513170127, |
| "grad_norm": 6.876945495605469, |
| "learning_rate": 2.0474124573907655e-05, |
| "loss": 0.5543, |
| "step": 12450 |
| }, |
| { |
| "epoch": 3.8735667802912923, |
| "grad_norm": 11.25009536743164, |
| "learning_rate": 2.0422477016837104e-05, |
| "loss": 0.456, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.8890610474124574, |
| "grad_norm": 13.992502212524414, |
| "learning_rate": 2.0370829459766554e-05, |
| "loss": 0.4907, |
| "step": 12550 |
| }, |
| { |
| "epoch": 3.9045553145336225, |
| "grad_norm": 11.92656421661377, |
| "learning_rate": 2.0319181902696004e-05, |
| "loss": 0.4841, |
| "step": 12600 |
| }, |
| { |
| "epoch": 3.9200495816547876, |
| "grad_norm": 7.212582111358643, |
| "learning_rate": 2.0267534345625453e-05, |
| "loss": 0.5529, |
| "step": 12650 |
| }, |
| { |
| "epoch": 3.9355438487759526, |
| "grad_norm": 14.616645812988281, |
| "learning_rate": 2.02158867885549e-05, |
| "loss": 0.5366, |
| "step": 12700 |
| }, |
| { |
| "epoch": 3.951038115897118, |
| "grad_norm": 9.052292823791504, |
| "learning_rate": 2.016423923148435e-05, |
| "loss": 0.5459, |
| "step": 12750 |
| }, |
| { |
| "epoch": 3.9665323830182833, |
| "grad_norm": 18.27539825439453, |
| "learning_rate": 2.01125916744138e-05, |
| "loss": 0.5631, |
| "step": 12800 |
| }, |
| { |
| "epoch": 3.9820266501394483, |
| "grad_norm": 12.429372787475586, |
| "learning_rate": 2.0060944117343252e-05, |
| "loss": 0.4885, |
| "step": 12850 |
| }, |
| { |
| "epoch": 3.9975209172606134, |
| "grad_norm": 4.481673240661621, |
| "learning_rate": 2.00092965602727e-05, |
| "loss": 0.4565, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7863273603372986, |
| "eval_f1": 0.7874334496964743, |
| "eval_loss": 0.7491569519042969, |
| "eval_runtime": 25.5609, |
| "eval_samples_per_second": 259.811, |
| "eval_steps_per_second": 16.275, |
| "step": 12908 |
| }, |
| { |
| "epoch": 4.0130151843817785, |
| "grad_norm": 21.53974151611328, |
| "learning_rate": 1.9957649003202148e-05, |
| "loss": 0.2695, |
| "step": 12950 |
| }, |
| { |
| "epoch": 4.028509451502944, |
| "grad_norm": 9.07942008972168, |
| "learning_rate": 1.9906001446131598e-05, |
| "loss": 0.3282, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.044003718624109, |
| "grad_norm": 16.323549270629883, |
| "learning_rate": 1.9854353889061047e-05, |
| "loss": 0.3079, |
| "step": 13050 |
| }, |
| { |
| "epoch": 4.059497985745274, |
| "grad_norm": 6.679697036743164, |
| "learning_rate": 1.9802706331990497e-05, |
| "loss": 0.3889, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.07499225286644, |
| "grad_norm": 17.357574462890625, |
| "learning_rate": 1.9751058774919947e-05, |
| "loss": 0.3336, |
| "step": 13150 |
| }, |
| { |
| "epoch": 4.090486519987604, |
| "grad_norm": 5.116195201873779, |
| "learning_rate": 1.9699411217849393e-05, |
| "loss": 0.3102, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.10598078710877, |
| "grad_norm": 29.05538558959961, |
| "learning_rate": 1.9647763660778846e-05, |
| "loss": 0.2902, |
| "step": 13250 |
| }, |
| { |
| "epoch": 4.1214750542299345, |
| "grad_norm": 6.254473686218262, |
| "learning_rate": 1.9596116103708296e-05, |
| "loss": 0.3816, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.1369693213511, |
| "grad_norm": 11.854185104370117, |
| "learning_rate": 1.9544468546637745e-05, |
| "loss": 0.3455, |
| "step": 13350 |
| }, |
| { |
| "epoch": 4.152463588472266, |
| "grad_norm": 16.399444580078125, |
| "learning_rate": 1.9492820989567195e-05, |
| "loss": 0.3713, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.16795785559343, |
| "grad_norm": 18.26226234436035, |
| "learning_rate": 1.9441173432496645e-05, |
| "loss": 0.2957, |
| "step": 13450 |
| }, |
| { |
| "epoch": 4.183452122714596, |
| "grad_norm": 6.590181350708008, |
| "learning_rate": 1.938952587542609e-05, |
| "loss": 0.2905, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.19894638983576, |
| "grad_norm": 5.3814849853515625, |
| "learning_rate": 1.933787831835554e-05, |
| "loss": 0.3782, |
| "step": 13550 |
| }, |
| { |
| "epoch": 4.214440656956926, |
| "grad_norm": 8.641956329345703, |
| "learning_rate": 1.928623076128499e-05, |
| "loss": 0.3211, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.2299349240780915, |
| "grad_norm": 14.346405982971191, |
| "learning_rate": 1.9234583204214443e-05, |
| "loss": 0.3274, |
| "step": 13650 |
| }, |
| { |
| "epoch": 4.245429191199256, |
| "grad_norm": 15.577725410461426, |
| "learning_rate": 1.9182935647143893e-05, |
| "loss": 0.3568, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.260923458320422, |
| "grad_norm": 9.855398178100586, |
| "learning_rate": 1.913128809007334e-05, |
| "loss": 0.3008, |
| "step": 13750 |
| }, |
| { |
| "epoch": 4.276417725441586, |
| "grad_norm": 15.720294952392578, |
| "learning_rate": 1.907964053300279e-05, |
| "loss": 0.2979, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.291911992562752, |
| "grad_norm": 13.976778030395508, |
| "learning_rate": 1.902799297593224e-05, |
| "loss": 0.3389, |
| "step": 13850 |
| }, |
| { |
| "epoch": 4.307406259683917, |
| "grad_norm": 19.255727767944336, |
| "learning_rate": 1.897634541886169e-05, |
| "loss": 0.3636, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.322900526805082, |
| "grad_norm": 10.70836353302002, |
| "learning_rate": 1.8924697861791138e-05, |
| "loss": 0.3455, |
| "step": 13950 |
| }, |
| { |
| "epoch": 4.3383947939262475, |
| "grad_norm": 0.9212763905525208, |
| "learning_rate": 1.8873050304720584e-05, |
| "loss": 0.3703, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.353889061047412, |
| "grad_norm": 10.232623100280762, |
| "learning_rate": 1.8821402747650037e-05, |
| "loss": 0.3247, |
| "step": 14050 |
| }, |
| { |
| "epoch": 4.369383328168578, |
| "grad_norm": 11.130922317504883, |
| "learning_rate": 1.8769755190579487e-05, |
| "loss": 0.3107, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.384877595289743, |
| "grad_norm": 10.536752700805664, |
| "learning_rate": 1.8718107633508937e-05, |
| "loss": 0.3614, |
| "step": 14150 |
| }, |
| { |
| "epoch": 4.400371862410908, |
| "grad_norm": 15.330968856811523, |
| "learning_rate": 1.8666460076438386e-05, |
| "loss": 0.3984, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.415866129532073, |
| "grad_norm": 7.436588764190674, |
| "learning_rate": 1.8614812519367833e-05, |
| "loss": 0.3257, |
| "step": 14250 |
| }, |
| { |
| "epoch": 4.431360396653238, |
| "grad_norm": 7.192384243011475, |
| "learning_rate": 1.8563164962297282e-05, |
| "loss": 0.3254, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.4468546637744035, |
| "grad_norm": 7.792993545532227, |
| "learning_rate": 1.8511517405226732e-05, |
| "loss": 0.3392, |
| "step": 14350 |
| }, |
| { |
| "epoch": 4.462348930895569, |
| "grad_norm": 12.411416053771973, |
| "learning_rate": 1.8459869848156182e-05, |
| "loss": 0.3383, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.477843198016734, |
| "grad_norm": 17.897613525390625, |
| "learning_rate": 1.8408222291085635e-05, |
| "loss": 0.3392, |
| "step": 14450 |
| }, |
| { |
| "epoch": 4.493337465137899, |
| "grad_norm": 23.59228515625, |
| "learning_rate": 1.835657473401508e-05, |
| "loss": 0.3055, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.508831732259064, |
| "grad_norm": 13.722383499145508, |
| "learning_rate": 1.830492717694453e-05, |
| "loss": 0.3997, |
| "step": 14550 |
| }, |
| { |
| "epoch": 4.524325999380229, |
| "grad_norm": 17.811538696289062, |
| "learning_rate": 1.825327961987398e-05, |
| "loss": 0.266, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.539820266501394, |
| "grad_norm": 10.993431091308594, |
| "learning_rate": 1.820163206280343e-05, |
| "loss": 0.2634, |
| "step": 14650 |
| }, |
| { |
| "epoch": 4.55531453362256, |
| "grad_norm": 5.25628137588501, |
| "learning_rate": 1.814998450573288e-05, |
| "loss": 0.3563, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.570808800743725, |
| "grad_norm": 16.91241455078125, |
| "learning_rate": 1.8098336948662326e-05, |
| "loss": 0.3298, |
| "step": 14750 |
| }, |
| { |
| "epoch": 4.58630306786489, |
| "grad_norm": 27.083995819091797, |
| "learning_rate": 1.8046689391591776e-05, |
| "loss": 0.36, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.601797334986055, |
| "grad_norm": 19.726198196411133, |
| "learning_rate": 1.799504183452123e-05, |
| "loss": 0.3224, |
| "step": 14850 |
| }, |
| { |
| "epoch": 4.617291602107221, |
| "grad_norm": 6.92859411239624, |
| "learning_rate": 1.794339427745068e-05, |
| "loss": 0.335, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.632785869228385, |
| "grad_norm": 15.97644329071045, |
| "learning_rate": 1.7891746720380128e-05, |
| "loss": 0.3303, |
| "step": 14950 |
| }, |
| { |
| "epoch": 4.648280136349551, |
| "grad_norm": 24.399837493896484, |
| "learning_rate": 1.7840099163309575e-05, |
| "loss": 0.3492, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.663774403470716, |
| "grad_norm": 10.855368614196777, |
| "learning_rate": 1.7788451606239024e-05, |
| "loss": 0.3278, |
| "step": 15050 |
| }, |
| { |
| "epoch": 4.679268670591881, |
| "grad_norm": 20.869380950927734, |
| "learning_rate": 1.7736804049168474e-05, |
| "loss": 0.2913, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.694762937713046, |
| "grad_norm": 6.862913131713867, |
| "learning_rate": 1.7685156492097924e-05, |
| "loss": 0.3133, |
| "step": 15150 |
| }, |
| { |
| "epoch": 4.710257204834211, |
| "grad_norm": 19.621482849121094, |
| "learning_rate": 1.7633508935027373e-05, |
| "loss": 0.3456, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.725751471955377, |
| "grad_norm": 19.79738998413086, |
| "learning_rate": 1.7581861377956823e-05, |
| "loss": 0.3562, |
| "step": 15250 |
| }, |
| { |
| "epoch": 4.7412457390765415, |
| "grad_norm": 3.2352957725524902, |
| "learning_rate": 1.7530213820886273e-05, |
| "loss": 0.3565, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.756740006197707, |
| "grad_norm": 10.959282875061035, |
| "learning_rate": 1.7478566263815722e-05, |
| "loss": 0.3472, |
| "step": 15350 |
| }, |
| { |
| "epoch": 4.7722342733188725, |
| "grad_norm": 3.22469162940979, |
| "learning_rate": 1.7426918706745172e-05, |
| "loss": 0.3367, |
| "step": 15400 |
| }, |
| { |
| "epoch": 4.787728540440037, |
| "grad_norm": 7.619373798370361, |
| "learning_rate": 1.737527114967462e-05, |
| "loss": 0.319, |
| "step": 15450 |
| }, |
| { |
| "epoch": 4.803222807561203, |
| "grad_norm": 24.706689834594727, |
| "learning_rate": 1.7323623592604068e-05, |
| "loss": 0.3939, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.818717074682367, |
| "grad_norm": 15.918986320495605, |
| "learning_rate": 1.7271976035533518e-05, |
| "loss": 0.3618, |
| "step": 15550 |
| }, |
| { |
| "epoch": 4.834211341803533, |
| "grad_norm": 14.518546104431152, |
| "learning_rate": 1.7220328478462967e-05, |
| "loss": 0.4082, |
| "step": 15600 |
| }, |
| { |
| "epoch": 4.8497056089246975, |
| "grad_norm": 6.084866046905518, |
| "learning_rate": 1.716868092139242e-05, |
| "loss": 0.3594, |
| "step": 15650 |
| }, |
| { |
| "epoch": 4.865199876045863, |
| "grad_norm": 18.435983657836914, |
| "learning_rate": 1.711703336432187e-05, |
| "loss": 0.3182, |
| "step": 15700 |
| }, |
| { |
| "epoch": 4.8806941431670285, |
| "grad_norm": 14.745248794555664, |
| "learning_rate": 1.7065385807251316e-05, |
| "loss": 0.3375, |
| "step": 15750 |
| }, |
| { |
| "epoch": 4.896188410288193, |
| "grad_norm": 11.518832206726074, |
| "learning_rate": 1.7013738250180766e-05, |
| "loss": 0.3371, |
| "step": 15800 |
| }, |
| { |
| "epoch": 4.911682677409359, |
| "grad_norm": 17.58115005493164, |
| "learning_rate": 1.6962090693110216e-05, |
| "loss": 0.3851, |
| "step": 15850 |
| }, |
| { |
| "epoch": 4.927176944530523, |
| "grad_norm": 16.769134521484375, |
| "learning_rate": 1.6910443136039665e-05, |
| "loss": 0.3009, |
| "step": 15900 |
| }, |
| { |
| "epoch": 4.942671211651689, |
| "grad_norm": 21.518749237060547, |
| "learning_rate": 1.6858795578969115e-05, |
| "loss": 0.3155, |
| "step": 15950 |
| }, |
| { |
| "epoch": 4.958165478772854, |
| "grad_norm": 11.044340133666992, |
| "learning_rate": 1.6807148021898565e-05, |
| "loss": 0.3648, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.973659745894019, |
| "grad_norm": 3.9900588989257812, |
| "learning_rate": 1.6755500464828014e-05, |
| "loss": 0.3383, |
| "step": 16050 |
| }, |
| { |
| "epoch": 4.989154013015185, |
| "grad_norm": 16.869041442871094, |
| "learning_rate": 1.6703852907757464e-05, |
| "loss": 0.3471, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7964162023791598, |
| "eval_f1": 0.7960859271865419, |
| "eval_loss": 0.8064730167388916, |
| "eval_runtime": 25.5199, |
| "eval_samples_per_second": 260.228, |
| "eval_steps_per_second": 16.301, |
| "step": 16135 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 32270, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.39622791611904e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |