| { | |
| "best_metric": 0.759579488098438, | |
| "best_model_checkpoint": "bert-finetuned-sem_eval-english\\checkpoint-85976", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 107470, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.023262305759746905, | |
| "grad_norm": 0.5290595293045044, | |
| "learning_rate": 1.9906950776961013e-05, | |
| "loss": 0.2421, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04652461151949381, | |
| "grad_norm": 0.7135451436042786, | |
| "learning_rate": 1.9813901553922027e-05, | |
| "loss": 0.153, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06978691727924072, | |
| "grad_norm": 0.6057224869728088, | |
| "learning_rate": 1.9720852330883038e-05, | |
| "loss": 0.135, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.09304922303898762, | |
| "grad_norm": 2.033938407897949, | |
| "learning_rate": 1.962780310784405e-05, | |
| "loss": 0.1273, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.11631152879873453, | |
| "grad_norm": 1.6713635921478271, | |
| "learning_rate": 1.9534753884805063e-05, | |
| "loss": 0.1221, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.13957383455848144, | |
| "grad_norm": 0.4952755570411682, | |
| "learning_rate": 1.9441704661766077e-05, | |
| "loss": 0.116, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.16283614031822835, | |
| "grad_norm": 0.5051670670509338, | |
| "learning_rate": 1.9348655438727088e-05, | |
| "loss": 0.1147, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.18609844607797524, | |
| "grad_norm": 0.9887399077415466, | |
| "learning_rate": 1.92556062156881e-05, | |
| "loss": 0.1152, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.20936075183772215, | |
| "grad_norm": 0.7931112051010132, | |
| "learning_rate": 1.9162556992649114e-05, | |
| "loss": 0.1138, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.23262305759746907, | |
| "grad_norm": 0.7627053260803223, | |
| "learning_rate": 1.9069507769610124e-05, | |
| "loss": 0.113, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.25588536335721596, | |
| "grad_norm": 0.6043164730072021, | |
| "learning_rate": 1.897645854657114e-05, | |
| "loss": 0.1119, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.27914766911696287, | |
| "grad_norm": 0.5762751698493958, | |
| "learning_rate": 1.888340932353215e-05, | |
| "loss": 0.1107, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.3024099748767098, | |
| "grad_norm": 0.6853972673416138, | |
| "learning_rate": 1.8790360100493164e-05, | |
| "loss": 0.1075, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.3256722806364567, | |
| "grad_norm": 0.6911000609397888, | |
| "learning_rate": 1.8697310877454175e-05, | |
| "loss": 0.1079, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.3489345863962036, | |
| "grad_norm": 0.5828253626823425, | |
| "learning_rate": 1.860426165441519e-05, | |
| "loss": 0.1091, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.3721968921559505, | |
| "grad_norm": 0.782696008682251, | |
| "learning_rate": 1.85112124313762e-05, | |
| "loss": 0.1084, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.3954591979156974, | |
| "grad_norm": 0.8830183148384094, | |
| "learning_rate": 1.841816320833721e-05, | |
| "loss": 0.1059, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.4187215036754443, | |
| "grad_norm": 1.4506700038909912, | |
| "learning_rate": 1.8325113985298225e-05, | |
| "loss": 0.1058, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.4419838094351912, | |
| "grad_norm": 1.0449475049972534, | |
| "learning_rate": 1.8232064762259236e-05, | |
| "loss": 0.1067, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.46524611519493814, | |
| "grad_norm": 1.4629307985305786, | |
| "learning_rate": 1.8139015539220247e-05, | |
| "loss": 0.1031, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.48850842095468505, | |
| "grad_norm": 0.5856618285179138, | |
| "learning_rate": 1.804596631618126e-05, | |
| "loss": 0.1064, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.5117707267144319, | |
| "grad_norm": 0.7551602125167847, | |
| "learning_rate": 1.7952917093142276e-05, | |
| "loss": 0.1029, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.5350330324741789, | |
| "grad_norm": 0.762008011341095, | |
| "learning_rate": 1.7859867870103287e-05, | |
| "loss": 0.1043, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.5582953382339257, | |
| "grad_norm": 0.9580531716346741, | |
| "learning_rate": 1.7766818647064298e-05, | |
| "loss": 0.1048, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.5815576439936726, | |
| "grad_norm": 1.0684598684310913, | |
| "learning_rate": 1.7673769424025312e-05, | |
| "loss": 0.1041, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.6048199497534196, | |
| "grad_norm": 1.6208114624023438, | |
| "learning_rate": 1.7580720200986323e-05, | |
| "loss": 0.1005, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.6280822555131664, | |
| "grad_norm": 1.0143775939941406, | |
| "learning_rate": 1.7487670977947334e-05, | |
| "loss": 0.1012, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.6513445612729134, | |
| "grad_norm": 0.8733798861503601, | |
| "learning_rate": 1.7394621754908348e-05, | |
| "loss": 0.1009, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.6746068670326603, | |
| "grad_norm": 1.1354494094848633, | |
| "learning_rate": 1.730157253186936e-05, | |
| "loss": 0.1012, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.6978691727924072, | |
| "grad_norm": 0.852428674697876, | |
| "learning_rate": 1.720852330883037e-05, | |
| "loss": 0.1003, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7211314785521541, | |
| "grad_norm": 0.6298101544380188, | |
| "learning_rate": 1.7115474085791384e-05, | |
| "loss": 0.1009, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.744393784311901, | |
| "grad_norm": 0.6968585252761841, | |
| "learning_rate": 1.70224248627524e-05, | |
| "loss": 0.1002, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.7676560900716479, | |
| "grad_norm": 0.7687616944313049, | |
| "learning_rate": 1.692937563971341e-05, | |
| "loss": 0.0976, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.7909183958313948, | |
| "grad_norm": 0.9958521723747253, | |
| "learning_rate": 1.6836326416674424e-05, | |
| "loss": 0.0996, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.8141807015911418, | |
| "grad_norm": 1.9353946447372437, | |
| "learning_rate": 1.6743277193635435e-05, | |
| "loss": 0.0982, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.8374430073508886, | |
| "grad_norm": 0.4970337450504303, | |
| "learning_rate": 1.6650227970596446e-05, | |
| "loss": 0.1013, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.8607053131106355, | |
| "grad_norm": 1.3484145402908325, | |
| "learning_rate": 1.655717874755746e-05, | |
| "loss": 0.1001, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.8839676188703824, | |
| "grad_norm": 0.9516633749008179, | |
| "learning_rate": 1.646412952451847e-05, | |
| "loss": 0.0978, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.9072299246301293, | |
| "grad_norm": 1.358478307723999, | |
| "learning_rate": 1.6371080301479485e-05, | |
| "loss": 0.1016, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.9304922303898763, | |
| "grad_norm": 0.5643423795700073, | |
| "learning_rate": 1.6278031078440496e-05, | |
| "loss": 0.0983, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.9537545361496231, | |
| "grad_norm": 0.532564103603363, | |
| "learning_rate": 1.618498185540151e-05, | |
| "loss": 0.0972, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.9770168419093701, | |
| "grad_norm": 1.3922828435897827, | |
| "learning_rate": 1.609193263236252e-05, | |
| "loss": 0.0976, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5307062436028659, | |
| "eval_f1": 0.7361535927721048, | |
| "eval_loss": 0.09595564752817154, | |
| "eval_roc_auc": 0.8461935311283144, | |
| "eval_runtime": 24.262, | |
| "eval_samples_per_second": 885.911, | |
| "eval_steps_per_second": 110.749, | |
| "step": 21494 | |
| }, | |
| { | |
| "epoch": 1.0002791476691169, | |
| "grad_norm": 0.6412343978881836, | |
| "learning_rate": 1.5998883409323532e-05, | |
| "loss": 0.0986, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.0235414534288638, | |
| "grad_norm": 0.6500938534736633, | |
| "learning_rate": 1.5905834186284547e-05, | |
| "loss": 0.0889, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.0468037591886108, | |
| "grad_norm": 0.49381542205810547, | |
| "learning_rate": 1.5812784963245558e-05, | |
| "loss": 0.0861, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.0700660649483578, | |
| "grad_norm": 0.790490448474884, | |
| "learning_rate": 1.571973574020657e-05, | |
| "loss": 0.0853, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.0933283707081045, | |
| "grad_norm": 1.0638964176177979, | |
| "learning_rate": 1.5626686517167583e-05, | |
| "loss": 0.0855, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.1165906764678515, | |
| "grad_norm": 0.8379644751548767, | |
| "learning_rate": 1.5533637294128597e-05, | |
| "loss": 0.0839, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.1398529822275985, | |
| "grad_norm": 1.5022120475769043, | |
| "learning_rate": 1.5440588071089608e-05, | |
| "loss": 0.0883, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.1631152879873454, | |
| "grad_norm": 0.6825814247131348, | |
| "learning_rate": 1.534753884805062e-05, | |
| "loss": 0.0852, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.1863775937470922, | |
| "grad_norm": 2.154118776321411, | |
| "learning_rate": 1.5254489625011633e-05, | |
| "loss": 0.0854, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.2096398995068391, | |
| "grad_norm": 0.8945685029029846, | |
| "learning_rate": 1.5161440401972644e-05, | |
| "loss": 0.0862, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.2329022052665861, | |
| "grad_norm": 1.1257520914077759, | |
| "learning_rate": 1.5068391178933657e-05, | |
| "loss": 0.0832, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.2561645110263329, | |
| "grad_norm": 0.785380482673645, | |
| "learning_rate": 1.497534195589467e-05, | |
| "loss": 0.0842, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.2794268167860798, | |
| "grad_norm": 0.4283202290534973, | |
| "learning_rate": 1.488229273285568e-05, | |
| "loss": 0.0859, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.3026891225458268, | |
| "grad_norm": 1.3082115650177002, | |
| "learning_rate": 1.4789243509816695e-05, | |
| "loss": 0.0824, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.3259514283055736, | |
| "grad_norm": 0.6663931608200073, | |
| "learning_rate": 1.4696194286777707e-05, | |
| "loss": 0.0868, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.3492137340653205, | |
| "grad_norm": 1.093483805656433, | |
| "learning_rate": 1.460314506373872e-05, | |
| "loss": 0.0871, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.3724760398250675, | |
| "grad_norm": 1.7370342016220093, | |
| "learning_rate": 1.4510095840699731e-05, | |
| "loss": 0.0858, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.3957383455848142, | |
| "grad_norm": 1.280945062637329, | |
| "learning_rate": 1.4417046617660744e-05, | |
| "loss": 0.0853, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.4190006513445612, | |
| "grad_norm": 0.44828563928604126, | |
| "learning_rate": 1.4323997394621756e-05, | |
| "loss": 0.0828, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.4422629571043082, | |
| "grad_norm": 1.0420727729797363, | |
| "learning_rate": 1.4230948171582769e-05, | |
| "loss": 0.0836, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.4655252628640552, | |
| "grad_norm": 1.529417634010315, | |
| "learning_rate": 1.413789894854378e-05, | |
| "loss": 0.0827, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.4887875686238021, | |
| "grad_norm": 0.8359895944595337, | |
| "learning_rate": 1.4044849725504792e-05, | |
| "loss": 0.0836, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.5120498743835489, | |
| "grad_norm": 0.8840139508247375, | |
| "learning_rate": 1.3951800502465807e-05, | |
| "loss": 0.0858, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.5353121801432958, | |
| "grad_norm": 1.2289206981658936, | |
| "learning_rate": 1.385875127942682e-05, | |
| "loss": 0.0835, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.5585744859030428, | |
| "grad_norm": 1.725092887878418, | |
| "learning_rate": 1.376570205638783e-05, | |
| "loss": 0.0833, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.5818367916627896, | |
| "grad_norm": 1.8183008432388306, | |
| "learning_rate": 1.3672652833348843e-05, | |
| "loss": 0.085, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.6050990974225365, | |
| "grad_norm": 1.5482715368270874, | |
| "learning_rate": 1.3579603610309855e-05, | |
| "loss": 0.0825, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.6283614031822835, | |
| "grad_norm": 2.1967756748199463, | |
| "learning_rate": 1.3486554387270866e-05, | |
| "loss": 0.0857, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.6516237089420303, | |
| "grad_norm": 0.9423213005065918, | |
| "learning_rate": 1.3393505164231879e-05, | |
| "loss": 0.0818, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.6748860147017772, | |
| "grad_norm": 0.8981990218162537, | |
| "learning_rate": 1.3300455941192892e-05, | |
| "loss": 0.0821, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.6981483204615242, | |
| "grad_norm": 1.125404715538025, | |
| "learning_rate": 1.3207406718153904e-05, | |
| "loss": 0.0849, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.721410626221271, | |
| "grad_norm": 2.038687229156494, | |
| "learning_rate": 1.3114357495114918e-05, | |
| "loss": 0.0847, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.744672931981018, | |
| "grad_norm": 1.0153677463531494, | |
| "learning_rate": 1.302130827207593e-05, | |
| "loss": 0.084, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.7679352377407649, | |
| "grad_norm": 1.0724354982376099, | |
| "learning_rate": 1.2928259049036942e-05, | |
| "loss": 0.0825, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.7911975435005116, | |
| "grad_norm": 0.6580795645713806, | |
| "learning_rate": 1.2835209825997955e-05, | |
| "loss": 0.0843, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.8144598492602588, | |
| "grad_norm": 1.5284615755081177, | |
| "learning_rate": 1.2742160602958966e-05, | |
| "loss": 0.0858, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.8377221550200056, | |
| "grad_norm": 2.040937900543213, | |
| "learning_rate": 1.2649111379919978e-05, | |
| "loss": 0.0838, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.8609844607797523, | |
| "grad_norm": 1.1230378150939941, | |
| "learning_rate": 1.255606215688099e-05, | |
| "loss": 0.0822, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.8842467665394995, | |
| "grad_norm": 0.6881332397460938, | |
| "learning_rate": 1.2463012933842002e-05, | |
| "loss": 0.0827, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.9075090722992463, | |
| "grad_norm": 1.4516489505767822, | |
| "learning_rate": 1.2369963710803016e-05, | |
| "loss": 0.0797, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.9307713780589932, | |
| "grad_norm": 0.8135964870452881, | |
| "learning_rate": 1.2276914487764029e-05, | |
| "loss": 0.0823, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.9540336838187402, | |
| "grad_norm": 0.51099693775177, | |
| "learning_rate": 1.2183865264725041e-05, | |
| "loss": 0.0819, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.977295989578487, | |
| "grad_norm": 1.2015933990478516, | |
| "learning_rate": 1.2090816041686054e-05, | |
| "loss": 0.0826, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5615520610402903, | |
| "eval_f1": 0.7499260228316903, | |
| "eval_loss": 0.09278739243745804, | |
| "eval_roc_auc": 0.8556249389001388, | |
| "eval_runtime": 23.2564, | |
| "eval_samples_per_second": 924.221, | |
| "eval_steps_per_second": 115.538, | |
| "step": 42988 | |
| }, | |
| { | |
| "epoch": 2.0005582953382337, | |
| "grad_norm": 1.1591126918792725, | |
| "learning_rate": 1.1997766818647065e-05, | |
| "loss": 0.0807, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.023820601097981, | |
| "grad_norm": 0.6749496459960938, | |
| "learning_rate": 1.1904717595608077e-05, | |
| "loss": 0.07, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.0470829068577276, | |
| "grad_norm": 1.0793603658676147, | |
| "learning_rate": 1.181166837256909e-05, | |
| "loss": 0.0682, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.070345212617475, | |
| "grad_norm": 1.9193094968795776, | |
| "learning_rate": 1.1718619149530101e-05, | |
| "loss": 0.0659, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.0936075183772216, | |
| "grad_norm": 2.0090835094451904, | |
| "learning_rate": 1.1625569926491114e-05, | |
| "loss": 0.0661, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.1168698241369683, | |
| "grad_norm": 1.252321720123291, | |
| "learning_rate": 1.1532520703452128e-05, | |
| "loss": 0.0663, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.1401321298967155, | |
| "grad_norm": 0.25603464245796204, | |
| "learning_rate": 1.143947148041314e-05, | |
| "loss": 0.0671, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.1633944356564623, | |
| "grad_norm": 0.9230429530143738, | |
| "learning_rate": 1.1346422257374151e-05, | |
| "loss": 0.0654, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.186656741416209, | |
| "grad_norm": 0.8180581331253052, | |
| "learning_rate": 1.1253373034335164e-05, | |
| "loss": 0.0675, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.209919047175956, | |
| "grad_norm": 1.928276777267456, | |
| "learning_rate": 1.1160323811296177e-05, | |
| "loss": 0.0686, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.233181352935703, | |
| "grad_norm": 1.222936987876892, | |
| "learning_rate": 1.106727458825719e-05, | |
| "loss": 0.0706, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.2564436586954497, | |
| "grad_norm": 0.6796595454216003, | |
| "learning_rate": 1.09742253652182e-05, | |
| "loss": 0.0681, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.279705964455197, | |
| "grad_norm": 1.2472426891326904, | |
| "learning_rate": 1.0881176142179213e-05, | |
| "loss": 0.0677, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.3029682702149437, | |
| "grad_norm": 1.2157268524169922, | |
| "learning_rate": 1.0788126919140227e-05, | |
| "loss": 0.0688, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.326230575974691, | |
| "grad_norm": 1.0946940183639526, | |
| "learning_rate": 1.069507769610124e-05, | |
| "loss": 0.0669, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.3494928817344376, | |
| "grad_norm": 2.56750750541687, | |
| "learning_rate": 1.060202847306225e-05, | |
| "loss": 0.065, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.3727551874941843, | |
| "grad_norm": 0.8625161051750183, | |
| "learning_rate": 1.0508979250023263e-05, | |
| "loss": 0.0676, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.3960174932539315, | |
| "grad_norm": 1.6813982725143433, | |
| "learning_rate": 1.0415930026984276e-05, | |
| "loss": 0.0669, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.4192797990136783, | |
| "grad_norm": 1.76870596408844, | |
| "learning_rate": 1.0322880803945287e-05, | |
| "loss": 0.0704, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.442542104773425, | |
| "grad_norm": 0.8901593089103699, | |
| "learning_rate": 1.02298315809063e-05, | |
| "loss": 0.0703, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.4658044105331722, | |
| "grad_norm": 0.7651998400688171, | |
| "learning_rate": 1.0136782357867312e-05, | |
| "loss": 0.0685, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.489066716292919, | |
| "grad_norm": 1.2652794122695923, | |
| "learning_rate": 1.0043733134828325e-05, | |
| "loss": 0.0653, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.5123290220526657, | |
| "grad_norm": 1.768955111503601, | |
| "learning_rate": 9.950683911789337e-06, | |
| "loss": 0.0686, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.535591327812413, | |
| "grad_norm": 1.6044102907180786, | |
| "learning_rate": 9.85763468875035e-06, | |
| "loss": 0.0676, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 2.5588536335721597, | |
| "grad_norm": 1.808396816253662, | |
| "learning_rate": 9.764585465711363e-06, | |
| "loss": 0.0662, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.5821159393319064, | |
| "grad_norm": 1.0778286457061768, | |
| "learning_rate": 9.671536242672375e-06, | |
| "loss": 0.0689, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 2.6053782450916536, | |
| "grad_norm": 2.739319324493408, | |
| "learning_rate": 9.578487019633386e-06, | |
| "loss": 0.068, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.6286405508514004, | |
| "grad_norm": 1.368030071258545, | |
| "learning_rate": 9.485437796594399e-06, | |
| "loss": 0.0655, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 2.651902856611147, | |
| "grad_norm": 0.30945539474487305, | |
| "learning_rate": 9.392388573555411e-06, | |
| "loss": 0.0649, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.6751651623708943, | |
| "grad_norm": 0.8296416997909546, | |
| "learning_rate": 9.299339350516424e-06, | |
| "loss": 0.0677, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 2.698427468130641, | |
| "grad_norm": 1.1620192527770996, | |
| "learning_rate": 9.206290127477437e-06, | |
| "loss": 0.069, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.721689773890388, | |
| "grad_norm": 0.9376591444015503, | |
| "learning_rate": 9.11324090443845e-06, | |
| "loss": 0.0662, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 2.744952079650135, | |
| "grad_norm": 1.6231029033660889, | |
| "learning_rate": 9.02019168139946e-06, | |
| "loss": 0.0674, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.7682143854098817, | |
| "grad_norm": 1.0340408086776733, | |
| "learning_rate": 8.927142458360474e-06, | |
| "loss": 0.0685, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 2.7914766911696285, | |
| "grad_norm": 1.5797666311264038, | |
| "learning_rate": 8.834093235321485e-06, | |
| "loss": 0.065, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.8147389969293757, | |
| "grad_norm": 0.9955604076385498, | |
| "learning_rate": 8.741044012282498e-06, | |
| "loss": 0.0682, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 2.8380013026891224, | |
| "grad_norm": 2.2507500648498535, | |
| "learning_rate": 8.64799478924351e-06, | |
| "loss": 0.0651, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.861263608448869, | |
| "grad_norm": 0.9272844791412354, | |
| "learning_rate": 8.554945566204523e-06, | |
| "loss": 0.0651, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 2.8845259142086164, | |
| "grad_norm": 1.3886868953704834, | |
| "learning_rate": 8.461896343165536e-06, | |
| "loss": 0.066, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.907788219968363, | |
| "grad_norm": 0.9660001397132874, | |
| "learning_rate": 8.368847120126547e-06, | |
| "loss": 0.0683, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 2.9310505257281103, | |
| "grad_norm": 0.8844442963600159, | |
| "learning_rate": 8.27579789708756e-06, | |
| "loss": 0.0671, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 2.954312831487857, | |
| "grad_norm": 2.407435417175293, | |
| "learning_rate": 8.182748674048572e-06, | |
| "loss": 0.0664, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 2.9775751372476043, | |
| "grad_norm": 2.187854528427124, | |
| "learning_rate": 8.089699451009585e-06, | |
| "loss": 0.0666, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5649948822927329, | |
| "eval_f1": 0.7591117292255597, | |
| "eval_loss": 0.09460150450468063, | |
| "eval_roc_auc": 0.8701062840131171, | |
| "eval_runtime": 23.3037, | |
| "eval_samples_per_second": 922.345, | |
| "eval_steps_per_second": 115.304, | |
| "step": 64482 | |
| }, | |
| { | |
| "epoch": 3.000837443007351, | |
| "grad_norm": 1.4572051763534546, | |
| "learning_rate": 7.996650227970597e-06, | |
| "loss": 0.0662, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 3.0240997487670978, | |
| "grad_norm": 1.0595591068267822, | |
| "learning_rate": 7.90360100493161e-06, | |
| "loss": 0.0532, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 3.0473620545268445, | |
| "grad_norm": 0.7926930785179138, | |
| "learning_rate": 7.81055178189262e-06, | |
| "loss": 0.0513, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 3.0706243602865917, | |
| "grad_norm": 2.0223031044006348, | |
| "learning_rate": 7.717502558853635e-06, | |
| "loss": 0.056, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 3.0938866660463384, | |
| "grad_norm": 1.3608500957489014, | |
| "learning_rate": 7.624453335814647e-06, | |
| "loss": 0.051, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 3.1171489718060856, | |
| "grad_norm": 1.0539377927780151, | |
| "learning_rate": 7.531404112775659e-06, | |
| "loss": 0.0535, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 3.1404112775658324, | |
| "grad_norm": 0.8993151187896729, | |
| "learning_rate": 7.4383548897366704e-06, | |
| "loss": 0.0529, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 3.163673583325579, | |
| "grad_norm": 1.5115621089935303, | |
| "learning_rate": 7.345305666697684e-06, | |
| "loss": 0.0512, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 3.1869358890853263, | |
| "grad_norm": 0.8718969225883484, | |
| "learning_rate": 7.2522564436586965e-06, | |
| "loss": 0.0517, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 3.210198194845073, | |
| "grad_norm": 1.0791226625442505, | |
| "learning_rate": 7.159207220619708e-06, | |
| "loss": 0.0512, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 3.23346050060482, | |
| "grad_norm": 1.2362322807312012, | |
| "learning_rate": 7.06615799758072e-06, | |
| "loss": 0.0505, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 3.256722806364567, | |
| "grad_norm": 0.4878983497619629, | |
| "learning_rate": 6.9731087745417335e-06, | |
| "loss": 0.0523, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 3.2799851121243138, | |
| "grad_norm": 0.5156907439231873, | |
| "learning_rate": 6.880059551502746e-06, | |
| "loss": 0.0522, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 3.3032474178840605, | |
| "grad_norm": 1.0036829710006714, | |
| "learning_rate": 6.787010328463758e-06, | |
| "loss": 0.053, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 3.3265097236438077, | |
| "grad_norm": 1.9383690357208252, | |
| "learning_rate": 6.69396110542477e-06, | |
| "loss": 0.0524, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 3.3497720294035545, | |
| "grad_norm": 0.9468953609466553, | |
| "learning_rate": 6.600911882385782e-06, | |
| "loss": 0.0507, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 3.373034335163301, | |
| "grad_norm": 1.7953852415084839, | |
| "learning_rate": 6.507862659346795e-06, | |
| "loss": 0.0536, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 3.3962966409230484, | |
| "grad_norm": 3.1470677852630615, | |
| "learning_rate": 6.4148134363078075e-06, | |
| "loss": 0.0521, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 3.419558946682795, | |
| "grad_norm": 2.5121142864227295, | |
| "learning_rate": 6.321764213268819e-06, | |
| "loss": 0.0513, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 3.442821252442542, | |
| "grad_norm": 1.0771255493164062, | |
| "learning_rate": 6.228714990229832e-06, | |
| "loss": 0.0529, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 3.466083558202289, | |
| "grad_norm": 1.3458467721939087, | |
| "learning_rate": 6.1356657671908446e-06, | |
| "loss": 0.053, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 3.489345863962036, | |
| "grad_norm": 0.29975369572639465, | |
| "learning_rate": 6.042616544151857e-06, | |
| "loss": 0.0512, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 3.5126081697217826, | |
| "grad_norm": 1.2391622066497803, | |
| "learning_rate": 5.949567321112869e-06, | |
| "loss": 0.0519, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 3.5358704754815298, | |
| "grad_norm": 1.6513882875442505, | |
| "learning_rate": 5.8565180980738816e-06, | |
| "loss": 0.0529, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 3.5591327812412765, | |
| "grad_norm": 1.1643694639205933, | |
| "learning_rate": 5.763468875034894e-06, | |
| "loss": 0.0537, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 3.5823950870010233, | |
| "grad_norm": 1.9166603088378906, | |
| "learning_rate": 5.670419651995907e-06, | |
| "loss": 0.0508, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 3.6056573927607705, | |
| "grad_norm": 1.5334446430206299, | |
| "learning_rate": 5.5773704289569186e-06, | |
| "loss": 0.0514, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 3.628919698520517, | |
| "grad_norm": 2.500365972518921, | |
| "learning_rate": 5.48432120591793e-06, | |
| "loss": 0.0537, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 3.6521820042802644, | |
| "grad_norm": 0.8305968046188354, | |
| "learning_rate": 5.391271982878944e-06, | |
| "loss": 0.0546, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 3.675444310040011, | |
| "grad_norm": 1.3438687324523926, | |
| "learning_rate": 5.2982227598399564e-06, | |
| "loss": 0.0536, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 3.6987066157997583, | |
| "grad_norm": 1.1025956869125366, | |
| "learning_rate": 5.205173536800968e-06, | |
| "loss": 0.0546, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 3.721968921559505, | |
| "grad_norm": 1.6700533628463745, | |
| "learning_rate": 5.11212431376198e-06, | |
| "loss": 0.0533, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 3.745231227319252, | |
| "grad_norm": 0.8916147351264954, | |
| "learning_rate": 5.019075090722993e-06, | |
| "loss": 0.0516, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 3.768493533078999, | |
| "grad_norm": 1.7522839307785034, | |
| "learning_rate": 4.926025867684005e-06, | |
| "loss": 0.0537, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 3.791755838838746, | |
| "grad_norm": 1.4133764505386353, | |
| "learning_rate": 4.832976644645018e-06, | |
| "loss": 0.0542, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 3.8150181445984925, | |
| "grad_norm": 0.9128021001815796, | |
| "learning_rate": 4.73992742160603e-06, | |
| "loss": 0.0516, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 3.8382804503582397, | |
| "grad_norm": 2.4152848720550537, | |
| "learning_rate": 4.646878198567042e-06, | |
| "loss": 0.0532, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 3.8615427561179865, | |
| "grad_norm": 1.5950450897216797, | |
| "learning_rate": 4.553828975528055e-06, | |
| "loss": 0.0523, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 3.8848050618777332, | |
| "grad_norm": 1.5487794876098633, | |
| "learning_rate": 4.4607797524890675e-06, | |
| "loss": 0.054, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 3.9080673676374804, | |
| "grad_norm": 1.6051621437072754, | |
| "learning_rate": 4.367730529450079e-06, | |
| "loss": 0.052, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 3.931329673397227, | |
| "grad_norm": 1.2082515954971313, | |
| "learning_rate": 4.274681306411092e-06, | |
| "loss": 0.0512, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 3.954591979156974, | |
| "grad_norm": 1.1482079029083252, | |
| "learning_rate": 4.1816320833721045e-06, | |
| "loss": 0.0514, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 3.977854284916721, | |
| "grad_norm": 1.919583797454834, | |
| "learning_rate": 4.088582860333117e-06, | |
| "loss": 0.0543, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5645761607890574, | |
| "eval_f1": 0.759579488098438, | |
| "eval_loss": 0.10311879962682724, | |
| "eval_roc_auc": 0.8744104988949349, | |
| "eval_runtime": 22.8736, | |
| "eval_samples_per_second": 939.686, | |
| "eval_steps_per_second": 117.472, | |
| "step": 85976 | |
| }, | |
| { | |
| "epoch": 4.001116590676467, | |
| "grad_norm": 0.9219486713409424, | |
| "learning_rate": 3.995533637294129e-06, | |
| "loss": 0.0531, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 4.024378896436215, | |
| "grad_norm": 0.5063530206680298, | |
| "learning_rate": 3.902484414255141e-06, | |
| "loss": 0.0431, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 4.047641202195962, | |
| "grad_norm": 0.9435988068580627, | |
| "learning_rate": 3.8094351912161537e-06, | |
| "loss": 0.0419, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 4.070903507955709, | |
| "grad_norm": 0.8546033501625061, | |
| "learning_rate": 3.716385968177166e-06, | |
| "loss": 0.0413, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 4.094165813715455, | |
| "grad_norm": 1.6249778270721436, | |
| "learning_rate": 3.6233367451381785e-06, | |
| "loss": 0.0429, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 4.1174281194752025, | |
| "grad_norm": 1.7502926588058472, | |
| "learning_rate": 3.5302875220991907e-06, | |
| "loss": 0.0395, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 4.14069042523495, | |
| "grad_norm": 3.170189380645752, | |
| "learning_rate": 3.4372382990602033e-06, | |
| "loss": 0.0431, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 4.163952730994696, | |
| "grad_norm": 1.2306873798370361, | |
| "learning_rate": 3.3441890760212155e-06, | |
| "loss": 0.0419, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 4.187215036754443, | |
| "grad_norm": 2.3752849102020264, | |
| "learning_rate": 3.251139852982228e-06, | |
| "loss": 0.0426, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 4.21047734251419, | |
| "grad_norm": 1.0530017614364624, | |
| "learning_rate": 3.1580906299432403e-06, | |
| "loss": 0.0406, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 4.233739648273937, | |
| "grad_norm": 1.7250635623931885, | |
| "learning_rate": 3.065041406904253e-06, | |
| "loss": 0.0431, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 4.257001954033684, | |
| "grad_norm": 0.6301759481430054, | |
| "learning_rate": 2.9719921838652647e-06, | |
| "loss": 0.0437, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 4.280264259793431, | |
| "grad_norm": 2.9674508571624756, | |
| "learning_rate": 2.8789429608262777e-06, | |
| "loss": 0.0429, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 4.303526565553177, | |
| "grad_norm": 1.3684778213500977, | |
| "learning_rate": 2.7858937377872895e-06, | |
| "loss": 0.0413, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 4.326788871312925, | |
| "grad_norm": 2.5620508193969727, | |
| "learning_rate": 2.692844514748302e-06, | |
| "loss": 0.0425, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 4.350051177072672, | |
| "grad_norm": 2.529858350753784, | |
| "learning_rate": 2.5997952917093143e-06, | |
| "loss": 0.0432, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 4.373313482832418, | |
| "grad_norm": 1.6359411478042603, | |
| "learning_rate": 2.5067460686703265e-06, | |
| "loss": 0.0437, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 4.396575788592165, | |
| "grad_norm": 1.6633356809616089, | |
| "learning_rate": 2.413696845631339e-06, | |
| "loss": 0.0415, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 4.419838094351912, | |
| "grad_norm": 0.9840025901794434, | |
| "learning_rate": 2.3206476225923518e-06, | |
| "loss": 0.0387, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 4.443100400111659, | |
| "grad_norm": 1.1913479566574097, | |
| "learning_rate": 2.227598399553364e-06, | |
| "loss": 0.0433, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 4.466362705871406, | |
| "grad_norm": 0.9769937992095947, | |
| "learning_rate": 2.1345491765143766e-06, | |
| "loss": 0.042, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 4.489625011631153, | |
| "grad_norm": 0.4699022173881531, | |
| "learning_rate": 2.0414999534753888e-06, | |
| "loss": 0.0416, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 4.512887317390899, | |
| "grad_norm": 1.7338500022888184, | |
| "learning_rate": 1.948450730436401e-06, | |
| "loss": 0.0422, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 4.536149623150647, | |
| "grad_norm": 2.9296669960021973, | |
| "learning_rate": 1.8554015073974132e-06, | |
| "loss": 0.041, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 4.559411928910394, | |
| "grad_norm": 2.108750820159912, | |
| "learning_rate": 1.7623522843584256e-06, | |
| "loss": 0.0406, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 4.58267423467014, | |
| "grad_norm": 1.38349449634552, | |
| "learning_rate": 1.669303061319438e-06, | |
| "loss": 0.0403, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 4.605936540429887, | |
| "grad_norm": 1.092578411102295, | |
| "learning_rate": 1.5762538382804504e-06, | |
| "loss": 0.0419, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 4.6291988461896345, | |
| "grad_norm": 2.6619553565979004, | |
| "learning_rate": 1.4832046152414628e-06, | |
| "loss": 0.0394, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 4.652461151949382, | |
| "grad_norm": 1.8110424280166626, | |
| "learning_rate": 1.3901553922024752e-06, | |
| "loss": 0.0407, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 4.675723457709128, | |
| "grad_norm": 1.2239103317260742, | |
| "learning_rate": 1.2971061691634876e-06, | |
| "loss": 0.0415, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 4.698985763468875, | |
| "grad_norm": 3.0208041667938232, | |
| "learning_rate": 1.2040569461245e-06, | |
| "loss": 0.0411, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 4.7222480692286215, | |
| "grad_norm": 1.5058140754699707, | |
| "learning_rate": 1.1110077230855124e-06, | |
| "loss": 0.0431, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 4.745510374988369, | |
| "grad_norm": 1.6732498407363892, | |
| "learning_rate": 1.0179585000465248e-06, | |
| "loss": 0.0408, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 4.768772680748116, | |
| "grad_norm": 2.15928053855896, | |
| "learning_rate": 9.249092770075371e-07, | |
| "loss": 0.0412, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 4.792034986507863, | |
| "grad_norm": 1.8211805820465088, | |
| "learning_rate": 8.318600539685494e-07, | |
| "loss": 0.0418, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 4.815297292267609, | |
| "grad_norm": 1.1392755508422852, | |
| "learning_rate": 7.388108309295617e-07, | |
| "loss": 0.0395, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 4.838559598027357, | |
| "grad_norm": 1.2640013694763184, | |
| "learning_rate": 6.457616078905741e-07, | |
| "loss": 0.0404, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 4.861821903787103, | |
| "grad_norm": 1.2413549423217773, | |
| "learning_rate": 5.527123848515865e-07, | |
| "loss": 0.0409, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 4.88508420954685, | |
| "grad_norm": 0.14875428378582, | |
| "learning_rate": 4.5966316181259895e-07, | |
| "loss": 0.0405, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 4.908346515306597, | |
| "grad_norm": 1.309793472290039, | |
| "learning_rate": 3.6661393877361125e-07, | |
| "loss": 0.041, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 4.9316088210663445, | |
| "grad_norm": 0.9020711779594421, | |
| "learning_rate": 2.7356471573462365e-07, | |
| "loss": 0.0402, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 4.954871126826091, | |
| "grad_norm": 1.875877857208252, | |
| "learning_rate": 1.80515492695636e-07, | |
| "loss": 0.0408, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 4.978133432585838, | |
| "grad_norm": 3.464327335357666, | |
| "learning_rate": 8.746626965664838e-08, | |
| "loss": 0.0407, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5647157346236159, | |
| "eval_f1": 0.7594581273430615, | |
| "eval_loss": 0.1101851612329483, | |
| "eval_roc_auc": 0.8742792491333411, | |
| "eval_runtime": 22.5758, | |
| "eval_samples_per_second": 952.08, | |
| "eval_steps_per_second": 119.021, | |
| "step": 107470 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 107470, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.656124322596352e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |