| { | |
| "best_global_step": 69500, | |
| "best_metric": 0.9733653983882032, | |
| "best_model_checkpoint": "./results/checkpoint-69500", | |
| "epoch": 2.278541733290694, | |
| "eval_steps": 500, | |
| "global_step": 85500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.332498301064666e-05, | |
| "grad_norm": 2.171241283416748, | |
| "learning_rate": 0.0, | |
| "loss": 1.1419, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0013324983010646661, | |
| "grad_norm": 3.923346757888794, | |
| "learning_rate": 1.319120586275816e-07, | |
| "loss": 0.9369, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0026649966021293323, | |
| "grad_norm": 5.213994026184082, | |
| "learning_rate": 2.651565622918055e-07, | |
| "loss": 0.9031, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.003997494903193999, | |
| "grad_norm": 3.4589016437530518, | |
| "learning_rate": 3.984010659560293e-07, | |
| "loss": 0.8309, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0053299932042586646, | |
| "grad_norm": 2.302459239959717, | |
| "learning_rate": 5.316455696202532e-07, | |
| "loss": 0.7406, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0066624915053233305, | |
| "grad_norm": 2.8590707778930664, | |
| "learning_rate": 6.64890073284477e-07, | |
| "loss": 0.7311, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0066624915053233305, | |
| "eval_dev_accuracy": 0.9312310116323776, | |
| "eval_dev_accuracy_threshold": 0.48881804943084717, | |
| "eval_dev_average_precision": 0.1090500582180461, | |
| "eval_dev_f1": 0.1867953275774505, | |
| "eval_dev_f1_threshold": 0.31155017018318176, | |
| "eval_dev_precision": 0.17409826753763136, | |
| "eval_dev_recall": 0.20149008436507068, | |
| "eval_loss": 0.7330209612846375, | |
| "eval_runtime": 567.4178, | |
| "eval_samples_per_second": 233.773, | |
| "eval_steps_per_second": 7.307, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.007994989806387997, | |
| "grad_norm": 2.162013530731201, | |
| "learning_rate": 7.981345769487009e-07, | |
| "loss": 0.725, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.009327488107452663, | |
| "grad_norm": 3.412961959838867, | |
| "learning_rate": 9.313790806129248e-07, | |
| "loss": 0.6892, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.010659986408517329, | |
| "grad_norm": 3.037612199783325, | |
| "learning_rate": 1.0646235842771487e-06, | |
| "loss": 0.74, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.011992484709581995, | |
| "grad_norm": 3.178318977355957, | |
| "learning_rate": 1.1978680879413725e-06, | |
| "loss": 0.6857, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.013324983010646661, | |
| "grad_norm": 3.9319422245025635, | |
| "learning_rate": 1.3311125916055965e-06, | |
| "loss": 0.6784, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.013324983010646661, | |
| "eval_dev_accuracy": 0.9312913220804089, | |
| "eval_dev_accuracy_threshold": 0.5571334362030029, | |
| "eval_dev_average_precision": 0.2217988734731733, | |
| "eval_dev_f1": 0.3029693004529441, | |
| "eval_dev_f1_threshold": 0.3375406265258789, | |
| "eval_dev_precision": 0.2600942655145326, | |
| "eval_dev_recall": 0.36276980387860197, | |
| "eval_loss": 0.6632949113845825, | |
| "eval_runtime": 567.4434, | |
| "eval_samples_per_second": 233.763, | |
| "eval_steps_per_second": 7.306, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.014657481311711327, | |
| "grad_norm": 3.4704461097717285, | |
| "learning_rate": 1.4643570952698202e-06, | |
| "loss": 0.6753, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.015989979612775995, | |
| "grad_norm": 5.541119575500488, | |
| "learning_rate": 1.597601598934044e-06, | |
| "loss": 0.6707, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01732247791384066, | |
| "grad_norm": 4.9743475914001465, | |
| "learning_rate": 1.7308461025982678e-06, | |
| "loss": 0.6679, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.018654976214905326, | |
| "grad_norm": 7.222622394561768, | |
| "learning_rate": 1.864090606262492e-06, | |
| "loss": 0.5831, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01998747451596999, | |
| "grad_norm": 3.6720590591430664, | |
| "learning_rate": 1.9973351099267156e-06, | |
| "loss": 0.5589, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01998747451596999, | |
| "eval_dev_accuracy": 0.9353095056804903, | |
| "eval_dev_accuracy_threshold": 0.5631594657897949, | |
| "eval_dev_average_precision": 0.338116839920073, | |
| "eval_dev_f1": 0.39591571740541814, | |
| "eval_dev_f1_threshold": 0.4508041739463806, | |
| "eval_dev_precision": 0.4291197543500512, | |
| "eval_dev_recall": 0.3674811000328695, | |
| "eval_loss": 0.6119648814201355, | |
| "eval_runtime": 567.5553, | |
| "eval_samples_per_second": 233.716, | |
| "eval_steps_per_second": 7.305, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.021319972817034658, | |
| "grad_norm": 10.199407577514648, | |
| "learning_rate": 2.1305796135909398e-06, | |
| "loss": 0.6065, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.022652471118099326, | |
| "grad_norm": 6.087101459503174, | |
| "learning_rate": 2.2638241172551636e-06, | |
| "loss": 0.5724, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.02398496941916399, | |
| "grad_norm": 16.529647827148438, | |
| "learning_rate": 2.3970686209193873e-06, | |
| "loss": 0.5568, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.025317467720228658, | |
| "grad_norm": 14.971884727478027, | |
| "learning_rate": 2.530313124583611e-06, | |
| "loss": 0.5603, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.026649966021293322, | |
| "grad_norm": 4.663777828216553, | |
| "learning_rate": 2.663557628247835e-06, | |
| "loss": 0.5553, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.026649966021293322, | |
| "eval_dev_accuracy": 0.9353848937405294, | |
| "eval_dev_accuracy_threshold": 0.6437499523162842, | |
| "eval_dev_average_precision": 0.38323093653846474, | |
| "eval_dev_f1": 0.4514054443643622, | |
| "eval_dev_f1_threshold": 0.6111855506896973, | |
| "eval_dev_precision": 0.4569023569023569, | |
| "eval_dev_recall": 0.44603922427960996, | |
| "eval_loss": 0.5613667964935303, | |
| "eval_runtime": 568.0344, | |
| "eval_samples_per_second": 233.519, | |
| "eval_steps_per_second": 7.299, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02798246432235799, | |
| "grad_norm": 5.051695823669434, | |
| "learning_rate": 2.7968021319120587e-06, | |
| "loss": 0.5506, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.029314962623422654, | |
| "grad_norm": 12.604368209838867, | |
| "learning_rate": 2.930046635576283e-06, | |
| "loss": 0.5446, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.03064746092448732, | |
| "grad_norm": 3.9183976650238037, | |
| "learning_rate": 3.0632911392405066e-06, | |
| "loss": 0.5432, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.03197995922555199, | |
| "grad_norm": 5.165050983428955, | |
| "learning_rate": 3.1965356429047304e-06, | |
| "loss": 0.5091, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.03331245752661666, | |
| "grad_norm": 10.820756912231445, | |
| "learning_rate": 3.3297801465689546e-06, | |
| "loss": 0.5099, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.03331245752661666, | |
| "eval_dev_accuracy": 0.9395613922666928, | |
| "eval_dev_accuracy_threshold": 0.700435996055603, | |
| "eval_dev_average_precision": 0.4498892909951412, | |
| "eval_dev_f1": 0.4910784423745932, | |
| "eval_dev_f1_threshold": 0.5620608925819397, | |
| "eval_dev_precision": 0.5032777458309373, | |
| "eval_dev_recall": 0.47945655746685656, | |
| "eval_loss": 0.5332435369491577, | |
| "eval_runtime": 567.4907, | |
| "eval_samples_per_second": 233.743, | |
| "eval_steps_per_second": 7.306, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.03464495582768132, | |
| "grad_norm": 5.984354496002197, | |
| "learning_rate": 3.4630246502331784e-06, | |
| "loss": 0.5168, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.035977454128745985, | |
| "grad_norm": 11.091134071350098, | |
| "learning_rate": 3.596269153897402e-06, | |
| "loss": 0.4763, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.03730995242981065, | |
| "grad_norm": 25.33905601501465, | |
| "learning_rate": 3.729513657561626e-06, | |
| "loss": 0.4916, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.03864245073087532, | |
| "grad_norm": 7.44692325592041, | |
| "learning_rate": 3.862758161225849e-06, | |
| "loss": 0.4842, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.03997494903193998, | |
| "grad_norm": 11.449934005737305, | |
| "learning_rate": 3.996002664890073e-06, | |
| "loss": 0.5246, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03997494903193998, | |
| "eval_dev_accuracy": 0.9320828967108189, | |
| "eval_dev_accuracy_threshold": 0.7955138683319092, | |
| "eval_dev_average_precision": 0.40640646661588725, | |
| "eval_dev_f1": 0.5273073175258689, | |
| "eval_dev_f1_threshold": 0.7331215143203735, | |
| "eval_dev_precision": 0.45734063103670314, | |
| "eval_dev_recall": 0.6225484825243782, | |
| "eval_loss": 0.4692871868610382, | |
| "eval_runtime": 565.8018, | |
| "eval_samples_per_second": 234.441, | |
| "eval_steps_per_second": 7.328, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.04130744733300465, | |
| "grad_norm": 2.739481210708618, | |
| "learning_rate": 4.129247168554298e-06, | |
| "loss": 0.4399, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.042639945634069316, | |
| "grad_norm": 18.604293823242188, | |
| "learning_rate": 4.2624916722185215e-06, | |
| "loss": 0.4532, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.043972443935133984, | |
| "grad_norm": 2.9506380558013916, | |
| "learning_rate": 4.395736175882745e-06, | |
| "loss": 0.5107, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.04530494223619865, | |
| "grad_norm": 6.515221118927002, | |
| "learning_rate": 4.528980679546969e-06, | |
| "loss": 0.4249, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.04663744053726331, | |
| "grad_norm": 8.708155632019043, | |
| "learning_rate": 4.662225183211193e-06, | |
| "loss": 0.4526, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.04663744053726331, | |
| "eval_dev_accuracy": 0.9425166042202237, | |
| "eval_dev_accuracy_threshold": 0.8969273567199707, | |
| "eval_dev_average_precision": 0.5181562757024104, | |
| "eval_dev_f1": 0.5669769324160259, | |
| "eval_dev_f1_threshold": 0.7522543668746948, | |
| "eval_dev_precision": 0.5266422328728503, | |
| "eval_dev_recall": 0.6140024104305906, | |
| "eval_loss": 0.44245800375938416, | |
| "eval_runtime": 566.66, | |
| "eval_samples_per_second": 234.086, | |
| "eval_steps_per_second": 7.317, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.04796993883832798, | |
| "grad_norm": 20.27404022216797, | |
| "learning_rate": 4.795469686875417e-06, | |
| "loss": 0.4791, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.04930243713939265, | |
| "grad_norm": 26.697437286376953, | |
| "learning_rate": 4.92871419053964e-06, | |
| "loss": 0.4151, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.050634935440457315, | |
| "grad_norm": 29.9031982421875, | |
| "learning_rate": 5.061958694203864e-06, | |
| "loss": 0.4842, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.051967433741521976, | |
| "grad_norm": 33.03110885620117, | |
| "learning_rate": 5.195203197868088e-06, | |
| "loss": 0.4062, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.053299932042586644, | |
| "grad_norm": 18.199092864990234, | |
| "learning_rate": 5.328447701532313e-06, | |
| "loss": 0.4491, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.053299932042586644, | |
| "eval_dev_accuracy": 0.9464367833422542, | |
| "eval_dev_accuracy_threshold": 0.8940709829330444, | |
| "eval_dev_average_precision": 0.5665972703365837, | |
| "eval_dev_f1": 0.5783120410421486, | |
| "eval_dev_f1_threshold": 0.532160758972168, | |
| "eval_dev_precision": 0.5345420734542073, | |
| "eval_dev_recall": 0.6298893393228882, | |
| "eval_loss": 0.4243237376213074, | |
| "eval_runtime": 565.9608, | |
| "eval_samples_per_second": 234.375, | |
| "eval_steps_per_second": 7.326, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.05463243034365131, | |
| "grad_norm": 2.3968331813812256, | |
| "learning_rate": 5.461692205196536e-06, | |
| "loss": 0.3937, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.05596492864471598, | |
| "grad_norm": 3.501485586166382, | |
| "learning_rate": 5.59493670886076e-06, | |
| "loss": 0.4806, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.05729742694578065, | |
| "grad_norm": 20.607412338256836, | |
| "learning_rate": 5.728181212524984e-06, | |
| "loss": 0.4355, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.05862992524684531, | |
| "grad_norm": 11.288957595825195, | |
| "learning_rate": 5.861425716189208e-06, | |
| "loss": 0.4579, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.059962423547909975, | |
| "grad_norm": 23.52041244506836, | |
| "learning_rate": 5.9946702198534315e-06, | |
| "loss": 0.4232, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.059962423547909975, | |
| "eval_dev_accuracy": 0.9480651654390978, | |
| "eval_dev_accuracy_threshold": 0.925118088722229, | |
| "eval_dev_average_precision": 0.5978901727391869, | |
| "eval_dev_f1": 0.5897354160025502, | |
| "eval_dev_f1_threshold": 0.8643622994422913, | |
| "eval_dev_precision": 0.5724600309437855, | |
| "eval_dev_recall": 0.6080858989810453, | |
| "eval_loss": 0.4087965786457062, | |
| "eval_runtime": 564.9476, | |
| "eval_samples_per_second": 234.795, | |
| "eval_steps_per_second": 7.339, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.06129492184897464, | |
| "grad_norm": 8.910244941711426, | |
| "learning_rate": 6.127914723517655e-06, | |
| "loss": 0.4195, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.0626274201500393, | |
| "grad_norm": 10.253131866455078, | |
| "learning_rate": 6.261159227181879e-06, | |
| "loss": 0.4332, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.06395991845110398, | |
| "grad_norm": 6.71283483505249, | |
| "learning_rate": 6.394403730846103e-06, | |
| "loss": 0.433, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.06529241675216864, | |
| "grad_norm": 13.018428802490234, | |
| "learning_rate": 6.527648234510327e-06, | |
| "loss": 0.3978, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.06662491505323331, | |
| "grad_norm": 5.483168601989746, | |
| "learning_rate": 6.660892738174551e-06, | |
| "loss": 0.4165, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.06662491505323331, | |
| "eval_dev_accuracy": 0.9488642788755117, | |
| "eval_dev_accuracy_threshold": 0.9351813793182373, | |
| "eval_dev_average_precision": 0.611806667891919, | |
| "eval_dev_f1": 0.594213494881972, | |
| "eval_dev_f1_threshold": 0.8715409636497498, | |
| "eval_dev_precision": 0.5677078135914579, | |
| "eval_dev_recall": 0.6233154377122823, | |
| "eval_loss": 0.4015994369983673, | |
| "eval_runtime": 565.0203, | |
| "eval_samples_per_second": 234.765, | |
| "eval_steps_per_second": 7.338, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.06795741335429797, | |
| "grad_norm": 44.895931243896484, | |
| "learning_rate": 6.794137241838775e-06, | |
| "loss": 0.3173, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.06928991165536263, | |
| "grad_norm": 19.51112937927246, | |
| "learning_rate": 6.927381745502999e-06, | |
| "loss": 0.4279, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.07062240995642731, | |
| "grad_norm": 11.284177780151367, | |
| "learning_rate": 7.0606262491672225e-06, | |
| "loss": 0.4278, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.07195490825749197, | |
| "grad_norm": 12.088862419128418, | |
| "learning_rate": 7.193870752831446e-06, | |
| "loss": 0.394, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.07328740655855664, | |
| "grad_norm": 5.778110504150391, | |
| "learning_rate": 7.32711525649567e-06, | |
| "loss": 0.4033, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.07328740655855664, | |
| "eval_dev_accuracy": 0.9500780266421405, | |
| "eval_dev_accuracy_threshold": 0.9353994131088257, | |
| "eval_dev_average_precision": 0.6400665115573199, | |
| "eval_dev_f1": 0.6073723716004319, | |
| "eval_dev_f1_threshold": 0.8007456064224243, | |
| "eval_dev_precision": 0.5721619527314994, | |
| "eval_dev_recall": 0.6472006135641504, | |
| "eval_loss": 0.3888355791568756, | |
| "eval_runtime": 566.2917, | |
| "eval_samples_per_second": 234.238, | |
| "eval_steps_per_second": 7.321, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.0746199048596213, | |
| "grad_norm": 7.1619439125061035, | |
| "learning_rate": 7.460359760159894e-06, | |
| "loss": 0.3775, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.07595240316068597, | |
| "grad_norm": 12.566367149353027, | |
| "learning_rate": 7.593604263824118e-06, | |
| "loss": 0.3944, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.07728490146175064, | |
| "grad_norm": 10.173190116882324, | |
| "learning_rate": 7.726848767488342e-06, | |
| "loss": 0.4256, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.0786173997628153, | |
| "grad_norm": 1.7395318746566772, | |
| "learning_rate": 7.860093271152565e-06, | |
| "loss": 0.3984, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.07994989806387996, | |
| "grad_norm": 3.9586873054504395, | |
| "learning_rate": 7.99333777481679e-06, | |
| "loss": 0.3545, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.07994989806387996, | |
| "eval_dev_accuracy": 0.949716163953953, | |
| "eval_dev_accuracy_threshold": 0.9251655340194702, | |
| "eval_dev_average_precision": 0.6562730620483952, | |
| "eval_dev_f1": 0.6157150706828513, | |
| "eval_dev_f1_threshold": 0.4972879886627197, | |
| "eval_dev_precision": 0.5658281307381564, | |
| "eval_dev_recall": 0.6752492604360688, | |
| "eval_loss": 0.38393494486808777, | |
| "eval_runtime": 567.4221, | |
| "eval_samples_per_second": 233.771, | |
| "eval_steps_per_second": 7.307, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.08128239636494464, | |
| "grad_norm": 4.9177398681640625, | |
| "learning_rate": 8.126582278481013e-06, | |
| "loss": 0.4551, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.0826148946660093, | |
| "grad_norm": 14.003257751464844, | |
| "learning_rate": 8.259826782145237e-06, | |
| "loss": 0.3817, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.08394739296707397, | |
| "grad_norm": 7.29791259765625, | |
| "learning_rate": 8.39307128580946e-06, | |
| "loss": 0.408, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.08527989126813863, | |
| "grad_norm": 26.11504554748535, | |
| "learning_rate": 8.526315789473685e-06, | |
| "loss": 0.4176, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.0866123895692033, | |
| "grad_norm": 21.16114616394043, | |
| "learning_rate": 8.659560293137908e-06, | |
| "loss": 0.4128, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.0866123895692033, | |
| "eval_dev_accuracy": 0.9528523072515775, | |
| "eval_dev_accuracy_threshold": 0.9102756977081299, | |
| "eval_dev_average_precision": 0.6708215133735886, | |
| "eval_dev_f1": 0.6232578397212545, | |
| "eval_dev_f1_threshold": 0.6869294047355652, | |
| "eval_dev_precision": 0.6194134833892436, | |
| "eval_dev_recall": 0.6271502136518023, | |
| "eval_loss": 0.35909053683280945, | |
| "eval_runtime": 568.2468, | |
| "eval_samples_per_second": 233.432, | |
| "eval_steps_per_second": 7.296, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.08794488787026797, | |
| "grad_norm": 2.0384860038757324, | |
| "learning_rate": 8.792804796802133e-06, | |
| "loss": 0.3837, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.08927738617133263, | |
| "grad_norm": 9.94750690460205, | |
| "learning_rate": 8.926049300466355e-06, | |
| "loss": 0.3618, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.0906098844723973, | |
| "grad_norm": 3.8198211193084717, | |
| "learning_rate": 9.05929380413058e-06, | |
| "loss": 0.3643, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.09194238277346196, | |
| "grad_norm": 14.838878631591797, | |
| "learning_rate": 9.192538307794803e-06, | |
| "loss": 0.3409, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.09327488107452662, | |
| "grad_norm": 25.42053985595703, | |
| "learning_rate": 9.325782811459028e-06, | |
| "loss": 0.4001, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.09327488107452662, | |
| "eval_dev_accuracy": 0.9507640579884958, | |
| "eval_dev_accuracy_threshold": 0.7285012006759644, | |
| "eval_dev_average_precision": 0.6341417194028635, | |
| "eval_dev_f1": 0.6159875449616148, | |
| "eval_dev_f1_threshold": 0.3488144874572754, | |
| "eval_dev_precision": 0.6038947368421053, | |
| "eval_dev_recall": 0.628574559000767, | |
| "eval_loss": 0.46753522753715515, | |
| "eval_runtime": 566.0195, | |
| "eval_samples_per_second": 234.351, | |
| "eval_steps_per_second": 7.325, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.0946073793755913, | |
| "grad_norm": 21.88748550415039, | |
| "learning_rate": 9.459027315123252e-06, | |
| "loss": 0.4186, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.09593987767665596, | |
| "grad_norm": 5.960207939147949, | |
| "learning_rate": 9.592271818787475e-06, | |
| "loss": 0.3478, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.09727237597772063, | |
| "grad_norm": 16.917625427246094, | |
| "learning_rate": 9.7255163224517e-06, | |
| "loss": 0.3492, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.0986048742787853, | |
| "grad_norm": 14.463135719299316, | |
| "learning_rate": 9.858760826115924e-06, | |
| "loss": 0.3522, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.09993737257984996, | |
| "grad_norm": 3.8919215202331543, | |
| "learning_rate": 9.992005329780147e-06, | |
| "loss": 0.3445, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.09993737257984996, | |
| "eval_dev_accuracy": 0.9513897788868199, | |
| "eval_dev_accuracy_threshold": 0.933416485786438, | |
| "eval_dev_average_precision": 0.672072308065407, | |
| "eval_dev_f1": 0.6241289651586063, | |
| "eval_dev_f1_threshold": 0.4514094591140747, | |
| "eval_dev_precision": 0.5939819855488468, | |
| "eval_dev_recall": 0.6574997260874329, | |
| "eval_loss": 0.4463006556034088, | |
| "eval_runtime": 565.9962, | |
| "eval_samples_per_second": 234.36, | |
| "eval_steps_per_second": 7.325, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.10126987088091463, | |
| "grad_norm": 5.6276421546936035, | |
| "learning_rate": 1.012524983344437e-05, | |
| "loss": 0.3611, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.10260236918197929, | |
| "grad_norm": 25.222440719604492, | |
| "learning_rate": 1.0258494337108595e-05, | |
| "loss": 0.3694, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.10393486748304395, | |
| "grad_norm": 10.44590950012207, | |
| "learning_rate": 1.0391738840772818e-05, | |
| "loss": 0.34, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.10526736578410863, | |
| "grad_norm": 15.12126350402832, | |
| "learning_rate": 1.0524983344437042e-05, | |
| "loss": 0.3839, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.10659986408517329, | |
| "grad_norm": 10.425951957702637, | |
| "learning_rate": 1.0658227848101265e-05, | |
| "loss": 0.3408, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.10659986408517329, | |
| "eval_dev_accuracy": 0.947168047524633, | |
| "eval_dev_accuracy_threshold": 0.9196346402168274, | |
| "eval_dev_average_precision": 0.6278546311695713, | |
| "eval_dev_f1": 0.5864126161957174, | |
| "eval_dev_f1_threshold": 0.5887953042984009, | |
| "eval_dev_precision": 0.5073623559539052, | |
| "eval_dev_recall": 0.6946422701873562, | |
| "eval_loss": 0.36468541622161865, | |
| "eval_runtime": 566.1553, | |
| "eval_samples_per_second": 234.294, | |
| "eval_steps_per_second": 7.323, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.10793236238623796, | |
| "grad_norm": 19.852497100830078, | |
| "learning_rate": 1.079147235176549e-05, | |
| "loss": 0.4011, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.10926486068730262, | |
| "grad_norm": 66.98611450195312, | |
| "learning_rate": 1.0924716855429713e-05, | |
| "loss": 0.3037, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.11059735898836728, | |
| "grad_norm": 2.033569812774658, | |
| "learning_rate": 1.1057961359093938e-05, | |
| "loss": 0.3632, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.11192985728943196, | |
| "grad_norm": 1.7951024770736694, | |
| "learning_rate": 1.1191205862758164e-05, | |
| "loss": 0.3878, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.11326235559049662, | |
| "grad_norm": 3.2986645698547363, | |
| "learning_rate": 1.1324450366422385e-05, | |
| "loss": 0.3849, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.11326235559049662, | |
| "eval_dev_accuracy": 0.9516611759029605, | |
| "eval_dev_accuracy_threshold": 0.9289531707763672, | |
| "eval_dev_average_precision": 0.6795165568410317, | |
| "eval_dev_f1": 0.6335993534700474, | |
| "eval_dev_f1_threshold": 0.7476029396057129, | |
| "eval_dev_precision": 0.5877612220035611, | |
| "eval_dev_recall": 0.6871918483620029, | |
| "eval_loss": 0.3556542694568634, | |
| "eval_runtime": 568.4381, | |
| "eval_samples_per_second": 233.353, | |
| "eval_steps_per_second": 7.294, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.1145948538915613, | |
| "grad_norm": 1.4321446418762207, | |
| "learning_rate": 1.1457694870086611e-05, | |
| "loss": 0.3833, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.11592735219262595, | |
| "grad_norm": 54.76650619506836, | |
| "learning_rate": 1.1590939373750833e-05, | |
| "loss": 0.3797, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.11725985049369061, | |
| "grad_norm": 31.644800186157227, | |
| "learning_rate": 1.1724183877415059e-05, | |
| "loss": 0.3705, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.11859234879475529, | |
| "grad_norm": 10.417598724365234, | |
| "learning_rate": 1.1857428381079282e-05, | |
| "loss": 0.3556, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.11992484709581995, | |
| "grad_norm": 9.85118579864502, | |
| "learning_rate": 1.1990672884743507e-05, | |
| "loss": 0.3771, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.11992484709581995, | |
| "eval_dev_accuracy": 0.955196875918792, | |
| "eval_dev_accuracy_threshold": 0.9099207520484924, | |
| "eval_dev_average_precision": 0.7086261480764138, | |
| "eval_dev_f1": 0.6543492478744277, | |
| "eval_dev_f1_threshold": 0.8123365640640259, | |
| "eval_dev_precision": 0.6510139898058779, | |
| "eval_dev_recall": 0.6577188561411198, | |
| "eval_loss": 0.33506301045417786, | |
| "eval_runtime": 567.679, | |
| "eval_samples_per_second": 233.665, | |
| "eval_steps_per_second": 7.303, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.12125734539688462, | |
| "grad_norm": 22.609596252441406, | |
| "learning_rate": 1.212391738840773e-05, | |
| "loss": 0.3649, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.12258984369794929, | |
| "grad_norm": 13.67054271697998, | |
| "learning_rate": 1.2257161892071954e-05, | |
| "loss": 0.3687, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.12392234199901395, | |
| "grad_norm": 11.858447074890137, | |
| "learning_rate": 1.2390406395736177e-05, | |
| "loss": 0.406, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.1252548403000786, | |
| "grad_norm": 22.195842742919922, | |
| "learning_rate": 1.2523650899400402e-05, | |
| "loss": 0.3362, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.1265873386011433, | |
| "grad_norm": 1.6114740371704102, | |
| "learning_rate": 1.2656895403064625e-05, | |
| "loss": 0.2749, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.1265873386011433, | |
| "eval_dev_accuracy": 0.9535986490459641, | |
| "eval_dev_accuracy_threshold": 0.9009051322937012, | |
| "eval_dev_average_precision": 0.6983138303648807, | |
| "eval_dev_f1": 0.6486718540381003, | |
| "eval_dev_f1_threshold": 0.7954304218292236, | |
| "eval_dev_precision": 0.6356752208666386, | |
| "eval_dev_recall": 0.6622110222417005, | |
| "eval_loss": 0.35359007120132446, | |
| "eval_runtime": 567.3028, | |
| "eval_samples_per_second": 233.82, | |
| "eval_steps_per_second": 7.308, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.12791983690220796, | |
| "grad_norm": 15.079890251159668, | |
| "learning_rate": 1.279013990672885e-05, | |
| "loss": 0.3038, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.12925233520327262, | |
| "grad_norm": 19.459815979003906, | |
| "learning_rate": 1.2923384410393072e-05, | |
| "loss": 0.3273, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.13058483350433728, | |
| "grad_norm": 21.132827758789062, | |
| "learning_rate": 1.3056628914057297e-05, | |
| "loss": 0.3823, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.13191733180540194, | |
| "grad_norm": 4.1918158531188965, | |
| "learning_rate": 1.318987341772152e-05, | |
| "loss": 0.3406, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.13324983010646663, | |
| "grad_norm": 22.806039810180664, | |
| "learning_rate": 1.3323117921385744e-05, | |
| "loss": 0.4069, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.13324983010646663, | |
| "eval_dev_accuracy": 0.9545711550204679, | |
| "eval_dev_accuracy_threshold": 0.9562267065048218, | |
| "eval_dev_average_precision": 0.7100922283297543, | |
| "eval_dev_f1": 0.6518728053062817, | |
| "eval_dev_f1_threshold": 0.8285595178604126, | |
| "eval_dev_precision": 0.5874132020743605, | |
| "eval_dev_recall": 0.7322230743946532, | |
| "eval_loss": 0.3286122977733612, | |
| "eval_runtime": 566.8451, | |
| "eval_samples_per_second": 234.009, | |
| "eval_steps_per_second": 7.314, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.1345823284075313, | |
| "grad_norm": 12.236410140991211, | |
| "learning_rate": 1.3456362425049967e-05, | |
| "loss": 0.3618, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.13591482670859595, | |
| "grad_norm": 5.4430060386657715, | |
| "learning_rate": 1.3589606928714192e-05, | |
| "loss": 0.3619, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.1372473250096606, | |
| "grad_norm": 13.798270225524902, | |
| "learning_rate": 1.3722851432378415e-05, | |
| "loss": 0.3413, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.13857982331072527, | |
| "grad_norm": 3.899458169937134, | |
| "learning_rate": 1.385609593604264e-05, | |
| "loss": 0.3374, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.13991232161178996, | |
| "grad_norm": 6.147464752197266, | |
| "learning_rate": 1.3989340439706862e-05, | |
| "loss": 0.3725, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.13991232161178996, | |
| "eval_dev_accuracy": 0.9561316878632762, | |
| "eval_dev_accuracy_threshold": 0.9145029187202454, | |
| "eval_dev_average_precision": 0.7194237355423562, | |
| "eval_dev_f1": 0.6639100398366194, | |
| "eval_dev_f1_threshold": 0.7489595413208008, | |
| "eval_dev_precision": 0.6150037369207773, | |
| "eval_dev_recall": 0.7212665717103101, | |
| "eval_loss": 0.30797863006591797, | |
| "eval_runtime": 567.191, | |
| "eval_samples_per_second": 233.867, | |
| "eval_steps_per_second": 7.31, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.14124481991285462, | |
| "grad_norm": 0.32248708605766296, | |
| "learning_rate": 1.4122584943371087e-05, | |
| "loss": 0.3289, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.14257731821391928, | |
| "grad_norm": 3.342273235321045, | |
| "learning_rate": 1.4255829447035312e-05, | |
| "loss": 0.335, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.14390981651498394, | |
| "grad_norm": 5.640665531158447, | |
| "learning_rate": 1.4389073950699535e-05, | |
| "loss": 0.3298, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.1452423148160486, | |
| "grad_norm": 1.3349778652191162, | |
| "learning_rate": 1.452231845436376e-05, | |
| "loss": 0.3805, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.1465748131171133, | |
| "grad_norm": 8.876007080078125, | |
| "learning_rate": 1.4655562958027982e-05, | |
| "loss": 0.3545, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.1465748131171133, | |
| "eval_dev_accuracy": 0.9554607341289286, | |
| "eval_dev_accuracy_threshold": 0.8000156283378601, | |
| "eval_dev_average_precision": 0.706945036170435, | |
| "eval_dev_f1": 0.6470619459631616, | |
| "eval_dev_f1_threshold": 0.35837632417678833, | |
| "eval_dev_precision": 0.6275741350906096, | |
| "eval_dev_recall": 0.6677988386107154, | |
| "eval_loss": 0.40842413902282715, | |
| "eval_runtime": 566.601, | |
| "eval_samples_per_second": 234.11, | |
| "eval_steps_per_second": 7.317, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.14790731141817795, | |
| "grad_norm": 1.4081681966781616, | |
| "learning_rate": 1.4788807461692207e-05, | |
| "loss": 0.3591, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.1492398097192426, | |
| "grad_norm": 15.024413108825684, | |
| "learning_rate": 1.492205196535643e-05, | |
| "loss": 0.3523, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.15057230802030727, | |
| "grad_norm": 18.281108856201172, | |
| "learning_rate": 1.5055296469020654e-05, | |
| "loss": 0.3601, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.15190480632137193, | |
| "grad_norm": 6.56211519241333, | |
| "learning_rate": 1.5188540972684877e-05, | |
| "loss": 0.3365, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.1532373046224366, | |
| "grad_norm": 43.26646041870117, | |
| "learning_rate": 1.5321785476349102e-05, | |
| "loss": 0.3859, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.1532373046224366, | |
| "eval_dev_accuracy": 0.9578354580201588, | |
| "eval_dev_accuracy_threshold": 0.7837315797805786, | |
| "eval_dev_average_precision": 0.7318177300477213, | |
| "eval_dev_f1": 0.6784168212739641, | |
| "eval_dev_f1_threshold": 0.5022754669189453, | |
| "eval_dev_precision": 0.6404592779994162, | |
| "eval_dev_recall": 0.7211570066834666, | |
| "eval_loss": 0.3321084976196289, | |
| "eval_runtime": 566.5331, | |
| "eval_samples_per_second": 234.138, | |
| "eval_steps_per_second": 7.318, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.15456980292350128, | |
| "grad_norm": 28.52861785888672, | |
| "learning_rate": 1.5455029980013325e-05, | |
| "loss": 0.3581, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.15590230122456594, | |
| "grad_norm": 7.024416923522949, | |
| "learning_rate": 1.558827448367755e-05, | |
| "loss": 0.3133, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.1572347995256306, | |
| "grad_norm": 0.6226129531860352, | |
| "learning_rate": 1.5721518987341774e-05, | |
| "loss": 0.295, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.15856729782669526, | |
| "grad_norm": 1.0621097087860107, | |
| "learning_rate": 1.5854763491005997e-05, | |
| "loss": 0.3027, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.15989979612775992, | |
| "grad_norm": 1.318295955657959, | |
| "learning_rate": 1.598800799467022e-05, | |
| "loss": 0.3216, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.15989979612775992, | |
| "eval_dev_accuracy": 0.9555587386069794, | |
| "eval_dev_accuracy_threshold": 0.8190538287162781, | |
| "eval_dev_average_precision": 0.7133029701747852, | |
| "eval_dev_f1": 0.6531785971038309, | |
| "eval_dev_f1_threshold": 0.3298466205596924, | |
| "eval_dev_precision": 0.6146709191069876, | |
| "eval_dev_recall": 0.6968335707242248, | |
| "eval_loss": 0.3916049897670746, | |
| "eval_runtime": 554.3199, | |
| "eval_samples_per_second": 239.297, | |
| "eval_steps_per_second": 7.479, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.1612322944288246, | |
| "grad_norm": 9.81628704071045, | |
| "learning_rate": 1.6121252498334446e-05, | |
| "loss": 0.3522, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.16256479272988927, | |
| "grad_norm": 4.447005271911621, | |
| "learning_rate": 1.625449700199867e-05, | |
| "loss": 0.3266, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.16389729103095393, | |
| "grad_norm": 14.646246910095215, | |
| "learning_rate": 1.6387741505662892e-05, | |
| "loss": 0.3292, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.1652297893320186, | |
| "grad_norm": 16.482669830322266, | |
| "learning_rate": 1.6520986009327115e-05, | |
| "loss": 0.3446, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.16656228763308326, | |
| "grad_norm": 7.77319860458374, | |
| "learning_rate": 1.665423051299134e-05, | |
| "loss": 0.3236, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.16656228763308326, | |
| "eval_dev_accuracy": 0.9569684953297097, | |
| "eval_dev_accuracy_threshold": 0.9383260011672974, | |
| "eval_dev_average_precision": 0.7292398003419696, | |
| "eval_dev_f1": 0.66701062841812, | |
| "eval_dev_f1_threshold": 0.8000765442848206, | |
| "eval_dev_precision": 0.6303266699171136, | |
| "eval_dev_recall": 0.7082283335159417, | |
| "eval_loss": 0.35466820001602173, | |
| "eval_runtime": 561.336, | |
| "eval_samples_per_second": 236.306, | |
| "eval_steps_per_second": 7.386, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.16789478593414794, | |
| "grad_norm": 6.646021366119385, | |
| "learning_rate": 1.6787475016655564e-05, | |
| "loss": 0.3134, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.1692272842352126, | |
| "grad_norm": 87.47698211669922, | |
| "learning_rate": 1.6920719520319787e-05, | |
| "loss": 0.3249, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.17055978253627727, | |
| "grad_norm": 17.500768661499023, | |
| "learning_rate": 1.705396402398401e-05, | |
| "loss": 0.3811, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.17189228083734193, | |
| "grad_norm": 7.166949272155762, | |
| "learning_rate": 1.7187208527648237e-05, | |
| "loss": 0.3127, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.1732247791384066, | |
| "grad_norm": 4.106062889099121, | |
| "learning_rate": 1.732045303131246e-05, | |
| "loss": 0.3219, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.1732247791384066, | |
| "eval_dev_accuracy": 0.9564407789094364, | |
| "eval_dev_accuracy_threshold": 0.8795315623283386, | |
| "eval_dev_average_precision": 0.726842832162545, | |
| "eval_dev_f1": 0.6643535054597408, | |
| "eval_dev_f1_threshold": 0.4876420497894287, | |
| "eval_dev_precision": 0.621717123483908, | |
| "eval_dev_recall": 0.7132683247507395, | |
| "eval_loss": 0.3564859926700592, | |
| "eval_runtime": 558.1535, | |
| "eval_samples_per_second": 237.653, | |
| "eval_steps_per_second": 7.428, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.17455727743947128, | |
| "grad_norm": 1.3857766389846802, | |
| "learning_rate": 1.7453697534976682e-05, | |
| "loss": 0.3526, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.17588977574053594, | |
| "grad_norm": 20.39262580871582, | |
| "learning_rate": 1.758694203864091e-05, | |
| "loss": 0.3299, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.1772222740416006, | |
| "grad_norm": 18.849407196044922, | |
| "learning_rate": 1.772018654230513e-05, | |
| "loss": 0.3303, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.17855477234266526, | |
| "grad_norm": 42.82183837890625, | |
| "learning_rate": 1.7853431045969355e-05, | |
| "loss": 0.3739, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.17988727064372992, | |
| "grad_norm": 4.524885654449463, | |
| "learning_rate": 1.7986675549633577e-05, | |
| "loss": 0.3544, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.17988727064372992, | |
| "eval_dev_accuracy": 0.9572021983158308, | |
| "eval_dev_accuracy_threshold": 0.9545025825500488, | |
| "eval_dev_average_precision": 0.7351364884979171, | |
| "eval_dev_f1": 0.6692303640099035, | |
| "eval_dev_f1_threshold": 0.7856150269508362, | |
| "eval_dev_precision": 0.6444805194805194, | |
| "eval_dev_recall": 0.6959570505094774, | |
| "eval_loss": 0.33416542410850525, | |
| "eval_runtime": 558.7974, | |
| "eval_samples_per_second": 237.379, | |
| "eval_steps_per_second": 7.42, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.1812197689447946, | |
| "grad_norm": 2.0763137340545654, | |
| "learning_rate": 1.8119920053297804e-05, | |
| "loss": 0.3584, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.18255226724585927, | |
| "grad_norm": 4.722475051879883, | |
| "learning_rate": 1.8253164556962027e-05, | |
| "loss": 0.341, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.18388476554692393, | |
| "grad_norm": 4.084864139556885, | |
| "learning_rate": 1.838640906062625e-05, | |
| "loss": 0.3371, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.1852172638479886, | |
| "grad_norm": 0.32559067010879517, | |
| "learning_rate": 1.8519653564290473e-05, | |
| "loss": 0.3322, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.18654976214905325, | |
| "grad_norm": 9.505677223205566, | |
| "learning_rate": 1.86528980679547e-05, | |
| "loss": 0.3493, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.18654976214905325, | |
| "eval_dev_accuracy": 0.9565010893574676, | |
| "eval_dev_accuracy_threshold": 0.9463940858840942, | |
| "eval_dev_average_precision": 0.7367044841602028, | |
| "eval_dev_f1": 0.6669865642994243, | |
| "eval_dev_f1_threshold": 0.8662494421005249, | |
| "eval_dev_precision": 0.6496001661647107, | |
| "eval_dev_recall": 0.6853292429056645, | |
| "eval_loss": 0.29620230197906494, | |
| "eval_runtime": 559.2923, | |
| "eval_samples_per_second": 237.169, | |
| "eval_steps_per_second": 7.413, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.18788226045011794, | |
| "grad_norm": 19.357847213745117, | |
| "learning_rate": 1.8786142571618922e-05, | |
| "loss": 0.3021, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.1892147587511826, | |
| "grad_norm": 0.8998715281486511, | |
| "learning_rate": 1.8919387075283148e-05, | |
| "loss": 0.3249, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.19054725705224726, | |
| "grad_norm": 17.16973304748535, | |
| "learning_rate": 1.9052631578947368e-05, | |
| "loss": 0.3389, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.19187975535331192, | |
| "grad_norm": 1.553682565689087, | |
| "learning_rate": 1.9185876082611594e-05, | |
| "loss": 0.3547, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.19321225365437658, | |
| "grad_norm": 10.778045654296875, | |
| "learning_rate": 1.9319120586275817e-05, | |
| "loss": 0.3205, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.19321225365437658, | |
| "eval_dev_accuracy": 0.9589587401147406, | |
| "eval_dev_accuracy_threshold": 0.9295341968536377, | |
| "eval_dev_average_precision": 0.7537310584722261, | |
| "eval_dev_f1": 0.6830523319465732, | |
| "eval_dev_f1_threshold": 0.7479926347732544, | |
| "eval_dev_precision": 0.6825292637567005, | |
| "eval_dev_recall": 0.6835762024761696, | |
| "eval_loss": 0.3468180298805237, | |
| "eval_runtime": 558.8191, | |
| "eval_samples_per_second": 237.37, | |
| "eval_steps_per_second": 7.419, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.19454475195544127, | |
| "grad_norm": 2.241529941558838, | |
| "learning_rate": 1.9452365089940043e-05, | |
| "loss": 0.3168, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.19587725025650593, | |
| "grad_norm": 1.1848278045654297, | |
| "learning_rate": 1.9585609593604263e-05, | |
| "loss": 0.3348, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.1972097485575706, | |
| "grad_norm": 16.031787872314453, | |
| "learning_rate": 1.971885409726849e-05, | |
| "loss": 0.3237, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.19854224685863525, | |
| "grad_norm": 12.078638076782227, | |
| "learning_rate": 1.9852098600932712e-05, | |
| "loss": 0.3428, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.1998747451596999, | |
| "grad_norm": 5.735422134399414, | |
| "learning_rate": 1.998534310459694e-05, | |
| "loss": 0.3179, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.1998747451596999, | |
| "eval_dev_accuracy": 0.9547219311405459, | |
| "eval_dev_accuracy_threshold": 0.9576058387756348, | |
| "eval_dev_average_precision": 0.6908515568536635, | |
| "eval_dev_f1": 0.6764229341974599, | |
| "eval_dev_f1_threshold": 0.926771879196167, | |
| "eval_dev_precision": 0.6467119728163102, | |
| "eval_dev_recall": 0.7089952887038458, | |
| "eval_loss": 0.34104466438293457, | |
| "eval_runtime": 561.385, | |
| "eval_samples_per_second": 236.285, | |
| "eval_steps_per_second": 7.385, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.2012072434607646, | |
| "grad_norm": 19.35861587524414, | |
| "learning_rate": 1.9986823013828432e-05, | |
| "loss": 0.3409, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.20253974176182926, | |
| "grad_norm": 35.545223236083984, | |
| "learning_rate": 1.997201741138847e-05, | |
| "loss": 0.331, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.20387224006289392, | |
| "grad_norm": 17.14919662475586, | |
| "learning_rate": 1.9957211808948506e-05, | |
| "loss": 0.3493, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.20520473836395858, | |
| "grad_norm": 2.735530138015747, | |
| "learning_rate": 1.9942406206508544e-05, | |
| "loss": 0.3205, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.20653723666502324, | |
| "grad_norm": 1.0762556791305542, | |
| "learning_rate": 1.9927600604068582e-05, | |
| "loss": 0.3307, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.20653723666502324, | |
| "eval_dev_accuracy": 0.9532292475517727, | |
| "eval_dev_accuracy_threshold": 0.8670874238014221, | |
| "eval_dev_average_precision": 0.6865319564110608, | |
| "eval_dev_f1": 0.660230457801308, | |
| "eval_dev_f1_threshold": 0.6122031211853027, | |
| "eval_dev_precision": 0.6272807969227735, | |
| "eval_dev_recall": 0.6968335707242248, | |
| "eval_loss": 0.3867639899253845, | |
| "eval_runtime": 563.4002, | |
| "eval_samples_per_second": 235.44, | |
| "eval_steps_per_second": 7.359, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.2078697349660879, | |
| "grad_norm": 8.77493953704834, | |
| "learning_rate": 1.9912795001628617e-05, | |
| "loss": 0.3683, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.2092022332671526, | |
| "grad_norm": 0.7768261432647705, | |
| "learning_rate": 1.9897989399188656e-05, | |
| "loss": 0.318, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.21053473156821725, | |
| "grad_norm": 12.180807113647461, | |
| "learning_rate": 1.988318379674869e-05, | |
| "loss": 0.3498, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.21186722986928191, | |
| "grad_norm": 4.719166278839111, | |
| "learning_rate": 1.986837819430873e-05, | |
| "loss": 0.3043, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.21319972817034658, | |
| "grad_norm": 6.112349987030029, | |
| "learning_rate": 1.9853572591868764e-05, | |
| "loss": 0.3393, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.21319972817034658, | |
| "eval_dev_accuracy": 0.9534780281499016, | |
| "eval_dev_accuracy_threshold": 0.9171842336654663, | |
| "eval_dev_average_precision": 0.7089238013031434, | |
| "eval_dev_f1": 0.6696855863736944, | |
| "eval_dev_f1_threshold": 0.8700560331344604, | |
| "eval_dev_precision": 0.6313797787696488, | |
| "eval_dev_recall": 0.7129396296702093, | |
| "eval_loss": 0.33279770612716675, | |
| "eval_runtime": 559.9008, | |
| "eval_samples_per_second": 236.912, | |
| "eval_steps_per_second": 7.405, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.21453222647141124, | |
| "grad_norm": 47.89255905151367, | |
| "learning_rate": 1.9838766989428802e-05, | |
| "loss": 0.3326, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.21586472477247592, | |
| "grad_norm": 6.826938152313232, | |
| "learning_rate": 1.982396138698884e-05, | |
| "loss": 0.3094, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.21719722307354059, | |
| "grad_norm": 13.803174018859863, | |
| "learning_rate": 1.9809155784548875e-05, | |
| "loss": 0.3572, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.21852972137460525, | |
| "grad_norm": 7.402415752410889, | |
| "learning_rate": 1.9794350182108914e-05, | |
| "loss": 0.3447, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.2198622196756699, | |
| "grad_norm": 28.723724365234375, | |
| "learning_rate": 1.977954457966895e-05, | |
| "loss": 0.3484, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.2198622196756699, | |
| "eval_dev_accuracy": 0.9592753699669047, | |
| "eval_dev_accuracy_threshold": 0.8685222864151001, | |
| "eval_dev_average_precision": 0.7462578581871518, | |
| "eval_dev_f1": 0.6837169650468883, | |
| "eval_dev_f1_threshold": 0.35429614782333374, | |
| "eval_dev_precision": 0.6654911316253501, | |
| "eval_dev_recall": 0.702969212227457, | |
| "eval_loss": 0.3938925862312317, | |
| "eval_runtime": 560.3292, | |
| "eval_samples_per_second": 236.73, | |
| "eval_steps_per_second": 7.399, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.22119471797673457, | |
| "grad_norm": 16.560897827148438, | |
| "learning_rate": 1.9764738977228987e-05, | |
| "loss": 0.3182, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.22252721627779926, | |
| "grad_norm": 1.3701841831207275, | |
| "learning_rate": 1.9749933374789022e-05, | |
| "loss": 0.283, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.22385971457886392, | |
| "grad_norm": 12.799971580505371, | |
| "learning_rate": 1.973512777234906e-05, | |
| "loss": 0.3134, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.22519221287992858, | |
| "grad_norm": 4.794546127319336, | |
| "learning_rate": 1.9720322169909095e-05, | |
| "loss": 0.3454, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.22652471118099324, | |
| "grad_norm": 21.59016990661621, | |
| "learning_rate": 1.970551656746913e-05, | |
| "loss": 0.3485, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.22652471118099324, | |
| "eval_dev_accuracy": 0.9593356804149359, | |
| "eval_dev_accuracy_threshold": 0.9145892858505249, | |
| "eval_dev_average_precision": 0.747695886787644, | |
| "eval_dev_f1": 0.6768515829218704, | |
| "eval_dev_f1_threshold": 0.8686491847038269, | |
| "eval_dev_precision": 0.7203264094955489, | |
| "eval_dev_recall": 0.6383258463898324, | |
| "eval_loss": 0.34743377566337585, | |
| "eval_runtime": 563.0506, | |
| "eval_samples_per_second": 235.586, | |
| "eval_steps_per_second": 7.363, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.2278572094820579, | |
| "grad_norm": 14.225733757019043, | |
| "learning_rate": 1.969071096502917e-05, | |
| "loss": 0.2942, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.2291897077831226, | |
| "grad_norm": 7.983681678771973, | |
| "learning_rate": 1.9675905362589203e-05, | |
| "loss": 0.2865, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.23052220608418725, | |
| "grad_norm": 2.1385481357574463, | |
| "learning_rate": 1.9661099760149242e-05, | |
| "loss": 0.3489, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.2318547043852519, | |
| "grad_norm": 12.413968086242676, | |
| "learning_rate": 1.9646294157709277e-05, | |
| "loss": 0.2959, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.23318720268631657, | |
| "grad_norm": 15.191158294677734, | |
| "learning_rate": 1.9631488555269315e-05, | |
| "loss": 0.3626, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.23318720268631657, | |
| "eval_dev_accuracy": 0.959637232655092, | |
| "eval_dev_accuracy_threshold": 0.9243228435516357, | |
| "eval_dev_average_precision": 0.7598083206267562, | |
| "eval_dev_f1": 0.6903569873748368, | |
| "eval_dev_f1_threshold": 0.7939244508743286, | |
| "eval_dev_precision": 0.6858038706887231, | |
| "eval_dev_recall": 0.6949709652678865, | |
| "eval_loss": 0.28921985626220703, | |
| "eval_runtime": 562.3661, | |
| "eval_samples_per_second": 235.873, | |
| "eval_steps_per_second": 7.372, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.23451970098738123, | |
| "grad_norm": 2.7491862773895264, | |
| "learning_rate": 1.961668295282935e-05, | |
| "loss": 0.3107, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.23585219928844592, | |
| "grad_norm": 52.241886138916016, | |
| "learning_rate": 1.960187735038939e-05, | |
| "loss": 0.2914, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.23718469758951058, | |
| "grad_norm": 11.401723861694336, | |
| "learning_rate": 1.9587071747949427e-05, | |
| "loss": 0.3298, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.23851719589057524, | |
| "grad_norm": 4.170936107635498, | |
| "learning_rate": 1.957226614550946e-05, | |
| "loss": 0.3315, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.2398496941916399, | |
| "grad_norm": 5.668073654174805, | |
| "learning_rate": 1.95574605430695e-05, | |
| "loss": 0.3097, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.2398496941916399, | |
| "eval_dev_accuracy": 0.9607454371376661, | |
| "eval_dev_accuracy_threshold": 0.9465240836143494, | |
| "eval_dev_average_precision": 0.7633624538366494, | |
| "eval_dev_f1": 0.6969561824060653, | |
| "eval_dev_f1_threshold": 0.8802664279937744, | |
| "eval_dev_precision": 0.7094540914765634, | |
| "eval_dev_recall": 0.6848909827982907, | |
| "eval_loss": 0.2974649667739868, | |
| "eval_runtime": 565.9458, | |
| "eval_samples_per_second": 234.381, | |
| "eval_steps_per_second": 7.326, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.24118219249270456, | |
| "grad_norm": 18.609146118164062, | |
| "learning_rate": 1.9542654940629535e-05, | |
| "loss": 0.3474, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.24251469079376925, | |
| "grad_norm": 5.154010772705078, | |
| "learning_rate": 1.9527849338189573e-05, | |
| "loss": 0.2917, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.2438471890948339, | |
| "grad_norm": 9.549324035644531, | |
| "learning_rate": 1.9513043735749608e-05, | |
| "loss": 0.3557, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.24517968739589857, | |
| "grad_norm": 1.6343746185302734, | |
| "learning_rate": 1.9498238133309647e-05, | |
| "loss": 0.3394, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.24651218569696323, | |
| "grad_norm": 9.207841873168945, | |
| "learning_rate": 1.9483432530869685e-05, | |
| "loss": 0.2891, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.24651218569696323, | |
| "eval_dev_accuracy": 0.9630900058048806, | |
| "eval_dev_accuracy_threshold": 0.9168897271156311, | |
| "eval_dev_average_precision": 0.7806456437261002, | |
| "eval_dev_f1": 0.7187227550130775, | |
| "eval_dev_f1_threshold": 0.6090723276138306, | |
| "eval_dev_precision": 0.7149051490514905, | |
| "eval_dev_recall": 0.7225813520324312, | |
| "eval_loss": 0.3342207372188568, | |
| "eval_runtime": 562.3653, | |
| "eval_samples_per_second": 235.873, | |
| "eval_steps_per_second": 7.372, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.2478446839980279, | |
| "grad_norm": 30.978008270263672, | |
| "learning_rate": 1.946862692842972e-05, | |
| "loss": 0.2514, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.24917718229909258, | |
| "grad_norm": 26.20627784729004, | |
| "learning_rate": 1.9453821325989758e-05, | |
| "loss": 0.3139, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.2505096806001572, | |
| "grad_norm": 29.30525779724121, | |
| "learning_rate": 1.9439015723549793e-05, | |
| "loss": 0.2896, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.2518421789012219, | |
| "grad_norm": 1.5062270164489746, | |
| "learning_rate": 1.942421012110983e-05, | |
| "loss": 0.3161, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.2531746772022866, | |
| "grad_norm": 5.7221784591674805, | |
| "learning_rate": 1.9409404518669866e-05, | |
| "loss": 0.3331, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.2531746772022866, | |
| "eval_dev_accuracy": 0.9605946610175881, | |
| "eval_dev_accuracy_threshold": 0.8986555337905884, | |
| "eval_dev_average_precision": 0.7733706796615292, | |
| "eval_dev_f1": 0.7069406003832233, | |
| "eval_dev_f1_threshold": 0.5075786113739014, | |
| "eval_dev_precision": 0.6874029603560708, | |
| "eval_dev_recall": 0.727621343267229, | |
| "eval_loss": 0.333689421415329, | |
| "eval_runtime": 564.7555, | |
| "eval_samples_per_second": 234.875, | |
| "eval_steps_per_second": 7.341, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.2545071755033512, | |
| "grad_norm": 0.41423532366752625, | |
| "learning_rate": 1.9394598916229905e-05, | |
| "loss": 0.2959, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.2558396738044159, | |
| "grad_norm": 18.290435791015625, | |
| "learning_rate": 1.937979331378994e-05, | |
| "loss": 0.3236, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.25717217210548055, | |
| "grad_norm": 1.2307929992675781, | |
| "learning_rate": 1.9364987711349975e-05, | |
| "loss": 0.3527, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.25850467040654523, | |
| "grad_norm": 1.151492714881897, | |
| "learning_rate": 1.9350182108910013e-05, | |
| "loss": 0.3106, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.2598371687076099, | |
| "grad_norm": 1.676810383796692, | |
| "learning_rate": 1.9335376506470048e-05, | |
| "loss": 0.3271, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.2598371687076099, | |
| "eval_dev_accuracy": 0.9614239296780176, | |
| "eval_dev_accuracy_threshold": 0.9547422528266907, | |
| "eval_dev_average_precision": 0.7587239294098156, | |
| "eval_dev_f1": 0.7068855932203391, | |
| "eval_dev_f1_threshold": 0.7505875825881958, | |
| "eval_dev_precision": 0.6841997334153593, | |
| "eval_dev_recall": 0.7311274241262189, | |
| "eval_loss": 0.3253738582134247, | |
| "eval_runtime": 562.4242, | |
| "eval_samples_per_second": 235.849, | |
| "eval_steps_per_second": 7.372, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.26116966700867456, | |
| "grad_norm": 18.608806610107422, | |
| "learning_rate": 1.9320570904030086e-05, | |
| "loss": 0.3504, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.26250216530973924, | |
| "grad_norm": 20.453174591064453, | |
| "learning_rate": 1.930576530159012e-05, | |
| "loss": 0.303, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.2638346636108039, | |
| "grad_norm": 5.1661248207092285, | |
| "learning_rate": 1.929095969915016e-05, | |
| "loss": 0.2478, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.26516716191186857, | |
| "grad_norm": 1.2466572523117065, | |
| "learning_rate": 1.9276154096710194e-05, | |
| "loss": 0.3309, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.26649966021293325, | |
| "grad_norm": 2.0681653022766113, | |
| "learning_rate": 1.9261348494270233e-05, | |
| "loss": 0.3063, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.26649966021293325, | |
| "eval_dev_accuracy": 0.9621928878904159, | |
| "eval_dev_accuracy_threshold": 0.5716267228126526, | |
| "eval_dev_average_precision": 0.7784750233173614, | |
| "eval_dev_f1": 0.7154299699632884, | |
| "eval_dev_f1_threshold": 0.2591094672679901, | |
| "eval_dev_precision": 0.7265845667156253, | |
| "eval_dev_recall": 0.7046126876301084, | |
| "eval_loss": 0.45214059948921204, | |
| "eval_runtime": 560.4518, | |
| "eval_samples_per_second": 236.679, | |
| "eval_steps_per_second": 7.398, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.2678321585139979, | |
| "grad_norm": 18.87665557861328, | |
| "learning_rate": 1.924654289183027e-05, | |
| "loss": 0.3482, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.2691646568150626, | |
| "grad_norm": 1.1184475421905518, | |
| "learning_rate": 1.9231737289390306e-05, | |
| "loss": 0.3033, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.2704971551161272, | |
| "grad_norm": 13.190022468566895, | |
| "learning_rate": 1.9216931686950344e-05, | |
| "loss": 0.288, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.2718296534171919, | |
| "grad_norm": 5.855016231536865, | |
| "learning_rate": 1.920212608451038e-05, | |
| "loss": 0.3609, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.2731621517182566, | |
| "grad_norm": 0.26388707756996155, | |
| "learning_rate": 1.9187320482070418e-05, | |
| "loss": 0.3071, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.2731621517182566, | |
| "eval_dev_accuracy": 0.9630975446108845, | |
| "eval_dev_accuracy_threshold": 0.9040592908859253, | |
| "eval_dev_average_precision": 0.7844185876274975, | |
| "eval_dev_f1": 0.7150979850952249, | |
| "eval_dev_f1_threshold": 0.6517728567123413, | |
| "eval_dev_precision": 0.7206275033377837, | |
| "eval_dev_recall": 0.7096526788649063, | |
| "eval_loss": 0.3398449718952179, | |
| "eval_runtime": 559.7754, | |
| "eval_samples_per_second": 236.965, | |
| "eval_steps_per_second": 7.407, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.2744946500193212, | |
| "grad_norm": 4.928101539611816, | |
| "learning_rate": 1.9172514879630453e-05, | |
| "loss": 0.3778, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.2758271483203859, | |
| "grad_norm": 32.13788604736328, | |
| "learning_rate": 1.915770927719049e-05, | |
| "loss": 0.2681, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.27715964662145054, | |
| "grad_norm": 4.934467792510986, | |
| "learning_rate": 1.914290367475053e-05, | |
| "loss": 0.3358, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.27849214492251523, | |
| "grad_norm": 20.491180419921875, | |
| "learning_rate": 1.9128098072310564e-05, | |
| "loss": 0.2964, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.2798246432235799, | |
| "grad_norm": 1.0770193338394165, | |
| "learning_rate": 1.9113292469870603e-05, | |
| "loss": 0.2193, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.2798246432235799, | |
| "eval_dev_accuracy": 0.9630221565508454, | |
| "eval_dev_accuracy_threshold": 0.9147968292236328, | |
| "eval_dev_average_precision": 0.7911413496458403, | |
| "eval_dev_f1": 0.7163220463124683, | |
| "eval_dev_f1_threshold": 0.7818174362182617, | |
| "eval_dev_precision": 0.7372999304105776, | |
| "eval_dev_recall": 0.6965048756436946, | |
| "eval_loss": 0.3126268982887268, | |
| "eval_runtime": 558.7817, | |
| "eval_samples_per_second": 237.386, | |
| "eval_steps_per_second": 7.42, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.28115714152464455, | |
| "grad_norm": 24.751399993896484, | |
| "learning_rate": 1.9098486867430638e-05, | |
| "loss": 0.3136, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.28248963982570924, | |
| "grad_norm": 38.034759521484375, | |
| "learning_rate": 1.9083681264990676e-05, | |
| "loss": 0.3121, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.28382213812677387, | |
| "grad_norm": 22.520530700683594, | |
| "learning_rate": 1.906887566255071e-05, | |
| "loss": 0.2893, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.28515463642783856, | |
| "grad_norm": 13.158409118652344, | |
| "learning_rate": 1.905407006011075e-05, | |
| "loss": 0.2987, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.28648713472890325, | |
| "grad_norm": 2.2500672340393066, | |
| "learning_rate": 1.9039264457670784e-05, | |
| "loss": 0.2781, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.28648713472890325, | |
| "eval_dev_accuracy": 0.96250951774258, | |
| "eval_dev_accuracy_threshold": 0.9360392093658447, | |
| "eval_dev_average_precision": 0.7809073848360293, | |
| "eval_dev_f1": 0.724827056110684, | |
| "eval_dev_f1_threshold": 0.9214021563529968, | |
| "eval_dev_precision": 0.7264223616154947, | |
| "eval_dev_recall": 0.7232387421934918, | |
| "eval_loss": 0.32232773303985596, | |
| "eval_runtime": 558.841, | |
| "eval_samples_per_second": 237.361, | |
| "eval_steps_per_second": 7.419, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.2878196330299679, | |
| "grad_norm": 7.364509582519531, | |
| "learning_rate": 1.902445885523082e-05, | |
| "loss": 0.2444, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.28915213133103257, | |
| "grad_norm": 14.986044883728027, | |
| "learning_rate": 1.9009653252790857e-05, | |
| "loss": 0.2917, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.2904846296320972, | |
| "grad_norm": 1.4703857898712158, | |
| "learning_rate": 1.8994847650350892e-05, | |
| "loss": 0.32, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.2918171279331619, | |
| "grad_norm": 4.144439220428467, | |
| "learning_rate": 1.898004204791093e-05, | |
| "loss": 0.2873, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.2931496262342266, | |
| "grad_norm": 3.1540684700012207, | |
| "learning_rate": 1.8965236445470966e-05, | |
| "loss": 0.2877, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.2931496262342266, | |
| "eval_dev_accuracy": 0.9638212699872594, | |
| "eval_dev_accuracy_threshold": 0.8727903366088867, | |
| "eval_dev_average_precision": 0.7927687696111004, | |
| "eval_dev_f1": 0.7246392958609548, | |
| "eval_dev_f1_threshold": 0.7912191152572632, | |
| "eval_dev_precision": 0.7370806890299184, | |
| "eval_dev_recall": 0.712610934589679, | |
| "eval_loss": 0.30881205201148987, | |
| "eval_runtime": 559.5919, | |
| "eval_samples_per_second": 237.042, | |
| "eval_steps_per_second": 7.409, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.2944821245352912, | |
| "grad_norm": 6.18324613571167, | |
| "learning_rate": 1.8950430843031004e-05, | |
| "loss": 0.2947, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.2958146228363559, | |
| "grad_norm": 12.850146293640137, | |
| "learning_rate": 1.893562524059104e-05, | |
| "loss": 0.2619, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.29714712113742053, | |
| "grad_norm": 5.986371040344238, | |
| "learning_rate": 1.8920819638151077e-05, | |
| "loss": 0.3143, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.2984796194384852, | |
| "grad_norm": 6.889712810516357, | |
| "learning_rate": 1.8906014035711116e-05, | |
| "loss": 0.3585, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.29981211773954985, | |
| "grad_norm": 14.721301078796387, | |
| "learning_rate": 1.889120843327115e-05, | |
| "loss": 0.28, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.29981211773954985, | |
| "eval_dev_accuracy": 0.9627809147587205, | |
| "eval_dev_accuracy_threshold": 0.949242889881134, | |
| "eval_dev_average_precision": 0.7827527934861719, | |
| "eval_dev_f1": 0.7189280438911163, | |
| "eval_dev_f1_threshold": 0.41740649938583374, | |
| "eval_dev_precision": 0.6932546545935497, | |
| "eval_dev_recall": 0.7465760929111428, | |
| "eval_loss": 0.3353007137775421, | |
| "eval_runtime": 558.6719, | |
| "eval_samples_per_second": 237.433, | |
| "eval_steps_per_second": 7.421, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.30114461604061454, | |
| "grad_norm": 1.0338587760925293, | |
| "learning_rate": 1.887640283083119e-05, | |
| "loss": 0.284, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.30247711434167923, | |
| "grad_norm": 0.5249596834182739, | |
| "learning_rate": 1.8861597228391224e-05, | |
| "loss": 0.2821, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.30380961264274386, | |
| "grad_norm": 2.10871958732605, | |
| "learning_rate": 1.8846791625951262e-05, | |
| "loss": 0.2762, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.30514211094380855, | |
| "grad_norm": 8.820456504821777, | |
| "learning_rate": 1.8831986023511297e-05, | |
| "loss": 0.3152, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.3064746092448732, | |
| "grad_norm": 0.5152029395103455, | |
| "learning_rate": 1.8817180421071335e-05, | |
| "loss": 0.2879, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.3064746092448732, | |
| "eval_dev_accuracy": 0.9637911147632438, | |
| "eval_dev_accuracy_threshold": 0.8721863627433777, | |
| "eval_dev_average_precision": 0.7944006641896747, | |
| "eval_dev_f1": 0.7282656663724625, | |
| "eval_dev_f1_threshold": 0.7819468975067139, | |
| "eval_dev_precision": 0.7333629596711476, | |
| "eval_dev_recall": 0.7232387421934918, | |
| "eval_loss": 0.27257823944091797, | |
| "eval_runtime": 559.9281, | |
| "eval_samples_per_second": 236.9, | |
| "eval_steps_per_second": 7.405, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.3078071075459379, | |
| "grad_norm": 7.670559406280518, | |
| "learning_rate": 1.8802374818631374e-05, | |
| "loss": 0.2738, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.30913960584700256, | |
| "grad_norm": 1.2862569093704224, | |
| "learning_rate": 1.878756921619141e-05, | |
| "loss": 0.2624, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.3104721041480672, | |
| "grad_norm": 6.1086249351501465, | |
| "learning_rate": 1.8772763613751447e-05, | |
| "loss": 0.2698, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.3118046024491319, | |
| "grad_norm": 2.7864394187927246, | |
| "learning_rate": 1.8757958011311482e-05, | |
| "loss": 0.278, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.3131371007501965, | |
| "grad_norm": 0.4662020206451416, | |
| "learning_rate": 1.874315240887152e-05, | |
| "loss": 0.3024, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.3131371007501965, | |
| "eval_dev_accuracy": 0.9640398953613727, | |
| "eval_dev_accuracy_threshold": 0.9022700786590576, | |
| "eval_dev_average_precision": 0.789665459307555, | |
| "eval_dev_f1": 0.7233386555084511, | |
| "eval_dev_f1_threshold": 0.49045658111572266, | |
| "eval_dev_precision": 0.7269809650287737, | |
| "eval_dev_recall": 0.719732661334502, | |
| "eval_loss": 0.3414628207683563, | |
| "eval_runtime": 561.5298, | |
| "eval_samples_per_second": 236.224, | |
| "eval_steps_per_second": 7.383, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.3144695990512612, | |
| "grad_norm": 16.328683853149414, | |
| "learning_rate": 1.8728346806431555e-05, | |
| "loss": 0.3255, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.3158020973523259, | |
| "grad_norm": 6.683753490447998, | |
| "learning_rate": 1.8713541203991594e-05, | |
| "loss": 0.3298, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.3171345956533905, | |
| "grad_norm": 14.66252613067627, | |
| "learning_rate": 1.869873560155163e-05, | |
| "loss": 0.2902, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.3184670939544552, | |
| "grad_norm": 1.7640432119369507, | |
| "learning_rate": 1.8683929999111664e-05, | |
| "loss": 0.283, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.31979959225551985, | |
| "grad_norm": 20.055587768554688, | |
| "learning_rate": 1.8669124396671702e-05, | |
| "loss": 0.3098, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.31979959225551985, | |
| "eval_dev_accuracy": 0.9638288087932633, | |
| "eval_dev_accuracy_threshold": 0.9375428557395935, | |
| "eval_dev_average_precision": 0.7947515841312096, | |
| "eval_dev_f1": 0.731536653364675, | |
| "eval_dev_f1_threshold": 0.8831270337104797, | |
| "eval_dev_precision": 0.7299803622081605, | |
| "eval_dev_recall": 0.7330995946094007, | |
| "eval_loss": 0.2945517897605896, | |
| "eval_runtime": 562.1643, | |
| "eval_samples_per_second": 235.958, | |
| "eval_steps_per_second": 7.375, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.32113209055658454, | |
| "grad_norm": 40.83311080932617, | |
| "learning_rate": 1.8654318794231737e-05, | |
| "loss": 0.2592, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.3224645888576492, | |
| "grad_norm": 5.973490238189697, | |
| "learning_rate": 1.8639513191791775e-05, | |
| "loss": 0.27, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.32379708715871386, | |
| "grad_norm": 8.698867797851562, | |
| "learning_rate": 1.862470758935181e-05, | |
| "loss": 0.2738, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.32512958545977855, | |
| "grad_norm": 8.795327186584473, | |
| "learning_rate": 1.860990198691185e-05, | |
| "loss": 0.2528, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.3264620837608432, | |
| "grad_norm": 0.2583109438419342, | |
| "learning_rate": 1.8595096384471883e-05, | |
| "loss": 0.2694, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.3264620837608432, | |
| "eval_dev_accuracy": 0.9628261475947439, | |
| "eval_dev_accuracy_threshold": 0.9562203884124756, | |
| "eval_dev_average_precision": 0.7884777296034856, | |
| "eval_dev_f1": 0.7260596117035821, | |
| "eval_dev_f1_threshold": 0.9503564834594727, | |
| "eval_dev_precision": 0.7248307490718497, | |
| "eval_dev_recall": 0.7272926481866988, | |
| "eval_loss": 0.3025730550289154, | |
| "eval_runtime": 561.6413, | |
| "eval_samples_per_second": 236.177, | |
| "eval_steps_per_second": 7.382, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.32779458206190787, | |
| "grad_norm": 2.1876091957092285, | |
| "learning_rate": 1.8580290782031922e-05, | |
| "loss": 0.2288, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.32912708036297256, | |
| "grad_norm": 7.1153459548950195, | |
| "learning_rate": 1.856548517959196e-05, | |
| "loss": 0.2966, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.3304595786640372, | |
| "grad_norm": 0.5204883217811584, | |
| "learning_rate": 1.8550679577151995e-05, | |
| "loss": 0.3103, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.3317920769651019, | |
| "grad_norm": 0.5321233868598938, | |
| "learning_rate": 1.8535873974712033e-05, | |
| "loss": 0.2403, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.3331245752661665, | |
| "grad_norm": 0.5437518358230591, | |
| "learning_rate": 1.8521068372272068e-05, | |
| "loss": 0.2986, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.3331245752661665, | |
| "eval_dev_accuracy": 0.964250981929482, | |
| "eval_dev_accuracy_threshold": 0.8822938203811646, | |
| "eval_dev_average_precision": 0.7923663060740336, | |
| "eval_dev_f1": 0.7238444852327716, | |
| "eval_dev_f1_threshold": 0.3350263833999634, | |
| "eval_dev_precision": 0.7350881156800723, | |
| "eval_dev_recall": 0.7129396296702093, | |
| "eval_loss": 0.39168474078178406, | |
| "eval_runtime": 559.9637, | |
| "eval_samples_per_second": 236.885, | |
| "eval_steps_per_second": 7.404, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.3344570735672312, | |
| "grad_norm": 10.434455871582031, | |
| "learning_rate": 1.8506262769832107e-05, | |
| "loss": 0.2954, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.3357895718682959, | |
| "grad_norm": 29.660995483398438, | |
| "learning_rate": 1.849145716739214e-05, | |
| "loss": 0.2778, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.3371220701693605, | |
| "grad_norm": 17.967578887939453, | |
| "learning_rate": 1.847665156495218e-05, | |
| "loss": 0.2522, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.3384545684704252, | |
| "grad_norm": 16.963655471801758, | |
| "learning_rate": 1.8461845962512218e-05, | |
| "loss": 0.3071, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.33978706677148984, | |
| "grad_norm": 3.178967237472534, | |
| "learning_rate": 1.8447040360072253e-05, | |
| "loss": 0.3088, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.33978706677148984, | |
| "eval_dev_accuracy": 0.9653064147700288, | |
| "eval_dev_accuracy_threshold": 0.9469561576843262, | |
| "eval_dev_average_precision": 0.8090508028224602, | |
| "eval_dev_f1": 0.7406513872135102, | |
| "eval_dev_f1_threshold": 0.9149296879768372, | |
| "eval_dev_precision": 0.7413017231917463, | |
| "eval_dev_recall": 0.7400021913005369, | |
| "eval_loss": 0.28880587220191956, | |
| "eval_runtime": 559.426, | |
| "eval_samples_per_second": 237.113, | |
| "eval_steps_per_second": 7.411, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.34111956507255453, | |
| "grad_norm": 31.83365821838379, | |
| "learning_rate": 1.843223475763229e-05, | |
| "loss": 0.3328, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.3424520633736192, | |
| "grad_norm": 73.58321380615234, | |
| "learning_rate": 1.8417429155192326e-05, | |
| "loss": 0.249, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.34378456167468385, | |
| "grad_norm": 31.073486328125, | |
| "learning_rate": 1.8402623552752365e-05, | |
| "loss": 0.248, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.34511705997574854, | |
| "grad_norm": 2.6796510219573975, | |
| "learning_rate": 1.83878179503124e-05, | |
| "loss": 0.2735, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.3464495582768132, | |
| "grad_norm": 19.556621551513672, | |
| "learning_rate": 1.8373012347872438e-05, | |
| "loss": 0.3087, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.3464495582768132, | |
| "eval_dev_accuracy": 0.9655853505921732, | |
| "eval_dev_accuracy_threshold": 0.9267855882644653, | |
| "eval_dev_average_precision": 0.8095342358911112, | |
| "eval_dev_f1": 0.7389250472391351, | |
| "eval_dev_f1_threshold": 0.6512651443481445, | |
| "eval_dev_precision": 0.7092191435768262, | |
| "eval_dev_recall": 0.7712282239509148, | |
| "eval_loss": 0.26777184009552, | |
| "eval_runtime": 562.5407, | |
| "eval_samples_per_second": 235.8, | |
| "eval_steps_per_second": 7.37, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.34778205657787786, | |
| "grad_norm": 10.894082069396973, | |
| "learning_rate": 1.8358206745432473e-05, | |
| "loss": 0.2852, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.34911455487894255, | |
| "grad_norm": 43.44607162475586, | |
| "learning_rate": 1.8343401142992508e-05, | |
| "loss": 0.255, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.3504470531800072, | |
| "grad_norm": 0.060168083757162094, | |
| "learning_rate": 1.8328595540552546e-05, | |
| "loss": 0.27, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.3517795514810719, | |
| "grad_norm": 0.13352444767951965, | |
| "learning_rate": 1.831378993811258e-05, | |
| "loss": 0.3315, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.3531120497821365, | |
| "grad_norm": 2.8769795894622803, | |
| "learning_rate": 1.829898433567262e-05, | |
| "loss": 0.2548, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.3531120497821365, | |
| "eval_dev_accuracy": 0.9633161699849978, | |
| "eval_dev_accuracy_threshold": 0.9598461389541626, | |
| "eval_dev_average_precision": 0.7967367390804647, | |
| "eval_dev_f1": 0.7211769095463995, | |
| "eval_dev_f1_threshold": 0.9407143592834473, | |
| "eval_dev_precision": 0.7022005397550343, | |
| "eval_dev_recall": 0.7412074065958146, | |
| "eval_loss": 0.26967185735702515, | |
| "eval_runtime": 561.1397, | |
| "eval_samples_per_second": 236.389, | |
| "eval_steps_per_second": 7.389, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.3544445480832012, | |
| "grad_norm": 6.555627822875977, | |
| "learning_rate": 1.8284178733232655e-05, | |
| "loss": 0.2967, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.3557770463842659, | |
| "grad_norm": 18.727455139160156, | |
| "learning_rate": 1.8269373130792693e-05, | |
| "loss": 0.2907, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.3571095446853305, | |
| "grad_norm": 16.004812240600586, | |
| "learning_rate": 1.825456752835273e-05, | |
| "loss": 0.2871, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.3584420429863952, | |
| "grad_norm": 0.3446504771709442, | |
| "learning_rate": 1.8239761925912766e-05, | |
| "loss": 0.287, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.35977454128745984, | |
| "grad_norm": 1.3801554441452026, | |
| "learning_rate": 1.8224956323472805e-05, | |
| "loss": 0.2461, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.35977454128745984, | |
| "eval_dev_accuracy": 0.9659321356683529, | |
| "eval_dev_accuracy_threshold": 0.9563218355178833, | |
| "eval_dev_average_precision": 0.8166384763438364, | |
| "eval_dev_f1": 0.7424130273871207, | |
| "eval_dev_f1_threshold": 0.5458764433860779, | |
| "eval_dev_precision": 0.7173801982221314, | |
| "eval_dev_recall": 0.7692560534677331, | |
| "eval_loss": 0.3241870701313019, | |
| "eval_runtime": 560.7697, | |
| "eval_samples_per_second": 236.545, | |
| "eval_steps_per_second": 7.393, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.3611070395885245, | |
| "grad_norm": 11.259644508361816, | |
| "learning_rate": 1.821015072103284e-05, | |
| "loss": 0.3134, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.3624395378895892, | |
| "grad_norm": 15.958681106567383, | |
| "learning_rate": 1.8195345118592878e-05, | |
| "loss": 0.229, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.36377203619065385, | |
| "grad_norm": 3.471926689147949, | |
| "learning_rate": 1.8180539516152913e-05, | |
| "loss": 0.2318, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.36510453449171854, | |
| "grad_norm": 57.36378479003906, | |
| "learning_rate": 1.816573391371295e-05, | |
| "loss": 0.2584, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.36643703279278317, | |
| "grad_norm": 15.649163246154785, | |
| "learning_rate": 1.8150928311272986e-05, | |
| "loss": 0.3092, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.36643703279278317, | |
| "eval_dev_accuracy": 0.9644696073035952, | |
| "eval_dev_accuracy_threshold": 0.9345089793205261, | |
| "eval_dev_average_precision": 0.8036883896122946, | |
| "eval_dev_f1": 0.7414679756895747, | |
| "eval_dev_f1_threshold": 0.8182344436645508, | |
| "eval_dev_precision": 0.7049585144211774, | |
| "eval_dev_recall": 0.7819655965815712, | |
| "eval_loss": 0.2574635446071625, | |
| "eval_runtime": 562.3168, | |
| "eval_samples_per_second": 235.894, | |
| "eval_steps_per_second": 7.373, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.36776953109384786, | |
| "grad_norm": 31.03179931640625, | |
| "learning_rate": 1.8136122708833024e-05, | |
| "loss": 0.2781, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.36910202939491255, | |
| "grad_norm": 32.65872573852539, | |
| "learning_rate": 1.8121317106393063e-05, | |
| "loss": 0.2411, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.3704345276959772, | |
| "grad_norm": 10.414048194885254, | |
| "learning_rate": 1.8106511503953098e-05, | |
| "loss": 0.2768, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.37176702599704187, | |
| "grad_norm": 0.27181100845336914, | |
| "learning_rate": 1.8091705901513136e-05, | |
| "loss": 0.256, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.3730995242981065, | |
| "grad_norm": 15.69724178314209, | |
| "learning_rate": 1.807690029907317e-05, | |
| "loss": 0.3024, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.3730995242981065, | |
| "eval_dev_accuracy": 0.9660904505944349, | |
| "eval_dev_accuracy_threshold": 0.960444450378418, | |
| "eval_dev_average_precision": 0.8143885872198954, | |
| "eval_dev_f1": 0.7409103007718926, | |
| "eval_dev_f1_threshold": 0.8899838328361511, | |
| "eval_dev_precision": 0.7205425553944916, | |
| "eval_dev_recall": 0.7624630218034404, | |
| "eval_loss": 0.2652537524700165, | |
| "eval_runtime": 560.0512, | |
| "eval_samples_per_second": 236.848, | |
| "eval_steps_per_second": 7.403, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.3744320225991712, | |
| "grad_norm": 4.027531623840332, | |
| "learning_rate": 1.806209469663321e-05, | |
| "loss": 0.2676, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.3757645209002359, | |
| "grad_norm": 6.543447494506836, | |
| "learning_rate": 1.8047289094193244e-05, | |
| "loss": 0.2384, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.3770970192013005, | |
| "grad_norm": 35.99159622192383, | |
| "learning_rate": 1.8032483491753283e-05, | |
| "loss": 0.2586, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.3784295175023652, | |
| "grad_norm": 1.3943774700164795, | |
| "learning_rate": 1.8017677889313318e-05, | |
| "loss": 0.2663, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.37976201580342983, | |
| "grad_norm": 0.43371257185935974, | |
| "learning_rate": 1.8002872286873352e-05, | |
| "loss": 0.3077, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.37976201580342983, | |
| "eval_dev_accuracy": 0.9662638431325247, | |
| "eval_dev_accuracy_threshold": 0.9389976263046265, | |
| "eval_dev_average_precision": 0.8185963813825948, | |
| "eval_dev_f1": 0.7529551465428834, | |
| "eval_dev_f1_threshold": 0.8002798557281494, | |
| "eval_dev_precision": 0.7420212765957447, | |
| "eval_dev_recall": 0.7642160622329353, | |
| "eval_loss": 0.2862532138824463, | |
| "eval_runtime": 562.8872, | |
| "eval_samples_per_second": 235.655, | |
| "eval_steps_per_second": 7.366, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.3810945141044945, | |
| "grad_norm": 7.868191719055176, | |
| "learning_rate": 1.798806668443339e-05, | |
| "loss": 0.2609, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.3824270124055592, | |
| "grad_norm": 0.37841853499412537, | |
| "learning_rate": 1.7973261081993426e-05, | |
| "loss": 0.277, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.38375951070662384, | |
| "grad_norm": 1.237690806388855, | |
| "learning_rate": 1.7958455479553464e-05, | |
| "loss": 0.2635, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.38509200900768853, | |
| "grad_norm": 14.932636260986328, | |
| "learning_rate": 1.79436498771135e-05, | |
| "loss": 0.2518, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.38642450730875316, | |
| "grad_norm": 7.698137283325195, | |
| "learning_rate": 1.7928844274673537e-05, | |
| "loss": 0.2686, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.38642450730875316, | |
| "eval_dev_accuracy": 0.9663693864165793, | |
| "eval_dev_accuracy_threshold": 0.9125785231590271, | |
| "eval_dev_average_precision": 0.8194613717227588, | |
| "eval_dev_f1": 0.7500950931913275, | |
| "eval_dev_f1_threshold": 0.7369703054428101, | |
| "eval_dev_precision": 0.7440707201379905, | |
| "eval_dev_recall": 0.7562178152733647, | |
| "eval_loss": 0.25516369938850403, | |
| "eval_runtime": 561.2432, | |
| "eval_samples_per_second": 236.345, | |
| "eval_steps_per_second": 7.387, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.38775700560981785, | |
| "grad_norm": 11.858484268188477, | |
| "learning_rate": 1.7914038672233576e-05, | |
| "loss": 0.2419, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.38908950391088254, | |
| "grad_norm": 1.3223813772201538, | |
| "learning_rate": 1.789923306979361e-05, | |
| "loss": 0.268, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.3904220022119472, | |
| "grad_norm": 1.3486851453781128, | |
| "learning_rate": 1.788442746735365e-05, | |
| "loss": 0.2851, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.39175450051301186, | |
| "grad_norm": 4.85157585144043, | |
| "learning_rate": 1.7869621864913684e-05, | |
| "loss": 0.2212, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.3930869988140765, | |
| "grad_norm": 6.538160800933838, | |
| "learning_rate": 1.7854816262473722e-05, | |
| "loss": 0.2571, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.3930869988140765, | |
| "eval_dev_accuracy": 0.9645676117816461, | |
| "eval_dev_accuracy_threshold": 0.8994825482368469, | |
| "eval_dev_average_precision": 0.8082227405172548, | |
| "eval_dev_f1": 0.7435443565181175, | |
| "eval_dev_f1_threshold": 0.609738826751709, | |
| "eval_dev_precision": 0.7083622656482492, | |
| "eval_dev_recall": 0.7824038566889449, | |
| "eval_loss": 0.2665890157222748, | |
| "eval_runtime": 562.6368, | |
| "eval_samples_per_second": 235.76, | |
| "eval_steps_per_second": 7.369, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.3944194971151412, | |
| "grad_norm": 10.298799514770508, | |
| "learning_rate": 1.7840010660033757e-05, | |
| "loss": 0.2803, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.39575199541620587, | |
| "grad_norm": 46.07704162597656, | |
| "learning_rate": 1.7825205057593796e-05, | |
| "loss": 0.3034, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.3970844937172705, | |
| "grad_norm": 12.525829315185547, | |
| "learning_rate": 1.781039945515383e-05, | |
| "loss": 0.2332, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.3984169920183352, | |
| "grad_norm": 3.9645519256591797, | |
| "learning_rate": 1.779559385271387e-05, | |
| "loss": 0.2444, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.3997494903193998, | |
| "grad_norm": 18.388866424560547, | |
| "learning_rate": 1.7780788250273907e-05, | |
| "loss": 0.247, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.3997494903193998, | |
| "eval_dev_accuracy": 0.9654571908901068, | |
| "eval_dev_accuracy_threshold": 0.9365599155426025, | |
| "eval_dev_average_precision": 0.8171252302464322, | |
| "eval_dev_f1": 0.747335818153184, | |
| "eval_dev_f1_threshold": 0.8443748354911804, | |
| "eval_dev_precision": 0.7173956863535578, | |
| "eval_dev_recall": 0.779883861071546, | |
| "eval_loss": 0.267426073551178, | |
| "eval_runtime": 564.1091, | |
| "eval_samples_per_second": 235.144, | |
| "eval_steps_per_second": 7.35, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.4010819886204645, | |
| "grad_norm": 23.66806411743164, | |
| "learning_rate": 1.7765982647833942e-05, | |
| "loss": 0.2861, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.4024144869215292, | |
| "grad_norm": 3.966848611831665, | |
| "learning_rate": 1.775117704539398e-05, | |
| "loss": 0.2409, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.40374698522259383, | |
| "grad_norm": 14.780499458312988, | |
| "learning_rate": 1.7736371442954015e-05, | |
| "loss": 0.2658, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.4050794835236585, | |
| "grad_norm": 30.90425682067871, | |
| "learning_rate": 1.7721565840514054e-05, | |
| "loss": 0.3114, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.40641198182472316, | |
| "grad_norm": 5.639667987823486, | |
| "learning_rate": 1.770676023807409e-05, | |
| "loss": 0.2685, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.40641198182472316, | |
| "eval_dev_accuracy": 0.9670704953749425, | |
| "eval_dev_accuracy_threshold": 0.9521620869636536, | |
| "eval_dev_average_precision": 0.8255021501170436, | |
| "eval_dev_f1": 0.7578924800343035, | |
| "eval_dev_f1_threshold": 0.8574447631835938, | |
| "eval_dev_precision": 0.7418677859391396, | |
| "eval_dev_recall": 0.7746247397830612, | |
| "eval_loss": 0.27643173933029175, | |
| "eval_runtime": 561.8887, | |
| "eval_samples_per_second": 236.073, | |
| "eval_steps_per_second": 7.379, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.40774448012578784, | |
| "grad_norm": 0.6215185523033142, | |
| "learning_rate": 1.7691954635634127e-05, | |
| "loss": 0.2354, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.40907697842685253, | |
| "grad_norm": 4.660243034362793, | |
| "learning_rate": 1.7677149033194162e-05, | |
| "loss": 0.2576, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.41040947672791717, | |
| "grad_norm": 0.37590527534484863, | |
| "learning_rate": 1.7662343430754197e-05, | |
| "loss": 0.2647, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.41174197502898185, | |
| "grad_norm": 0.8927075862884521, | |
| "learning_rate": 1.7647537828314235e-05, | |
| "loss": 0.2175, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.4130744733300465, | |
| "grad_norm": 3.024475336074829, | |
| "learning_rate": 1.763273222587427e-05, | |
| "loss": 0.3085, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.4130744733300465, | |
| "eval_dev_accuracy": 0.9660376789524076, | |
| "eval_dev_accuracy_threshold": 0.9548216462135315, | |
| "eval_dev_average_precision": 0.8156242337854964, | |
| "eval_dev_f1": 0.7478032096816627, | |
| "eval_dev_f1_threshold": 0.6426188945770264, | |
| "eval_dev_precision": 0.7193763919821826, | |
| "eval_dev_recall": 0.7785690807494248, | |
| "eval_loss": 0.26265889406204224, | |
| "eval_runtime": 565.3292, | |
| "eval_samples_per_second": 234.637, | |
| "eval_steps_per_second": 7.334, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.4144069716311112, | |
| "grad_norm": 0.6045613884925842, | |
| "learning_rate": 1.761792662343431e-05, | |
| "loss": 0.2637, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.4157394699321758, | |
| "grad_norm": 0.6080629229545593, | |
| "learning_rate": 1.7603121020994344e-05, | |
| "loss": 0.2567, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.4170719682332405, | |
| "grad_norm": 0.933800995349884, | |
| "learning_rate": 1.7588315418554382e-05, | |
| "loss": 0.2906, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.4184044665343052, | |
| "grad_norm": 3.305546522140503, | |
| "learning_rate": 1.757350981611442e-05, | |
| "loss": 0.2516, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.4197369648353698, | |
| "grad_norm": 9.856147766113281, | |
| "learning_rate": 1.7558704213674455e-05, | |
| "loss": 0.2342, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.4197369648353698, | |
| "eval_dev_accuracy": 0.9664523132826223, | |
| "eval_dev_accuracy_threshold": 0.6949450373649597, | |
| "eval_dev_average_precision": 0.8198951977617771, | |
| "eval_dev_f1": 0.752799668187474, | |
| "eval_dev_f1_threshold": 0.14068716764450073, | |
| "eval_dev_precision": 0.7144966046648952, | |
| "eval_dev_recall": 0.7954420948833133, | |
| "eval_loss": 0.3560490906238556, | |
| "eval_runtime": 566.4442, | |
| "eval_samples_per_second": 234.175, | |
| "eval_steps_per_second": 7.319, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.4210694631364345, | |
| "grad_norm": 6.468503952026367, | |
| "learning_rate": 1.7543898611234493e-05, | |
| "loss": 0.2595, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.42240196143749914, | |
| "grad_norm": 2.2248482704162598, | |
| "learning_rate": 1.752909300879453e-05, | |
| "loss": 0.259, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.42373445973856383, | |
| "grad_norm": 2.2780916690826416, | |
| "learning_rate": 1.7514287406354567e-05, | |
| "loss": 0.2563, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.4250669580396285, | |
| "grad_norm": 5.997177600860596, | |
| "learning_rate": 1.74994818039146e-05, | |
| "loss": 0.2504, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.42639945634069315, | |
| "grad_norm": 5.018893241882324, | |
| "learning_rate": 1.748467620147464e-05, | |
| "loss": 0.2751, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.42639945634069315, | |
| "eval_dev_accuracy": 0.9660979894004388, | |
| "eval_dev_accuracy_threshold": 0.9447215795516968, | |
| "eval_dev_average_precision": 0.818149670082586, | |
| "eval_dev_f1": 0.7564001884718078, | |
| "eval_dev_f1_threshold": 0.7197975516319275, | |
| "eval_dev_precision": 0.7242831361540004, | |
| "eval_dev_recall": 0.7914977539169498, | |
| "eval_loss": 0.23995983600616455, | |
| "eval_runtime": 559.4727, | |
| "eval_samples_per_second": 237.093, | |
| "eval_steps_per_second": 7.411, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.42773195464175784, | |
| "grad_norm": 9.826861381530762, | |
| "learning_rate": 1.7469870599034675e-05, | |
| "loss": 0.2521, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.42906445294282247, | |
| "grad_norm": 7.288123607635498, | |
| "learning_rate": 1.7455064996594713e-05, | |
| "loss": 0.2406, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.43039695124388716, | |
| "grad_norm": 11.257208824157715, | |
| "learning_rate": 1.744025939415475e-05, | |
| "loss": 0.3026, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.43172944954495185, | |
| "grad_norm": 0.21672357618808746, | |
| "learning_rate": 1.7425453791714787e-05, | |
| "loss": 0.234, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.4330619478460165, | |
| "grad_norm": 1.5854872465133667, | |
| "learning_rate": 1.7410648189274825e-05, | |
| "loss": 0.2639, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.4330619478460165, | |
| "eval_dev_accuracy": 0.9651707162619584, | |
| "eval_dev_accuracy_threshold": 0.8978205919265747, | |
| "eval_dev_average_precision": 0.8087336536278384, | |
| "eval_dev_f1": 0.740958788898234, | |
| "eval_dev_f1_threshold": 0.7787094712257385, | |
| "eval_dev_precision": 0.7121349904011317, | |
| "eval_dev_recall": 0.7722143091925058, | |
| "eval_loss": 0.2519395053386688, | |
| "eval_runtime": 557.1987, | |
| "eval_samples_per_second": 238.061, | |
| "eval_steps_per_second": 7.441, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.43439444614708117, | |
| "grad_norm": 5.898445129394531, | |
| "learning_rate": 1.739584258683486e-05, | |
| "loss": 0.2321, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.4357269444481458, | |
| "grad_norm": 0.27915239334106445, | |
| "learning_rate": 1.7381036984394898e-05, | |
| "loss": 0.1894, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.4370594427492105, | |
| "grad_norm": 0.3429672122001648, | |
| "learning_rate": 1.7366231381954933e-05, | |
| "loss": 0.3076, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.4383919410502752, | |
| "grad_norm": 0.6808755397796631, | |
| "learning_rate": 1.735142577951497e-05, | |
| "loss": 0.2392, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.4397244393513398, | |
| "grad_norm": 36.33818435668945, | |
| "learning_rate": 1.7336620177075006e-05, | |
| "loss": 0.2742, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.4397244393513398, | |
| "eval_dev_accuracy": 0.9674248192571261, | |
| "eval_dev_accuracy_threshold": 0.9071935415267944, | |
| "eval_dev_average_precision": 0.8132130323917695, | |
| "eval_dev_f1": 0.7505652677438923, | |
| "eval_dev_f1_threshold": 0.5399670600891113, | |
| "eval_dev_precision": 0.7556073728625361, | |
| "eval_dev_recall": 0.7455900076695519, | |
| "eval_loss": 0.30597466230392456, | |
| "eval_runtime": 519.866, | |
| "eval_samples_per_second": 255.156, | |
| "eval_steps_per_second": 7.975, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.4410569376524045, | |
| "grad_norm": 6.550230503082275, | |
| "learning_rate": 1.732181457463504e-05, | |
| "loss": 0.2624, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.44238943595346913, | |
| "grad_norm": 15.728365898132324, | |
| "learning_rate": 1.730700897219508e-05, | |
| "loss": 0.2481, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.4437219342545338, | |
| "grad_norm": 1.1476960182189941, | |
| "learning_rate": 1.7292203369755115e-05, | |
| "loss": 0.2289, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.4450544325555985, | |
| "grad_norm": 89.61054992675781, | |
| "learning_rate": 1.7277397767315153e-05, | |
| "loss": 0.2854, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.44638693085666314, | |
| "grad_norm": 4.351845741271973, | |
| "learning_rate": 1.7262592164875188e-05, | |
| "loss": 0.2733, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.44638693085666314, | |
| "eval_dev_accuracy": 0.9650500953658959, | |
| "eval_dev_accuracy_threshold": 0.9060708284378052, | |
| "eval_dev_average_precision": 0.8133536713572236, | |
| "eval_dev_f1": 0.744153082919915, | |
| "eval_dev_f1_threshold": 0.8223495483398438, | |
| "eval_dev_precision": 0.7405598958333334, | |
| "eval_dev_recall": 0.7477813082064205, | |
| "eval_loss": 0.272208571434021, | |
| "eval_runtime": 521.749, | |
| "eval_samples_per_second": 254.235, | |
| "eval_steps_per_second": 7.946, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.44771942915772783, | |
| "grad_norm": 6.246555805206299, | |
| "learning_rate": 1.7247786562435226e-05, | |
| "loss": 0.2759, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.44905192745879247, | |
| "grad_norm": 52.076377868652344, | |
| "learning_rate": 1.7232980959995265e-05, | |
| "loss": 0.2588, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.45038442575985715, | |
| "grad_norm": 5.682718276977539, | |
| "learning_rate": 1.72181753575553e-05, | |
| "loss": 0.2106, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.45171692406092184, | |
| "grad_norm": 2.271516799926758, | |
| "learning_rate": 1.7203369755115338e-05, | |
| "loss": 0.2631, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.4530494223619865, | |
| "grad_norm": 1.0763822793960571, | |
| "learning_rate": 1.7188564152675373e-05, | |
| "loss": 0.304, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.4530494223619865, | |
| "eval_dev_accuracy": 0.9669197192548644, | |
| "eval_dev_accuracy_threshold": 0.8872429132461548, | |
| "eval_dev_average_precision": 0.8222864572131344, | |
| "eval_dev_f1": 0.7534934497816593, | |
| "eval_dev_f1_threshold": 0.4772883951663971, | |
| "eval_dev_precision": 0.750788643533123, | |
| "eval_dev_recall": 0.7562178152733647, | |
| "eval_loss": 0.2554573118686676, | |
| "eval_runtime": 520.4082, | |
| "eval_samples_per_second": 254.89, | |
| "eval_steps_per_second": 7.967, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.45438192066305116, | |
| "grad_norm": 0.5738760828971863, | |
| "learning_rate": 1.717375855023541e-05, | |
| "loss": 0.2513, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.4557144189641158, | |
| "grad_norm": 2.8462681770324707, | |
| "learning_rate": 1.7158952947795446e-05, | |
| "loss": 0.2507, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.4570469172651805, | |
| "grad_norm": 8.60177993774414, | |
| "learning_rate": 1.7144147345355484e-05, | |
| "loss": 0.2417, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.4583794155662452, | |
| "grad_norm": 1.3673675060272217, | |
| "learning_rate": 1.712934174291552e-05, | |
| "loss": 0.239, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.4597119138673098, | |
| "grad_norm": 36.5560188293457, | |
| "learning_rate": 1.7114536140475558e-05, | |
| "loss": 0.2527, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.4597119138673098, | |
| "eval_dev_accuracy": 0.9667614043287824, | |
| "eval_dev_accuracy_threshold": 0.9581319093704224, | |
| "eval_dev_average_precision": 0.818866417573704, | |
| "eval_dev_f1": 0.7523900039134568, | |
| "eval_dev_f1_threshold": 0.9470370411872864, | |
| "eval_dev_precision": 0.7681506849315068, | |
| "eval_dev_recall": 0.7372630656294511, | |
| "eval_loss": 0.2984105348587036, | |
| "eval_runtime": 519.7969, | |
| "eval_samples_per_second": 255.19, | |
| "eval_steps_per_second": 7.976, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.4610444121683745, | |
| "grad_norm": 17.973974227905273, | |
| "learning_rate": 1.7099730538035596e-05, | |
| "loss": 0.2807, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.46237691046943913, | |
| "grad_norm": 9.143497467041016, | |
| "learning_rate": 1.708492493559563e-05, | |
| "loss": 0.2304, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.4637094087705038, | |
| "grad_norm": 8.447179794311523, | |
| "learning_rate": 1.707011933315567e-05, | |
| "loss": 0.2707, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.4650419070715685, | |
| "grad_norm": 0.18045054376125336, | |
| "learning_rate": 1.7055313730715704e-05, | |
| "loss": 0.2202, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.46637440537263314, | |
| "grad_norm": 18.00141716003418, | |
| "learning_rate": 1.7040508128275743e-05, | |
| "loss": 0.2802, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.46637440537263314, | |
| "eval_dev_accuracy": 0.9667387879107707, | |
| "eval_dev_accuracy_threshold": 0.922869086265564, | |
| "eval_dev_average_precision": 0.8248757172419965, | |
| "eval_dev_f1": 0.7573180276545787, | |
| "eval_dev_f1_threshold": 0.618488073348999, | |
| "eval_dev_precision": 0.7229527794381351, | |
| "eval_dev_recall": 0.795113399802783, | |
| "eval_loss": 0.2512986958026886, | |
| "eval_runtime": 520.5462, | |
| "eval_samples_per_second": 254.823, | |
| "eval_steps_per_second": 7.965, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.4677069036736978, | |
| "grad_norm": 0.6688315868377686, | |
| "learning_rate": 1.7025702525835778e-05, | |
| "loss": 0.2375, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.46903940197476246, | |
| "grad_norm": 17.023473739624023, | |
| "learning_rate": 1.7010896923395816e-05, | |
| "loss": 0.2058, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.47037190027582715, | |
| "grad_norm": 0.3867310881614685, | |
| "learning_rate": 1.699609132095585e-05, | |
| "loss": 0.2419, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.47170439857689184, | |
| "grad_norm": 13.710586547851562, | |
| "learning_rate": 1.6981285718515886e-05, | |
| "loss": 0.2232, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.47303689687795647, | |
| "grad_norm": 14.513033866882324, | |
| "learning_rate": 1.6966480116075924e-05, | |
| "loss": 0.316, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.47303689687795647, | |
| "eval_dev_accuracy": 0.9672815819430518, | |
| "eval_dev_accuracy_threshold": 0.9403676986694336, | |
| "eval_dev_average_precision": 0.821150327893476, | |
| "eval_dev_f1": 0.7596174282678001, | |
| "eval_dev_f1_threshold": 0.7547413110733032, | |
| "eval_dev_precision": 0.7374393892499742, | |
| "eval_dev_recall": 0.7831708118768489, | |
| "eval_loss": 0.2614619731903076, | |
| "eval_runtime": 520.6856, | |
| "eval_samples_per_second": 254.755, | |
| "eval_steps_per_second": 7.963, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.47436939517902116, | |
| "grad_norm": 2.2954721450805664, | |
| "learning_rate": 1.695167451363596e-05, | |
| "loss": 0.2527, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.4757018934800858, | |
| "grad_norm": 2.294912338256836, | |
| "learning_rate": 1.6936868911195997e-05, | |
| "loss": 0.2732, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.4770343917811505, | |
| "grad_norm": 100.57258605957031, | |
| "learning_rate": 1.6922063308756032e-05, | |
| "loss": 0.2806, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.47836689008221517, | |
| "grad_norm": 13.040018081665039, | |
| "learning_rate": 1.690725770631607e-05, | |
| "loss": 0.25, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.4796993883832798, | |
| "grad_norm": 0.7189066410064697, | |
| "learning_rate": 1.689245210387611e-05, | |
| "loss": 0.2173, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.4796993883832798, | |
| "eval_dev_accuracy": 0.9675982117952159, | |
| "eval_dev_accuracy_threshold": 0.9232138395309448, | |
| "eval_dev_average_precision": 0.8271432363427305, | |
| "eval_dev_f1": 0.7561493449329397, | |
| "eval_dev_f1_threshold": 0.49452510476112366, | |
| "eval_dev_precision": 0.7169088766692852, | |
| "eval_dev_recall": 0.799934260983894, | |
| "eval_loss": 0.2864265441894531, | |
| "eval_runtime": 519.7616, | |
| "eval_samples_per_second": 255.207, | |
| "eval_steps_per_second": 7.977, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.4810318866843445, | |
| "grad_norm": 0.35153084993362427, | |
| "learning_rate": 1.6877646501436144e-05, | |
| "loss": 0.2576, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.4823643849854091, | |
| "grad_norm": 0.3834153413772583, | |
| "learning_rate": 1.6862840898996182e-05, | |
| "loss": 0.2087, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.4836968832864738, | |
| "grad_norm": 0.9096924066543579, | |
| "learning_rate": 1.6848035296556217e-05, | |
| "loss": 0.2581, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.4850293815875385, | |
| "grad_norm": 17.327335357666016, | |
| "learning_rate": 1.6833229694116256e-05, | |
| "loss": 0.265, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.48636187988860313, | |
| "grad_norm": 3.3336431980133057, | |
| "learning_rate": 1.681842409167629e-05, | |
| "loss": 0.2404, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.48636187988860313, | |
| "eval_dev_accuracy": 0.9678545311993486, | |
| "eval_dev_accuracy_threshold": 0.6843677163124084, | |
| "eval_dev_average_precision": 0.8368359833153991, | |
| "eval_dev_f1": 0.7613580982292738, | |
| "eval_dev_f1_threshold": 0.3513629138469696, | |
| "eval_dev_precision": 0.7526766595289079, | |
| "eval_dev_recall": 0.770242138709324, | |
| "eval_loss": 0.3165341913700104, | |
| "eval_runtime": 520.0004, | |
| "eval_samples_per_second": 255.09, | |
| "eval_steps_per_second": 7.973, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.4876943781896678, | |
| "grad_norm": 22.69322395324707, | |
| "learning_rate": 1.680361848923633e-05, | |
| "loss": 0.2525, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.48902687649073245, | |
| "grad_norm": 4.788589954376221, | |
| "learning_rate": 1.6788812886796367e-05, | |
| "loss": 0.2262, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.49035937479179714, | |
| "grad_norm": 48.63047409057617, | |
| "learning_rate": 1.6774007284356402e-05, | |
| "loss": 0.2572, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.49169187309286183, | |
| "grad_norm": 8.924850463867188, | |
| "learning_rate": 1.675920168191644e-05, | |
| "loss": 0.2608, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.49302437139392646, | |
| "grad_norm": 0.28982293605804443, | |
| "learning_rate": 1.6744396079476476e-05, | |
| "loss": 0.2212, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.49302437139392646, | |
| "eval_dev_accuracy": 0.9689250416519032, | |
| "eval_dev_accuracy_threshold": 0.9271968603134155, | |
| "eval_dev_average_precision": 0.8366709399417354, | |
| "eval_dev_f1": 0.7682220970137786, | |
| "eval_dev_f1_threshold": 0.2581200897693634, | |
| "eval_dev_precision": 0.7213351288957291, | |
| "eval_dev_recall": 0.8216281362988934, | |
| "eval_loss": 0.29943621158599854, | |
| "eval_runtime": 519.5326, | |
| "eval_samples_per_second": 255.32, | |
| "eval_steps_per_second": 7.98, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.49435686969499115, | |
| "grad_norm": 8.631064414978027, | |
| "learning_rate": 1.6729590477036514e-05, | |
| "loss": 0.2236, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.4956893679960558, | |
| "grad_norm": 0.3893554210662842, | |
| "learning_rate": 1.671478487459655e-05, | |
| "loss": 0.2542, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.4970218662971205, | |
| "grad_norm": 11.258530616760254, | |
| "learning_rate": 1.6699979272156587e-05, | |
| "loss": 0.2775, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.49835436459818516, | |
| "grad_norm": 23.54794692993164, | |
| "learning_rate": 1.6685173669716622e-05, | |
| "loss": 0.2437, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.4996868628992498, | |
| "grad_norm": 15.748093605041504, | |
| "learning_rate": 1.667036806727666e-05, | |
| "loss": 0.3368, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.4996868628992498, | |
| "eval_dev_accuracy": 0.9686687222477705, | |
| "eval_dev_accuracy_threshold": 0.9627949595451355, | |
| "eval_dev_average_precision": 0.8345873786652108, | |
| "eval_dev_f1": 0.7644562041783806, | |
| "eval_dev_f1_threshold": 0.6956943869590759, | |
| "eval_dev_precision": 0.7692478366984691, | |
| "eval_dev_recall": 0.7597238961323546, | |
| "eval_loss": 0.2797723412513733, | |
| "eval_runtime": 520.4988, | |
| "eval_samples_per_second": 254.846, | |
| "eval_steps_per_second": 7.965, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.5010193612003144, | |
| "grad_norm": 2.5468738079071045, | |
| "learning_rate": 1.6655562464836695e-05, | |
| "loss": 0.2806, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.5023518595013792, | |
| "grad_norm": 2.1441900730133057, | |
| "learning_rate": 1.664075686239673e-05, | |
| "loss": 0.2576, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.5036843578024438, | |
| "grad_norm": 1.2568778991699219, | |
| "learning_rate": 1.662595125995677e-05, | |
| "loss": 0.2848, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.5050168561035084, | |
| "grad_norm": 3.095561981201172, | |
| "learning_rate": 1.6611145657516804e-05, | |
| "loss": 0.215, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.5063493544045732, | |
| "grad_norm": 1.0205029249191284, | |
| "learning_rate": 1.6596340055076842e-05, | |
| "loss": 0.2331, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.5063493544045732, | |
| "eval_dev_accuracy": 0.9679223804533837, | |
| "eval_dev_accuracy_threshold": 0.9669108390808105, | |
| "eval_dev_average_precision": 0.8269638058273905, | |
| "eval_dev_f1": 0.7641839204087119, | |
| "eval_dev_f1_threshold": 0.8454810380935669, | |
| "eval_dev_precision": 0.7504224757076469, | |
| "eval_dev_recall": 0.7784595157225813, | |
| "eval_loss": 0.27937838435173035, | |
| "eval_runtime": 519.5927, | |
| "eval_samples_per_second": 255.29, | |
| "eval_steps_per_second": 7.979, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.5076818527056378, | |
| "grad_norm": 31.32097053527832, | |
| "learning_rate": 1.6581534452636877e-05, | |
| "loss": 0.27, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.5090143510067024, | |
| "grad_norm": 0.6534382104873657, | |
| "learning_rate": 1.6566728850196915e-05, | |
| "loss": 0.2522, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.5103468493077671, | |
| "grad_norm": 23.841657638549805, | |
| "learning_rate": 1.6551923247756954e-05, | |
| "loss": 0.251, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.5116793476088318, | |
| "grad_norm": 11.927959442138672, | |
| "learning_rate": 1.653711764531699e-05, | |
| "loss": 0.2299, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.5130118459098965, | |
| "grad_norm": 1.765657663345337, | |
| "learning_rate": 1.6522312042877027e-05, | |
| "loss": 0.2543, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.5130118459098965, | |
| "eval_dev_accuracy": 0.9683596312016103, | |
| "eval_dev_accuracy_threshold": 0.9189764261245728, | |
| "eval_dev_average_precision": 0.8251932254713443, | |
| "eval_dev_f1": 0.7703276368781975, | |
| "eval_dev_f1_threshold": 0.8014627695083618, | |
| "eval_dev_precision": 0.765329295987888, | |
| "eval_dev_recall": 0.7753916949709653, | |
| "eval_loss": 0.2911910116672516, | |
| "eval_runtime": 519.1529, | |
| "eval_samples_per_second": 255.507, | |
| "eval_steps_per_second": 7.986, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.5143443442109611, | |
| "grad_norm": 6.26005220413208, | |
| "learning_rate": 1.6507506440437062e-05, | |
| "loss": 0.203, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.5156768425120258, | |
| "grad_norm": 3.370025157928467, | |
| "learning_rate": 1.64927008379971e-05, | |
| "loss": 0.2621, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.5170093408130905, | |
| "grad_norm": 29.85224151611328, | |
| "learning_rate": 1.6477895235557135e-05, | |
| "loss": 0.2677, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.5183418391141551, | |
| "grad_norm": 13.099495887756348, | |
| "learning_rate": 1.6463089633117173e-05, | |
| "loss": 0.2377, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.5196743374152198, | |
| "grad_norm": 17.140789031982422, | |
| "learning_rate": 1.6448284030677212e-05, | |
| "loss": 0.265, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.5196743374152198, | |
| "eval_dev_accuracy": 0.9675529789591925, | |
| "eval_dev_accuracy_threshold": 0.9323844909667969, | |
| "eval_dev_average_precision": 0.818988116595722, | |
| "eval_dev_f1": 0.7656208525773743, | |
| "eval_dev_f1_threshold": 0.8410446643829346, | |
| "eval_dev_precision": 0.7426364572605562, | |
| "eval_dev_recall": 0.7900734085679851, | |
| "eval_loss": 0.28379642963409424, | |
| "eval_runtime": 518.2405, | |
| "eval_samples_per_second": 255.956, | |
| "eval_steps_per_second": 8.0, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.5210068357162845, | |
| "grad_norm": 2.0083911418914795, | |
| "learning_rate": 1.319120586275816e-07, | |
| "loss": 0.2108, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.5223393340173491, | |
| "grad_norm": 0.4948272705078125, | |
| "learning_rate": 2.651565622918055e-07, | |
| "loss": 0.227, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.5236718323184137, | |
| "grad_norm": 11.525949478149414, | |
| "learning_rate": 3.984010659560293e-07, | |
| "loss": 0.2081, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.5250043306194785, | |
| "grad_norm": 18.18743133544922, | |
| "learning_rate": 5.316455696202532e-07, | |
| "loss": 0.2782, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.5263368289205431, | |
| "grad_norm": 30.067602157592773, | |
| "learning_rate": 6.64890073284477e-07, | |
| "loss": 0.2357, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.5263368289205431, | |
| "eval_dev_accuracy": 0.9679374580653916, | |
| "eval_dev_accuracy_threshold": 0.8992660045623779, | |
| "eval_dev_average_precision": 0.8239503903565419, | |
| "eval_dev_f1": 0.768843413510473, | |
| "eval_dev_f1_threshold": 0.8412591814994812, | |
| "eval_dev_precision": 0.7522012578616353, | |
| "eval_dev_recall": 0.7862386326284649, | |
| "eval_loss": 0.27902960777282715, | |
| "eval_runtime": 522.9572, | |
| "eval_samples_per_second": 253.648, | |
| "eval_steps_per_second": 7.928, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.5276693272216078, | |
| "grad_norm": 1.496453881263733, | |
| "learning_rate": 7.981345769487009e-07, | |
| "loss": 0.2654, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.5290018255226725, | |
| "grad_norm": 2.676929473876953, | |
| "learning_rate": 9.313790806129248e-07, | |
| "loss": 0.2572, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.5303343238237371, | |
| "grad_norm": 1.3355958461761475, | |
| "learning_rate": 1.0646235842771487e-06, | |
| "loss": 0.2452, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.5316668221248018, | |
| "grad_norm": 24.94687843322754, | |
| "learning_rate": 1.1978680879413725e-06, | |
| "loss": 0.2412, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.5329993204258665, | |
| "grad_norm": 16.272785186767578, | |
| "learning_rate": 1.3311125916055965e-06, | |
| "loss": 0.2656, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.5329993204258665, | |
| "eval_dev_accuracy": 0.9683219371715908, | |
| "eval_dev_accuracy_threshold": 0.8796899914741516, | |
| "eval_dev_average_precision": 0.8334902875069624, | |
| "eval_dev_f1": 0.7711174542763505, | |
| "eval_dev_f1_threshold": 0.6210243701934814, | |
| "eval_dev_precision": 0.7449698702890409, | |
| "eval_dev_recall": 0.7991673057959899, | |
| "eval_loss": 0.2717488408088684, | |
| "eval_runtime": 523.9326, | |
| "eval_samples_per_second": 253.176, | |
| "eval_steps_per_second": 7.913, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.5343318187269311, | |
| "grad_norm": 38.643516540527344, | |
| "learning_rate": 1.4643570952698202e-06, | |
| "loss": 0.2558, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.5356643170279958, | |
| "grad_norm": 0.41367307305336, | |
| "learning_rate": 1.597601598934044e-06, | |
| "loss": 0.2445, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.5369968153290604, | |
| "grad_norm": 0.5968548655509949, | |
| "learning_rate": 1.7308461025982678e-06, | |
| "loss": 0.225, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.5383293136301252, | |
| "grad_norm": 3.6407761573791504, | |
| "learning_rate": 1.864090606262492e-06, | |
| "loss": 0.1996, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.5396618119311898, | |
| "grad_norm": 4.504887580871582, | |
| "learning_rate": 1.9973351099267156e-06, | |
| "loss": 0.244, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.5396618119311898, | |
| "eval_dev_accuracy": 0.9687214938897978, | |
| "eval_dev_accuracy_threshold": 0.9278361797332764, | |
| "eval_dev_average_precision": 0.8391958373486473, | |
| "eval_dev_f1": 0.772467364332722, | |
| "eval_dev_f1_threshold": 0.8639750480651855, | |
| "eval_dev_precision": 0.7608118159600468, | |
| "eval_dev_recall": 0.7844855921989701, | |
| "eval_loss": 0.2598799467086792, | |
| "eval_runtime": 524.043, | |
| "eval_samples_per_second": 253.122, | |
| "eval_steps_per_second": 7.912, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.5409943102322544, | |
| "grad_norm": 102.69219970703125, | |
| "learning_rate": 2.1305796135909398e-06, | |
| "loss": 0.2261, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.5423268085333192, | |
| "grad_norm": 0.4366992115974426, | |
| "learning_rate": 2.2638241172551636e-06, | |
| "loss": 0.2146, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.5436593068343838, | |
| "grad_norm": 0.5195454955101013, | |
| "learning_rate": 2.3970686209193873e-06, | |
| "loss": 0.2287, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.5449918051354484, | |
| "grad_norm": 0.5551161170005798, | |
| "learning_rate": 2.530313124583611e-06, | |
| "loss": 0.2278, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.5463243034365132, | |
| "grad_norm": 0.49544551968574524, | |
| "learning_rate": 2.663557628247835e-06, | |
| "loss": 0.2482, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.5463243034365132, | |
| "eval_dev_accuracy": 0.9691587446380242, | |
| "eval_dev_accuracy_threshold": 0.9283666610717773, | |
| "eval_dev_average_precision": 0.8431961837252191, | |
| "eval_dev_f1": 0.7750185715801761, | |
| "eval_dev_f1_threshold": 0.6344282627105713, | |
| "eval_dev_precision": 0.7514147546043831, | |
| "eval_dev_recall": 0.8001533910375808, | |
| "eval_loss": 0.275828093290329, | |
| "eval_runtime": 522.4079, | |
| "eval_samples_per_second": 253.915, | |
| "eval_steps_per_second": 7.936, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.5476568017375778, | |
| "grad_norm": 0.10281296074390411, | |
| "learning_rate": 2.7968021319120587e-06, | |
| "loss": 0.2163, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.5489893000386424, | |
| "grad_norm": 1.15056312084198, | |
| "learning_rate": 2.930046635576283e-06, | |
| "loss": 0.2284, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.5503217983397071, | |
| "grad_norm": 0.4747524559497833, | |
| "learning_rate": 3.0632911392405066e-06, | |
| "loss": 0.2382, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.5516542966407718, | |
| "grad_norm": 0.4341018795967102, | |
| "learning_rate": 3.1965356429047304e-06, | |
| "loss": 0.2355, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.5529867949418364, | |
| "grad_norm": 14.61008071899414, | |
| "learning_rate": 3.3297801465689546e-06, | |
| "loss": 0.2247, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.5529867949418364, | |
| "eval_dev_accuracy": 0.9692039774740476, | |
| "eval_dev_accuracy_threshold": 0.9339917302131653, | |
| "eval_dev_average_precision": 0.8436933951228754, | |
| "eval_dev_f1": 0.7787227299138979, | |
| "eval_dev_f1_threshold": 0.5835311412811279, | |
| "eval_dev_precision": 0.7518359853121175, | |
| "eval_dev_recall": 0.8076038128629341, | |
| "eval_loss": 0.2721947729587555, | |
| "eval_runtime": 523.5606, | |
| "eval_samples_per_second": 253.356, | |
| "eval_steps_per_second": 7.919, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.5543192932429011, | |
| "grad_norm": 0.17993593215942383, | |
| "learning_rate": 3.4630246502331784e-06, | |
| "loss": 0.2731, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.5556517915439658, | |
| "grad_norm": 0.47082406282424927, | |
| "learning_rate": 3.596269153897402e-06, | |
| "loss": 0.2493, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.5569842898450305, | |
| "grad_norm": 3.0138349533081055, | |
| "learning_rate": 3.729513657561626e-06, | |
| "loss": 0.2002, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.5583167881460951, | |
| "grad_norm": 15.761974334716797, | |
| "learning_rate": 3.862758161225849e-06, | |
| "loss": 0.2301, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.5596492864471598, | |
| "grad_norm": 0.34038063883781433, | |
| "learning_rate": 3.996002664890073e-06, | |
| "loss": 0.2136, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.5596492864471598, | |
| "eval_dev_accuracy": 0.9691587446380242, | |
| "eval_dev_accuracy_threshold": 0.9421218633651733, | |
| "eval_dev_average_precision": 0.8475633374819089, | |
| "eval_dev_f1": 0.7781878671310496, | |
| "eval_dev_f1_threshold": 0.3623931407928467, | |
| "eval_dev_precision": 0.7403560830860534, | |
| "eval_dev_recall": 0.8200942259230853, | |
| "eval_loss": 0.2632051110267639, | |
| "eval_runtime": 523.1078, | |
| "eval_samples_per_second": 253.575, | |
| "eval_steps_per_second": 7.926, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.5609817847482245, | |
| "grad_norm": 0.8982422351837158, | |
| "learning_rate": 4.129247168554298e-06, | |
| "loss": 0.2323, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.5623142830492891, | |
| "grad_norm": 3.004122495651245, | |
| "learning_rate": 4.2624916722185215e-06, | |
| "loss": 0.2274, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.5636467813503537, | |
| "grad_norm": 7.217723846435547, | |
| "learning_rate": 4.395736175882745e-06, | |
| "loss": 0.2233, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.5649792796514185, | |
| "grad_norm": 1.1566057205200195, | |
| "learning_rate": 4.528980679546969e-06, | |
| "loss": 0.2819, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.5663117779524831, | |
| "grad_norm": 0.2774888575077057, | |
| "learning_rate": 4.662225183211193e-06, | |
| "loss": 0.2002, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.5663117779524831, | |
| "eval_dev_accuracy": 0.9700181685224694, | |
| "eval_dev_accuracy_threshold": 0.9420008063316345, | |
| "eval_dev_average_precision": 0.8490166145203218, | |
| "eval_dev_f1": 0.7794501933730532, | |
| "eval_dev_f1_threshold": 0.41960281133651733, | |
| "eval_dev_precision": 0.7451783751374038, | |
| "eval_dev_recall": 0.8170264051714693, | |
| "eval_loss": 0.2606056034564972, | |
| "eval_runtime": 524.5455, | |
| "eval_samples_per_second": 252.88, | |
| "eval_steps_per_second": 7.904, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.5676442762535477, | |
| "grad_norm": 13.932589530944824, | |
| "learning_rate": 4.795469686875417e-06, | |
| "loss": 0.2599, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.5689767745546125, | |
| "grad_norm": 10.140316009521484, | |
| "learning_rate": 4.92871419053964e-06, | |
| "loss": 0.2478, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.5703092728556771, | |
| "grad_norm": 13.381287574768066, | |
| "learning_rate": 5.061958694203864e-06, | |
| "loss": 0.2151, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.5716417711567418, | |
| "grad_norm": 3.821155548095703, | |
| "learning_rate": 5.195203197868088e-06, | |
| "loss": 0.2207, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.5729742694578065, | |
| "grad_norm": 0.3303406834602356, | |
| "learning_rate": 5.328447701532313e-06, | |
| "loss": 0.2683, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.5729742694578065, | |
| "eval_dev_accuracy": 0.9702820267326061, | |
| "eval_dev_accuracy_threshold": 0.9166876673698425, | |
| "eval_dev_average_precision": 0.8539072755077529, | |
| "eval_dev_f1": 0.7817631806395852, | |
| "eval_dev_f1_threshold": 0.4148586690425873, | |
| "eval_dev_precision": 0.7710175812466702, | |
| "eval_dev_recall": 0.7928125342390709, | |
| "eval_loss": 0.2761251628398895, | |
| "eval_runtime": 522.8877, | |
| "eval_samples_per_second": 253.682, | |
| "eval_steps_per_second": 7.929, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.5743067677588711, | |
| "grad_norm": 2.869353771209717, | |
| "learning_rate": 5.461692205196536e-06, | |
| "loss": 0.2233, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.5756392660599358, | |
| "grad_norm": 1.4524685144424438, | |
| "learning_rate": 5.59493670886076e-06, | |
| "loss": 0.2473, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.5769717643610004, | |
| "grad_norm": 0.838426411151886, | |
| "learning_rate": 5.728181212524984e-06, | |
| "loss": 0.2289, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.5783042626620651, | |
| "grad_norm": 33.507659912109375, | |
| "learning_rate": 5.861425716189208e-06, | |
| "loss": 0.2757, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.5796367609631298, | |
| "grad_norm": 10.75368595123291, | |
| "learning_rate": 5.9946702198534315e-06, | |
| "loss": 0.2489, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.5796367609631298, | |
| "eval_dev_accuracy": 0.9702367938965827, | |
| "eval_dev_accuracy_threshold": 0.9455279111862183, | |
| "eval_dev_average_precision": 0.8513893973961074, | |
| "eval_dev_f1": 0.7795382036446223, | |
| "eval_dev_f1_threshold": 0.6581396460533142, | |
| "eval_dev_precision": 0.7695921417894512, | |
| "eval_dev_recall": 0.7897447134874548, | |
| "eval_loss": 0.24530762434005737, | |
| "eval_runtime": 523.1112, | |
| "eval_samples_per_second": 253.573, | |
| "eval_steps_per_second": 7.926, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.5809692592641944, | |
| "grad_norm": 4.178175449371338, | |
| "learning_rate": 6.127914723517655e-06, | |
| "loss": 0.2238, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.5823017575652591, | |
| "grad_norm": 7.612859725952148, | |
| "learning_rate": 6.261159227181879e-06, | |
| "loss": 0.2342, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.5836342558663238, | |
| "grad_norm": 19.10555648803711, | |
| "learning_rate": 6.394403730846103e-06, | |
| "loss": 0.2209, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.5849667541673884, | |
| "grad_norm": 0.2660426199436188, | |
| "learning_rate": 6.527648234510327e-06, | |
| "loss": 0.1982, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.5862992524684532, | |
| "grad_norm": 4.176153659820557, | |
| "learning_rate": 6.660892738174551e-06, | |
| "loss": 0.2577, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.5862992524684532, | |
| "eval_dev_accuracy": 0.9705006521067193, | |
| "eval_dev_accuracy_threshold": 0.9348860383033752, | |
| "eval_dev_average_precision": 0.8544433474182094, | |
| "eval_dev_f1": 0.7824561403508773, | |
| "eval_dev_f1_threshold": 0.41301047801971436, | |
| "eval_dev_precision": 0.759991738097697, | |
| "eval_dev_recall": 0.806289032540813, | |
| "eval_loss": 0.268686980009079, | |
| "eval_runtime": 525.8935, | |
| "eval_samples_per_second": 252.232, | |
| "eval_steps_per_second": 7.884, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.5876317507695178, | |
| "grad_norm": 2.451788902282715, | |
| "learning_rate": 6.794137241838775e-06, | |
| "loss": 0.1872, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.5889642490705824, | |
| "grad_norm": 0.2053864449262619, | |
| "learning_rate": 6.927381745502999e-06, | |
| "loss": 0.2132, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.5902967473716471, | |
| "grad_norm": 2.7442498207092285, | |
| "learning_rate": 7.0606262491672225e-06, | |
| "loss": 0.1735, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.5916292456727118, | |
| "grad_norm": 14.928565979003906, | |
| "learning_rate": 7.193870752831446e-06, | |
| "loss": 0.2907, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.5929617439737764, | |
| "grad_norm": 1.0581625699996948, | |
| "learning_rate": 7.32711525649567e-06, | |
| "loss": 0.2109, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.5929617439737764, | |
| "eval_dev_accuracy": 0.9710132909149849, | |
| "eval_dev_accuracy_threshold": 0.9184995889663696, | |
| "eval_dev_average_precision": 0.8564900386871592, | |
| "eval_dev_f1": 0.7874429836329488, | |
| "eval_dev_f1_threshold": 0.42533212900161743, | |
| "eval_dev_precision": 0.7716659655027346, | |
| "eval_dev_recall": 0.8038786019502575, | |
| "eval_loss": 0.2596043348312378, | |
| "eval_runtime": 521.1196, | |
| "eval_samples_per_second": 254.542, | |
| "eval_steps_per_second": 7.956, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.5942942422748411, | |
| "grad_norm": 7.90291166305542, | |
| "learning_rate": 7.460359760159894e-06, | |
| "loss": 0.2621, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.5956267405759058, | |
| "grad_norm": 27.323461532592773, | |
| "learning_rate": 7.593604263824118e-06, | |
| "loss": 0.21, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.5969592388769704, | |
| "grad_norm": 0.3570970296859741, | |
| "learning_rate": 7.726848767488342e-06, | |
| "loss": 0.216, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.5982917371780351, | |
| "grad_norm": 0.6491680145263672, | |
| "learning_rate": 7.860093271152565e-06, | |
| "loss": 0.2136, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.5996242354790997, | |
| "grad_norm": 20.47812271118164, | |
| "learning_rate": 7.99333777481679e-06, | |
| "loss": 0.2099, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.5996242354790997, | |
| "eval_dev_accuracy": 0.9701463282245358, | |
| "eval_dev_accuracy_threshold": 0.7721706628799438, | |
| "eval_dev_average_precision": 0.8515314890810202, | |
| "eval_dev_f1": 0.7854063375727528, | |
| "eval_dev_f1_threshold": 0.46630430221557617, | |
| "eval_dev_precision": 0.7728285077951003, | |
| "eval_dev_recall": 0.7984003506080859, | |
| "eval_loss": 0.27925005555152893, | |
| "eval_runtime": 528.9897, | |
| "eval_samples_per_second": 250.755, | |
| "eval_steps_per_second": 7.838, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.6009567337801645, | |
| "grad_norm": 0.4902491867542267, | |
| "learning_rate": 8.126582278481013e-06, | |
| "loss": 0.2536, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.6022892320812291, | |
| "grad_norm": 0.5637998580932617, | |
| "learning_rate": 8.259826782145237e-06, | |
| "loss": 0.2247, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.6036217303822937, | |
| "grad_norm": 1.9175264835357666, | |
| "learning_rate": 8.39307128580946e-06, | |
| "loss": 0.2349, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.6049542286833585, | |
| "grad_norm": 76.62299346923828, | |
| "learning_rate": 8.526315789473685e-06, | |
| "loss": 0.1836, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.6062867269844231, | |
| "grad_norm": 1.5868983268737793, | |
| "learning_rate": 8.659560293137908e-06, | |
| "loss": 0.2635, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.6062867269844231, | |
| "eval_dev_accuracy": 0.9700709401644968, | |
| "eval_dev_accuracy_threshold": 0.9073478579521179, | |
| "eval_dev_average_precision": 0.85367208401453, | |
| "eval_dev_f1": 0.7835151777033597, | |
| "eval_dev_f1_threshold": 0.5480349659919739, | |
| "eval_dev_precision": 0.7726643229998935, | |
| "eval_dev_recall": 0.7946751396954093, | |
| "eval_loss": 0.27641019225120544, | |
| "eval_runtime": 535.7653, | |
| "eval_samples_per_second": 247.584, | |
| "eval_steps_per_second": 7.738, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.6076192252854877, | |
| "grad_norm": 0.3646801710128784, | |
| "learning_rate": 8.792804796802133e-06, | |
| "loss": 0.2259, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.6089517235865525, | |
| "grad_norm": 0.1534300446510315, | |
| "learning_rate": 8.926049300466355e-06, | |
| "loss": 0.1824, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.6102842218876171, | |
| "grad_norm": 4.515030384063721, | |
| "learning_rate": 9.05929380413058e-06, | |
| "loss": 0.2108, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.6116167201886817, | |
| "grad_norm": 27.513139724731445, | |
| "learning_rate": 9.192538307794803e-06, | |
| "loss": 0.1652, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.6129492184897464, | |
| "grad_norm": 0.3283866345882416, | |
| "learning_rate": 9.325782811459028e-06, | |
| "loss": 0.2599, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.6129492184897464, | |
| "eval_dev_accuracy": 0.9699880132984537, | |
| "eval_dev_accuracy_threshold": 0.9482549428939819, | |
| "eval_dev_average_precision": 0.8491188703823201, | |
| "eval_dev_f1": 0.7826180027828322, | |
| "eval_dev_f1_threshold": 0.9011486768722534, | |
| "eval_dev_precision": 0.7649335704571608, | |
| "eval_dev_recall": 0.8011394762791717, | |
| "eval_loss": 0.2594774067401886, | |
| "eval_runtime": 527.4017, | |
| "eval_samples_per_second": 251.51, | |
| "eval_steps_per_second": 7.861, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.6142817167908111, | |
| "grad_norm": 0.6060785055160522, | |
| "learning_rate": 9.459027315123252e-06, | |
| "loss": 0.231, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.6156142150918757, | |
| "grad_norm": 1.9709681272506714, | |
| "learning_rate": 9.592271818787475e-06, | |
| "loss": 0.2364, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.6169467133929404, | |
| "grad_norm": 0.13106560707092285, | |
| "learning_rate": 9.7255163224517e-06, | |
| "loss": 0.1774, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.6182792116940051, | |
| "grad_norm": 53.972103118896484, | |
| "learning_rate": 9.858760826115924e-06, | |
| "loss": 0.2322, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.6196117099950698, | |
| "grad_norm": 12.795185089111328, | |
| "learning_rate": 9.992005329780147e-06, | |
| "loss": 0.2283, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.6196117099950698, | |
| "eval_dev_accuracy": 0.9702518715085905, | |
| "eval_dev_accuracy_threshold": 0.8647300004959106, | |
| "eval_dev_average_precision": 0.8569022880485853, | |
| "eval_dev_f1": 0.7869809918232983, | |
| "eval_dev_f1_threshold": 0.43426772952079773, | |
| "eval_dev_precision": 0.7634696610693315, | |
| "eval_dev_recall": 0.8119864139366714, | |
| "eval_loss": 0.26569852232933044, | |
| "eval_runtime": 528.8452, | |
| "eval_samples_per_second": 250.824, | |
| "eval_steps_per_second": 7.84, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.6209442082961344, | |
| "grad_norm": 6.9099507331848145, | |
| "learning_rate": 1.012524983344437e-05, | |
| "loss": 0.2275, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.6222767065971991, | |
| "grad_norm": 3.897141456604004, | |
| "learning_rate": 1.0258494337108595e-05, | |
| "loss": 0.1867, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.6236092048982638, | |
| "grad_norm": 1.8539767265319824, | |
| "learning_rate": 1.0391738840772818e-05, | |
| "loss": 0.276, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.6249417031993284, | |
| "grad_norm": 17.823284149169922, | |
| "learning_rate": 1.0524983344437042e-05, | |
| "loss": 0.2208, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.626274201500393, | |
| "grad_norm": 0.8377816081047058, | |
| "learning_rate": 1.0658227848101265e-05, | |
| "loss": 0.2644, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.626274201500393, | |
| "eval_dev_accuracy": 0.9708625147949068, | |
| "eval_dev_accuracy_threshold": 0.8370188474655151, | |
| "eval_dev_average_precision": 0.8568328618718613, | |
| "eval_dev_f1": 0.7867207514944491, | |
| "eval_dev_f1_threshold": 0.3532576858997345, | |
| "eval_dev_precision": 0.766989280882506, | |
| "eval_dev_recall": 0.8074942478360907, | |
| "eval_loss": 0.2608221769332886, | |
| "eval_runtime": 528.9364, | |
| "eval_samples_per_second": 250.781, | |
| "eval_steps_per_second": 7.838, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.6276066998014578, | |
| "grad_norm": 23.196794509887695, | |
| "learning_rate": 1.079147235176549e-05, | |
| "loss": 0.1944, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.6289391981025224, | |
| "grad_norm": 0.2909054458141327, | |
| "learning_rate": 1.0924716855429713e-05, | |
| "loss": 0.2221, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.630271696403587, | |
| "grad_norm": 15.759045600891113, | |
| "learning_rate": 1.1057961359093938e-05, | |
| "loss": 0.2392, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.6316041947046518, | |
| "grad_norm": 4.435680866241455, | |
| "learning_rate": 1.1191205862758164e-05, | |
| "loss": 0.1809, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.6329366930057164, | |
| "grad_norm": 3.936431646347046, | |
| "learning_rate": 1.1324450366422385e-05, | |
| "loss": 0.1708, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.6329366930057164, | |
| "eval_dev_accuracy": 0.9702594103145944, | |
| "eval_dev_accuracy_threshold": 0.9633700847625732, | |
| "eval_dev_average_precision": 0.8539832745264263, | |
| "eval_dev_f1": 0.7859069988890653, | |
| "eval_dev_f1_threshold": 0.7301878929138184, | |
| "eval_dev_precision": 0.7598199672667758, | |
| "eval_dev_recall": 0.8138490193930098, | |
| "eval_loss": 0.2916560173034668, | |
| "eval_runtime": 524.8425, | |
| "eval_samples_per_second": 252.737, | |
| "eval_steps_per_second": 7.9, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.634269191306781, | |
| "grad_norm": 1.574413776397705, | |
| "learning_rate": 1.1457694870086611e-05, | |
| "loss": 0.2181, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.6356016896078458, | |
| "grad_norm": 4.340725421905518, | |
| "learning_rate": 1.1590939373750833e-05, | |
| "loss": 0.2258, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.6369341879089104, | |
| "grad_norm": 5.916915416717529, | |
| "learning_rate": 1.1724183877415059e-05, | |
| "loss": 0.2808, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.6382666862099751, | |
| "grad_norm": 15.759284019470215, | |
| "learning_rate": 1.1857428381079282e-05, | |
| "loss": 0.2394, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.6395991845110397, | |
| "grad_norm": 14.555028915405273, | |
| "learning_rate": 1.1990672884743507e-05, | |
| "loss": 0.2267, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.6395991845110397, | |
| "eval_dev_accuracy": 0.9713826924091762, | |
| "eval_dev_accuracy_threshold": 0.9341762065887451, | |
| "eval_dev_average_precision": 0.8563315677126753, | |
| "eval_dev_f1": 0.7862142099681866, | |
| "eval_dev_f1_threshold": 0.4216569662094116, | |
| "eval_dev_precision": 0.7617384156991678, | |
| "eval_dev_recall": 0.8123151090172017, | |
| "eval_loss": 0.26165512204170227, | |
| "eval_runtime": 525.2884, | |
| "eval_samples_per_second": 252.522, | |
| "eval_steps_per_second": 7.893, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.6409316828121044, | |
| "grad_norm": 8.00622844696045, | |
| "learning_rate": 1.212391738840773e-05, | |
| "loss": 0.2583, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.6422641811131691, | |
| "grad_norm": 13.320343017578125, | |
| "learning_rate": 1.2257161892071954e-05, | |
| "loss": 0.2188, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.6435966794142337, | |
| "grad_norm": 2.9494426250457764, | |
| "learning_rate": 1.2390406395736177e-05, | |
| "loss": 0.1877, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.6449291777152985, | |
| "grad_norm": 0.39628902077674866, | |
| "learning_rate": 1.2523650899400402e-05, | |
| "loss": 0.2324, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.6462616760163631, | |
| "grad_norm": 0.1506374627351761, | |
| "learning_rate": 1.2656895403064625e-05, | |
| "loss": 0.2239, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.6462616760163631, | |
| "eval_dev_accuracy": 0.9706514282267974, | |
| "eval_dev_accuracy_threshold": 0.8615503311157227, | |
| "eval_dev_average_precision": 0.8586570982605375, | |
| "eval_dev_f1": 0.7870691958322201, | |
| "eval_dev_f1_threshold": 0.24849581718444824, | |
| "eval_dev_precision": 0.7681476846057572, | |
| "eval_dev_recall": 0.8069464227018736, | |
| "eval_loss": 0.28418707847595215, | |
| "eval_runtime": 533.0754, | |
| "eval_samples_per_second": 248.833, | |
| "eval_steps_per_second": 7.778, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.6475941743174277, | |
| "grad_norm": 0.48906368017196655, | |
| "learning_rate": 1.279013990672885e-05, | |
| "loss": 0.22, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.6489266726184925, | |
| "grad_norm": 71.81077575683594, | |
| "learning_rate": 1.2923384410393072e-05, | |
| "loss": 0.2079, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.6502591709195571, | |
| "grad_norm": 17.413375854492188, | |
| "learning_rate": 1.3056628914057297e-05, | |
| "loss": 0.2212, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.6515916692206217, | |
| "grad_norm": 0.7448732852935791, | |
| "learning_rate": 1.318987341772152e-05, | |
| "loss": 0.2106, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.6529241675216864, | |
| "grad_norm": 0.6357948780059814, | |
| "learning_rate": 1.3323117921385744e-05, | |
| "loss": 0.2095, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.6529241675216864, | |
| "eval_dev_accuracy": 0.971164067035063, | |
| "eval_dev_accuracy_threshold": 0.925714373588562, | |
| "eval_dev_average_precision": 0.8570638757463108, | |
| "eval_dev_f1": 0.7913554743365645, | |
| "eval_dev_f1_threshold": 0.5317444801330566, | |
| "eval_dev_precision": 0.7659967186218212, | |
| "eval_dev_recall": 0.8184507505204339, | |
| "eval_loss": 0.2687513828277588, | |
| "eval_runtime": 529.1402, | |
| "eval_samples_per_second": 250.684, | |
| "eval_steps_per_second": 7.835, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.6542566658227511, | |
| "grad_norm": 12.15365982055664, | |
| "learning_rate": 1.3456362425049967e-05, | |
| "loss": 0.2359, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.6555891641238157, | |
| "grad_norm": 12.457159996032715, | |
| "learning_rate": 1.3589606928714192e-05, | |
| "loss": 0.2392, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.6569216624248804, | |
| "grad_norm": 0.6378312110900879, | |
| "learning_rate": 1.3722851432378415e-05, | |
| "loss": 0.2185, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.6582541607259451, | |
| "grad_norm": 10.198519706726074, | |
| "learning_rate": 1.385609593604264e-05, | |
| "loss": 0.2497, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.6595866590270097, | |
| "grad_norm": 0.6230494976043701, | |
| "learning_rate": 1.3989340439706862e-05, | |
| "loss": 0.2357, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.6595866590270097, | |
| "eval_dev_accuracy": 0.9700030909104616, | |
| "eval_dev_accuracy_threshold": 0.5345156192779541, | |
| "eval_dev_average_precision": 0.8443688741553218, | |
| "eval_dev_f1": 0.785516801361123, | |
| "eval_dev_f1_threshold": 0.39208123087882996, | |
| "eval_dev_precision": 0.7630410081603141, | |
| "eval_dev_recall": 0.809356853292429, | |
| "eval_loss": 0.270622581243515, | |
| "eval_runtime": 527.7067, | |
| "eval_samples_per_second": 251.365, | |
| "eval_steps_per_second": 7.857, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.6609191573280744, | |
| "grad_norm": 6.028562068939209, | |
| "learning_rate": 1.4122584943371087e-05, | |
| "loss": 0.2147, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 7.488621711730957, | |
| "learning_rate": 1.4255829447035312e-05, | |
| "loss": 0.2252, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.6635841539302038, | |
| "grad_norm": 3.221320152282715, | |
| "learning_rate": 1.4389073950699535e-05, | |
| "loss": 0.2296, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.6649166522312684, | |
| "grad_norm": 33.004817962646484, | |
| "learning_rate": 1.452231845436376e-05, | |
| "loss": 0.2434, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.666249150532333, | |
| "grad_norm": 6.759824752807617, | |
| "learning_rate": 1.4655562958027982e-05, | |
| "loss": 0.2449, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.666249150532333, | |
| "eval_dev_accuracy": 0.9705534237487466, | |
| "eval_dev_accuracy_threshold": 0.9030373096466064, | |
| "eval_dev_average_precision": 0.8517374123261313, | |
| "eval_dev_f1": 0.7881202847731378, | |
| "eval_dev_f1_threshold": 0.5092203617095947, | |
| "eval_dev_precision": 0.7650335224342445, | |
| "eval_dev_recall": 0.812643804097732, | |
| "eval_loss": 0.24229487776756287, | |
| "eval_runtime": 528.3673, | |
| "eval_samples_per_second": 251.051, | |
| "eval_steps_per_second": 7.847, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.6675816488333978, | |
| "grad_norm": 0.4978267252445221, | |
| "learning_rate": 1.4788807461692207e-05, | |
| "loss": 0.3087, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.6689141471344624, | |
| "grad_norm": 17.420612335205078, | |
| "learning_rate": 1.492205196535643e-05, | |
| "loss": 0.2188, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.670246645435527, | |
| "grad_norm": 0.26254966855049133, | |
| "learning_rate": 1.5055296469020654e-05, | |
| "loss": 0.2214, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.6715791437365918, | |
| "grad_norm": 16.93143653869629, | |
| "learning_rate": 1.5188540972684877e-05, | |
| "loss": 0.2141, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.6729116420376564, | |
| "grad_norm": 5.481032848358154, | |
| "learning_rate": 1.5321785476349102e-05, | |
| "loss": 0.2534, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.6729116420376564, | |
| "eval_dev_accuracy": 0.9701538670305397, | |
| "eval_dev_accuracy_threshold": 0.9412756562232971, | |
| "eval_dev_average_precision": 0.8418413944064206, | |
| "eval_dev_f1": 0.78390731292517, | |
| "eval_dev_f1_threshold": 0.8259508013725281, | |
| "eval_dev_precision": 0.7611724636185365, | |
| "eval_dev_recall": 0.8080420729703078, | |
| "eval_loss": 0.28124794363975525, | |
| "eval_runtime": 528.2314, | |
| "eval_samples_per_second": 251.115, | |
| "eval_steps_per_second": 7.849, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.674244140338721, | |
| "grad_norm": 0.13247288763523102, | |
| "learning_rate": 1.319120586275816e-07, | |
| "loss": 0.2242, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.6755766386397858, | |
| "grad_norm": 50.61308670043945, | |
| "learning_rate": 2.651565622918055e-07, | |
| "loss": 0.199, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.6769091369408504, | |
| "grad_norm": 9.46574592590332, | |
| "learning_rate": 3.984010659560293e-07, | |
| "loss": 0.2019, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.678241635241915, | |
| "grad_norm": 0.4613121449947357, | |
| "learning_rate": 5.316455696202532e-07, | |
| "loss": 0.2324, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.6795741335429797, | |
| "grad_norm": 0.06632626801729202, | |
| "learning_rate": 6.64890073284477e-07, | |
| "loss": 0.2095, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.6795741335429797, | |
| "eval_dev_accuracy": 0.9702820267326061, | |
| "eval_dev_accuracy_threshold": 0.9349472522735596, | |
| "eval_dev_average_precision": 0.8438139930773977, | |
| "eval_dev_f1": 0.7839174599797903, | |
| "eval_dev_f1_threshold": 0.7425632476806641, | |
| "eval_dev_precision": 0.7616783794956593, | |
| "eval_dev_recall": 0.8074942478360907, | |
| "eval_loss": 0.2824593782424927, | |
| "eval_runtime": 534.9937, | |
| "eval_samples_per_second": 247.941, | |
| "eval_steps_per_second": 7.75, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.6809066318440444, | |
| "grad_norm": 0.5744990706443787, | |
| "learning_rate": 7.981345769487009e-07, | |
| "loss": 0.2757, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.6822391301451091, | |
| "grad_norm": 44.8016471862793, | |
| "learning_rate": 9.313790806129248e-07, | |
| "loss": 0.2954, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.6835716284461737, | |
| "grad_norm": 18.677654266357422, | |
| "learning_rate": 1.0646235842771487e-06, | |
| "loss": 0.2051, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.6849041267472384, | |
| "grad_norm": 7.698785305023193, | |
| "learning_rate": 1.1978680879413725e-06, | |
| "loss": 0.2575, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.6862366250483031, | |
| "grad_norm": 1.6236628293991089, | |
| "learning_rate": 1.3311125916055965e-06, | |
| "loss": 0.1763, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.6862366250483031, | |
| "eval_dev_accuracy": 0.9702669491205983, | |
| "eval_dev_accuracy_threshold": 0.9349033832550049, | |
| "eval_dev_average_precision": 0.8468881842158165, | |
| "eval_dev_f1": 0.783245178180264, | |
| "eval_dev_f1_threshold": 0.763167142868042, | |
| "eval_dev_precision": 0.7643378519290928, | |
| "eval_dev_recall": 0.8031116467623535, | |
| "eval_loss": 0.2643745541572571, | |
| "eval_runtime": 526.1955, | |
| "eval_samples_per_second": 252.087, | |
| "eval_steps_per_second": 7.879, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.6875691233493677, | |
| "grad_norm": 28.033424377441406, | |
| "learning_rate": 1.4643570952698202e-06, | |
| "loss": 0.2108, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.6889016216504324, | |
| "grad_norm": 19.735244750976562, | |
| "learning_rate": 1.597601598934044e-06, | |
| "loss": 0.2313, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.6902341199514971, | |
| "grad_norm": 2.9967164993286133, | |
| "learning_rate": 1.7308461025982678e-06, | |
| "loss": 0.2344, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.6915666182525617, | |
| "grad_norm": 1.428648591041565, | |
| "learning_rate": 1.864090606262492e-06, | |
| "loss": 0.1968, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.6928991165536263, | |
| "grad_norm": 0.3774360418319702, | |
| "learning_rate": 1.9973351099267156e-06, | |
| "loss": 0.2222, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.6928991165536263, | |
| "eval_dev_accuracy": 0.9705609625547506, | |
| "eval_dev_accuracy_threshold": 0.9212765693664551, | |
| "eval_dev_average_precision": 0.8504652727472383, | |
| "eval_dev_f1": 0.786851950828434, | |
| "eval_dev_f1_threshold": 0.6886965036392212, | |
| "eval_dev_precision": 0.7681310654283627, | |
| "eval_dev_recall": 0.8065081625944999, | |
| "eval_loss": 0.26056790351867676, | |
| "eval_runtime": 524.9198, | |
| "eval_samples_per_second": 252.7, | |
| "eval_steps_per_second": 7.898, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.6942316148546911, | |
| "grad_norm": 5.1444525718688965, | |
| "learning_rate": 2.1305796135909398e-06, | |
| "loss": 0.2213, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.6955641131557557, | |
| "grad_norm": 0.18948954343795776, | |
| "learning_rate": 2.2638241172551636e-06, | |
| "loss": 0.2055, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.6968966114568204, | |
| "grad_norm": 13.482624053955078, | |
| "learning_rate": 2.3970686209193873e-06, | |
| "loss": 0.2321, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.6982291097578851, | |
| "grad_norm": 0.6994342803955078, | |
| "learning_rate": 2.530313124583611e-06, | |
| "loss": 0.257, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.6995616080589497, | |
| "grad_norm": 0.9283449053764343, | |
| "learning_rate": 2.663557628247835e-06, | |
| "loss": 0.2398, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.6995616080589497, | |
| "eval_dev_accuracy": 0.9711112953930356, | |
| "eval_dev_accuracy_threshold": 0.9353954195976257, | |
| "eval_dev_average_precision": 0.854664598144776, | |
| "eval_dev_f1": 0.789044289044289, | |
| "eval_dev_f1_threshold": 0.7551745176315308, | |
| "eval_dev_precision": 0.7638732177659248, | |
| "eval_dev_recall": 0.8159307549030349, | |
| "eval_loss": 0.2365955263376236, | |
| "eval_runtime": 526.0912, | |
| "eval_samples_per_second": 252.137, | |
| "eval_steps_per_second": 7.881, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.7008941063600144, | |
| "grad_norm": 69.95816040039062, | |
| "learning_rate": 2.7968021319120587e-06, | |
| "loss": 0.2168, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.7022266046610791, | |
| "grad_norm": 13.763835906982422, | |
| "learning_rate": 2.930046635576283e-06, | |
| "loss": 0.2066, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.7035591029621437, | |
| "grad_norm": 2.3356781005859375, | |
| "learning_rate": 3.0632911392405066e-06, | |
| "loss": 0.222, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.7048916012632084, | |
| "grad_norm": 4.479837417602539, | |
| "learning_rate": 3.1965356429047304e-06, | |
| "loss": 0.269, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.706224099564273, | |
| "grad_norm": 15.155440330505371, | |
| "learning_rate": 3.3297801465689546e-06, | |
| "loss": 0.2327, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.706224099564273, | |
| "eval_dev_accuracy": 0.971005752108981, | |
| "eval_dev_accuracy_threshold": 0.9340351819992065, | |
| "eval_dev_average_precision": 0.8546100599663748, | |
| "eval_dev_f1": 0.7908306421726932, | |
| "eval_dev_f1_threshold": 0.7827771306037903, | |
| "eval_dev_precision": 0.7651096086867445, | |
| "eval_dev_recall": 0.8183411854935905, | |
| "eval_loss": 0.24670535326004028, | |
| "eval_runtime": 524.1368, | |
| "eval_samples_per_second": 253.077, | |
| "eval_steps_per_second": 7.91, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.7075565978653378, | |
| "grad_norm": 30.88198471069336, | |
| "learning_rate": 3.4630246502331784e-06, | |
| "loss": 0.2274, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.7088890961664024, | |
| "grad_norm": 19.670501708984375, | |
| "learning_rate": 3.596269153897402e-06, | |
| "loss": 0.1619, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.710221594467467, | |
| "grad_norm": 1.817409873008728, | |
| "learning_rate": 3.729513657561626e-06, | |
| "loss": 0.2105, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.7115540927685318, | |
| "grad_norm": 7.859726428985596, | |
| "learning_rate": 3.862758161225849e-06, | |
| "loss": 0.2314, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.7128865910695964, | |
| "grad_norm": 1.2846513986587524, | |
| "learning_rate": 3.996002664890073e-06, | |
| "loss": 0.2118, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.7128865910695964, | |
| "eval_dev_accuracy": 0.9711263730050435, | |
| "eval_dev_accuracy_threshold": 0.956214189529419, | |
| "eval_dev_average_precision": 0.8529387869562187, | |
| "eval_dev_f1": 0.7885323513940031, | |
| "eval_dev_f1_threshold": 0.7215464115142822, | |
| "eval_dev_precision": 0.7583729636749975, | |
| "eval_dev_recall": 0.8211898761915196, | |
| "eval_loss": 0.251621812582016, | |
| "eval_runtime": 524.6134, | |
| "eval_samples_per_second": 252.847, | |
| "eval_steps_per_second": 7.903, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.714219089370661, | |
| "grad_norm": 29.144947052001953, | |
| "learning_rate": 4.129247168554298e-06, | |
| "loss": 0.223, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.7155515876717257, | |
| "grad_norm": 1.1121717691421509, | |
| "learning_rate": 4.2624916722185215e-06, | |
| "loss": 0.2177, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.7168840859727904, | |
| "grad_norm": 20.09768295288086, | |
| "learning_rate": 4.395736175882745e-06, | |
| "loss": 0.2092, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.718216584273855, | |
| "grad_norm": 0.34697094559669495, | |
| "learning_rate": 4.528980679546969e-06, | |
| "loss": 0.2112, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.7195490825749197, | |
| "grad_norm": 27.53289222717285, | |
| "learning_rate": 4.662225183211193e-06, | |
| "loss": 0.2188, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.7195490825749197, | |
| "eval_dev_accuracy": 0.9717445550973637, | |
| "eval_dev_accuracy_threshold": 0.9209288358688354, | |
| "eval_dev_average_precision": 0.8572864419695019, | |
| "eval_dev_f1": 0.7925902130849127, | |
| "eval_dev_f1_threshold": 0.5230389833450317, | |
| "eval_dev_precision": 0.7749973824730395, | |
| "eval_dev_recall": 0.8110003286950805, | |
| "eval_loss": 0.2652234435081482, | |
| "eval_runtime": 524.205, | |
| "eval_samples_per_second": 253.044, | |
| "eval_steps_per_second": 7.909, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.7208815808759844, | |
| "grad_norm": 0.12331326305866241, | |
| "learning_rate": 4.795469686875417e-06, | |
| "loss": 0.1995, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.722214079177049, | |
| "grad_norm": 26.130399703979492, | |
| "learning_rate": 4.92871419053964e-06, | |
| "loss": 0.1863, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.7235465774781137, | |
| "grad_norm": 63.348262786865234, | |
| "learning_rate": 5.061958694203864e-06, | |
| "loss": 0.1885, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.7248790757791784, | |
| "grad_norm": 4.434421539306641, | |
| "learning_rate": 5.195203197868088e-06, | |
| "loss": 0.2059, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.7262115740802431, | |
| "grad_norm": 1.5990498065948486, | |
| "learning_rate": 5.328447701532313e-06, | |
| "loss": 0.1944, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.7262115740802431, | |
| "eval_dev_accuracy": 0.9710962177810278, | |
| "eval_dev_accuracy_threshold": 0.938183069229126, | |
| "eval_dev_average_precision": 0.8581458729833185, | |
| "eval_dev_f1": 0.79388743943347, | |
| "eval_dev_f1_threshold": 0.692324697971344, | |
| "eval_dev_precision": 0.7722187694220013, | |
| "eval_dev_recall": 0.8168072751177824, | |
| "eval_loss": 0.24563372135162354, | |
| "eval_runtime": 524.5271, | |
| "eval_samples_per_second": 252.889, | |
| "eval_steps_per_second": 7.904, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.7275440723813077, | |
| "grad_norm": 13.777716636657715, | |
| "learning_rate": 5.461692205196536e-06, | |
| "loss": 0.2015, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.7288765706823723, | |
| "grad_norm": 0.40915578603744507, | |
| "learning_rate": 5.59493670886076e-06, | |
| "loss": 0.1804, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.7302090689834371, | |
| "grad_norm": 2.3663179874420166, | |
| "learning_rate": 5.728181212524984e-06, | |
| "loss": 0.2424, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.7315415672845017, | |
| "grad_norm": 19.617507934570312, | |
| "learning_rate": 5.861425716189208e-06, | |
| "loss": 0.2331, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.7328740655855663, | |
| "grad_norm": 1.4067281484603882, | |
| "learning_rate": 5.9946702198534315e-06, | |
| "loss": 0.197, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.7328740655855663, | |
| "eval_dev_accuracy": 0.9715711625592739, | |
| "eval_dev_accuracy_threshold": 0.9351357221603394, | |
| "eval_dev_average_precision": 0.8584440513483999, | |
| "eval_dev_f1": 0.7944548676255994, | |
| "eval_dev_f1_threshold": 0.3239399194717407, | |
| "eval_dev_precision": 0.7575787695060133, | |
| "eval_dev_recall": 0.8351046346006354, | |
| "eval_loss": 0.271222859621048, | |
| "eval_runtime": 526.0703, | |
| "eval_samples_per_second": 252.147, | |
| "eval_steps_per_second": 7.881, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.7342065638866311, | |
| "grad_norm": 0.45710641145706177, | |
| "learning_rate": 6.127914723517655e-06, | |
| "loss": 0.2503, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.7355390621876957, | |
| "grad_norm": 0.6267761588096619, | |
| "learning_rate": 6.261159227181879e-06, | |
| "loss": 0.2421, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.7368715604887603, | |
| "grad_norm": 11.160945892333984, | |
| "learning_rate": 6.394403730846103e-06, | |
| "loss": 0.2169, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.7382040587898251, | |
| "grad_norm": 0.22500374913215637, | |
| "learning_rate": 6.527648234510327e-06, | |
| "loss": 0.1801, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.7395365570908897, | |
| "grad_norm": 0.34952008724212646, | |
| "learning_rate": 6.660892738174551e-06, | |
| "loss": 0.2168, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.7395365570908897, | |
| "eval_dev_accuracy": 0.9718576371874222, | |
| "eval_dev_accuracy_threshold": 0.9311728477478027, | |
| "eval_dev_average_precision": 0.8606955219787713, | |
| "eval_dev_f1": 0.7966432680635458, | |
| "eval_dev_f1_threshold": 0.3317277133464813, | |
| "eval_dev_precision": 0.7685336048879837, | |
| "eval_dev_recall": 0.8268872575873781, | |
| "eval_loss": 0.24974019825458527, | |
| "eval_runtime": 524.3487, | |
| "eval_samples_per_second": 252.975, | |
| "eval_steps_per_second": 7.907, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.7408690553919544, | |
| "grad_norm": 13.866408348083496, | |
| "learning_rate": 6.794137241838775e-06, | |
| "loss": 0.2266, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.742201553693019, | |
| "grad_norm": 9.584277153015137, | |
| "learning_rate": 6.927381745502999e-06, | |
| "loss": 0.1882, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.7435340519940837, | |
| "grad_norm": 52.4222297668457, | |
| "learning_rate": 7.0606262491672225e-06, | |
| "loss": 0.2214, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.7448665502951484, | |
| "grad_norm": 15.216498374938965, | |
| "learning_rate": 7.193870752831446e-06, | |
| "loss": 0.23, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.746199048596213, | |
| "grad_norm": 21.095590591430664, | |
| "learning_rate": 7.32711525649567e-06, | |
| "loss": 0.2355, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.746199048596213, | |
| "eval_dev_accuracy": 0.9719631804714769, | |
| "eval_dev_accuracy_threshold": 0.9183558821678162, | |
| "eval_dev_average_precision": 0.8589405860687593, | |
| "eval_dev_f1": 0.7974690109434157, | |
| "eval_dev_f1_threshold": 0.33763912320137024, | |
| "eval_dev_precision": 0.7571400433326768, | |
| "eval_dev_recall": 0.8423359263723019, | |
| "eval_loss": 0.24558140337467194, | |
| "eval_runtime": 523.2596, | |
| "eval_samples_per_second": 253.501, | |
| "eval_steps_per_second": 7.923, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.7475315468972777, | |
| "grad_norm": 87.9457778930664, | |
| "learning_rate": 7.460359760159894e-06, | |
| "loss": 0.2105, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.7488640451983424, | |
| "grad_norm": 1.1765731573104858, | |
| "learning_rate": 7.593604263824118e-06, | |
| "loss": 0.1608, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.750196543499407, | |
| "grad_norm": 12.082050323486328, | |
| "learning_rate": 7.726848767488342e-06, | |
| "loss": 0.214, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.7515290418004718, | |
| "grad_norm": 17.673494338989258, | |
| "learning_rate": 7.860093271152565e-06, | |
| "loss": 0.2531, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.7528615401015364, | |
| "grad_norm": 4.850943565368652, | |
| "learning_rate": 7.99333777481679e-06, | |
| "loss": 0.2641, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.7528615401015364, | |
| "eval_dev_accuracy": 0.9718953312174418, | |
| "eval_dev_accuracy_threshold": 0.9289690852165222, | |
| "eval_dev_average_precision": 0.8607199959963239, | |
| "eval_dev_f1": 0.7934619562406249, | |
| "eval_dev_f1_threshold": 0.2598855793476105, | |
| "eval_dev_precision": 0.7515187144816774, | |
| "eval_dev_recall": 0.8403637558891202, | |
| "eval_loss": 0.24914328753948212, | |
| "eval_runtime": 526.4308, | |
| "eval_samples_per_second": 251.974, | |
| "eval_steps_per_second": 7.876, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.754194038402601, | |
| "grad_norm": 21.872079849243164, | |
| "learning_rate": 8.126582278481013e-06, | |
| "loss": 0.2002, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.7555265367036657, | |
| "grad_norm": 0.3463062345981598, | |
| "learning_rate": 8.259826782145237e-06, | |
| "loss": 0.1727, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.7568590350047304, | |
| "grad_norm": 4.641270637512207, | |
| "learning_rate": 8.39307128580946e-06, | |
| "loss": 0.2135, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.758191533305795, | |
| "grad_norm": 1.456807017326355, | |
| "learning_rate": 8.526315789473685e-06, | |
| "loss": 0.1694, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.7595240316068597, | |
| "grad_norm": 0.2848343551158905, | |
| "learning_rate": 8.659560293137908e-06, | |
| "loss": 0.1969, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.7595240316068597, | |
| "eval_dev_accuracy": 0.9716917834553364, | |
| "eval_dev_accuracy_threshold": 0.9249356389045715, | |
| "eval_dev_average_precision": 0.8628574223791167, | |
| "eval_dev_f1": 0.7945488333677474, | |
| "eval_dev_f1_threshold": 0.2702260911464691, | |
| "eval_dev_precision": 0.7511957052220596, | |
| "eval_dev_recall": 0.8432124465870494, | |
| "eval_loss": 0.2667163014411926, | |
| "eval_runtime": 523.0471, | |
| "eval_samples_per_second": 253.604, | |
| "eval_steps_per_second": 7.927, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.521692783285364, | |
| "grad_norm": 680.6102294921875, | |
| "learning_rate": 1.1723219044235212e-05, | |
| "loss": 0.1989, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.524357744376932, | |
| "grad_norm": 555.5462036132812, | |
| "learning_rate": 1.1900870492094512e-05, | |
| "loss": 0.1823, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.5270227054685002, | |
| "grad_norm": 19347.361328125, | |
| "learning_rate": 1.207852193995381e-05, | |
| "loss": 0.2099, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.5296876665600683, | |
| "grad_norm": 28487.04296875, | |
| "learning_rate": 1.225617338781311e-05, | |
| "loss": 0.2007, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.5323526276516364, | |
| "grad_norm": 33787.03515625, | |
| "learning_rate": 1.2433824835672413e-05, | |
| "loss": 0.1893, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.5323526276516364, | |
| "eval_dev_accuracy": 0.9712469939011059, | |
| "eval_dev_accuracy_threshold": 0.930076539516449, | |
| "eval_dev_average_precision": 0.8589126571915907, | |
| "eval_dev_f1": 0.788643194504079, | |
| "eval_dev_f1_threshold": 0.8417924642562866, | |
| "eval_dev_precision": 0.7729615991583377, | |
| "eval_dev_recall": 0.8049742522186918, | |
| "eval_loss": 0.22310471534729004, | |
| "eval_runtime": 911.6835, | |
| "eval_samples_per_second": 145.497, | |
| "eval_steps_per_second": 2.274, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.5350175887432043, | |
| "grad_norm": 10426.8994140625, | |
| "learning_rate": 1.2611476283531711e-05, | |
| "loss": 0.1941, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.5376825498347724, | |
| "grad_norm": 20932.927734375, | |
| "learning_rate": 1.2789127731391012e-05, | |
| "loss": 0.1917, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.5403475109263405, | |
| "grad_norm": 19958.53125, | |
| "learning_rate": 1.2966779179250314e-05, | |
| "loss": 0.1704, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.5430124720179086, | |
| "grad_norm": 4519.30517578125, | |
| "learning_rate": 1.3144430627109612e-05, | |
| "loss": 0.1769, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.5456774331094767, | |
| "grad_norm": 1185.6409912109375, | |
| "learning_rate": 1.3322082074968912e-05, | |
| "loss": 0.1917, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.5456774331094767, | |
| "eval_dev_accuracy": 0.971314843155141, | |
| "eval_dev_accuracy_threshold": 0.9302895069122314, | |
| "eval_dev_average_precision": 0.8581921137101376, | |
| "eval_dev_f1": 0.7902556259558663, | |
| "eval_dev_f1_threshold": 0.9142668843269348, | |
| "eval_dev_precision": 0.7879315978651563, | |
| "eval_dev_recall": 0.792593404185384, | |
| "eval_loss": 0.21683622896671295, | |
| "eval_runtime": 910.3929, | |
| "eval_samples_per_second": 145.703, | |
| "eval_steps_per_second": 2.277, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.5483423942010446, | |
| "grad_norm": 10156.921875, | |
| "learning_rate": 1.3499733522828211e-05, | |
| "loss": 0.156, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.5510073552926127, | |
| "grad_norm": 20830.22265625, | |
| "learning_rate": 1.3677384970687513e-05, | |
| "loss": 0.1882, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.5536723163841808, | |
| "grad_norm": 10158.1328125, | |
| "learning_rate": 1.3855036418546812e-05, | |
| "loss": 0.1914, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.556337277475749, | |
| "grad_norm": 12550.0205078125, | |
| "learning_rate": 1.4032687866406112e-05, | |
| "loss": 0.1859, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.559002238567317, | |
| "grad_norm": 25116.525390625, | |
| "learning_rate": 1.4210339314265414e-05, | |
| "loss": 0.1915, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.559002238567317, | |
| "eval_dev_accuracy": 0.9707343550928405, | |
| "eval_dev_accuracy_threshold": 0.9600124359130859, | |
| "eval_dev_average_precision": 0.8552104699335599, | |
| "eval_dev_f1": 0.788252996419862, | |
| "eval_dev_f1_threshold": 0.6280207633972168, | |
| "eval_dev_precision": 0.7486694263749261, | |
| "eval_dev_recall": 0.8322559439027063, | |
| "eval_loss": 0.22474558651447296, | |
| "eval_runtime": 912.547, | |
| "eval_samples_per_second": 145.359, | |
| "eval_steps_per_second": 2.272, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.561667199658885, | |
| "grad_norm": 1747.8248291015625, | |
| "learning_rate": 1.4387990762124712e-05, | |
| "loss": 0.1658, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.564332160750453, | |
| "grad_norm": 10528.990234375, | |
| "learning_rate": 1.4565642209984013e-05, | |
| "loss": 0.1877, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.5669971218420211, | |
| "grad_norm": 14108.591796875, | |
| "learning_rate": 1.4743293657843311e-05, | |
| "loss": 0.1972, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.5696620829335892, | |
| "grad_norm": 33609.73828125, | |
| "learning_rate": 1.4920945105702613e-05, | |
| "loss": 0.1915, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.5723270440251573, | |
| "grad_norm": 14393.123046875, | |
| "learning_rate": 1.5098596553561913e-05, | |
| "loss": 0.1982, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.5723270440251573, | |
| "eval_dev_accuracy": 0.9714354640512036, | |
| "eval_dev_accuracy_threshold": 0.861323356628418, | |
| "eval_dev_average_precision": 0.8617997355004788, | |
| "eval_dev_f1": 0.792690745885873, | |
| "eval_dev_f1_threshold": 0.5087981224060059, | |
| "eval_dev_precision": 0.7735947439774742, | |
| "eval_dev_recall": 0.8127533691245754, | |
| "eval_loss": 0.27619487047195435, | |
| "eval_runtime": 912.722, | |
| "eval_samples_per_second": 145.331, | |
| "eval_steps_per_second": 2.271, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.5749920051167252, | |
| "grad_norm": 2650.031982421875, | |
| "learning_rate": 1.5276248001421212e-05, | |
| "loss": 0.1977, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.5776569662082933, | |
| "grad_norm": 21126.404296875, | |
| "learning_rate": 1.5453899449280514e-05, | |
| "loss": 0.1646, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.5803219272998614, | |
| "grad_norm": 1604.2296142578125, | |
| "learning_rate": 1.5631550897139813e-05, | |
| "loss": 0.1855, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 1.5829868883914295, | |
| "grad_norm": 9624.1689453125, | |
| "learning_rate": 1.580920234499911e-05, | |
| "loss": 0.1809, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 1.5856518494829976, | |
| "grad_norm": 4949.5078125, | |
| "learning_rate": 1.5986853792858413e-05, | |
| "loss": 0.185, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 1.5856518494829976, | |
| "eval_dev_accuracy": 0.9717068610673442, | |
| "eval_dev_accuracy_threshold": 0.9281443357467651, | |
| "eval_dev_average_precision": 0.8651298435899648, | |
| "eval_dev_f1": 0.7949938492806332, | |
| "eval_dev_f1_threshold": 0.7204960584640503, | |
| "eval_dev_precision": 0.7765935214211076, | |
| "eval_dev_recall": 0.8142872795003835, | |
| "eval_loss": 0.23017099499702454, | |
| "eval_runtime": 912.0946, | |
| "eval_samples_per_second": 145.431, | |
| "eval_steps_per_second": 2.273, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 1.5883168105745655, | |
| "grad_norm": 4366.28125, | |
| "learning_rate": 1.6164505240717715e-05, | |
| "loss": 0.1524, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 1.5909817716661336, | |
| "grad_norm": 6088.126953125, | |
| "learning_rate": 1.6342156688577014e-05, | |
| "loss": 0.1626, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 1.5936467327577017, | |
| "grad_norm": 41741.02734375, | |
| "learning_rate": 1.6519808136436312e-05, | |
| "loss": 0.1855, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 1.5963116938492699, | |
| "grad_norm": 6351.677734375, | |
| "learning_rate": 1.6697459584295614e-05, | |
| "loss": 0.1777, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 1.598976654940838, | |
| "grad_norm": 667.612548828125, | |
| "learning_rate": 1.6875111032154913e-05, | |
| "loss": 0.1519, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.598976654940838, | |
| "eval_dev_accuracy": 0.9702217162845749, | |
| "eval_dev_accuracy_threshold": 0.9527369737625122, | |
| "eval_dev_average_precision": 0.8599004250878434, | |
| "eval_dev_f1": 0.7857490403849272, | |
| "eval_dev_f1_threshold": 0.9123563170433044, | |
| "eval_dev_precision": 0.7755602988260406, | |
| "eval_dev_recall": 0.7962090500712172, | |
| "eval_loss": 0.23386961221694946, | |
| "eval_runtime": 912.5307, | |
| "eval_samples_per_second": 145.362, | |
| "eval_steps_per_second": 2.272, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.6016416160324058, | |
| "grad_norm": 74362.328125, | |
| "learning_rate": 1.7052762480014215e-05, | |
| "loss": 0.1705, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 1.604306577123974, | |
| "grad_norm": 41024.45703125, | |
| "learning_rate": 1.7230413927873513e-05, | |
| "loss": 0.1868, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 1.606971538215542, | |
| "grad_norm": 10907.779296875, | |
| "learning_rate": 1.7408065375732815e-05, | |
| "loss": 0.1801, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 1.6096364993071102, | |
| "grad_norm": 17233.494140625, | |
| "learning_rate": 1.7585716823592114e-05, | |
| "loss": 0.1672, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 1.6123014603986783, | |
| "grad_norm": 6108.4228515625, | |
| "learning_rate": 1.7763368271451412e-05, | |
| "loss": 0.1619, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 1.6123014603986783, | |
| "eval_dev_accuracy": 0.9701990998665632, | |
| "eval_dev_accuracy_threshold": 0.9653939604759216, | |
| "eval_dev_average_precision": 0.8583701139769879, | |
| "eval_dev_f1": 0.7852786105654916, | |
| "eval_dev_f1_threshold": 0.4483921527862549, | |
| "eval_dev_precision": 0.7433212643115765, | |
| "eval_dev_recall": 0.8322559439027063, | |
| "eval_loss": 0.2841331958770752, | |
| "eval_runtime": 912.3726, | |
| "eval_samples_per_second": 145.387, | |
| "eval_steps_per_second": 2.272, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 1.6149664214902462, | |
| "grad_norm": 753.2778930664062, | |
| "learning_rate": 1.7941019719310714e-05, | |
| "loss": 0.1775, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 1.6176313825818143, | |
| "grad_norm": 7861.2724609375, | |
| "learning_rate": 1.8118671167170013e-05, | |
| "loss": 0.1539, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 1.6202963436733824, | |
| "grad_norm": 4606.5625, | |
| "learning_rate": 1.8296322615029315e-05, | |
| "loss": 0.1984, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 1.6229613047649505, | |
| "grad_norm": 3256.729248046875, | |
| "learning_rate": 1.8473974062888614e-05, | |
| "loss": 0.1936, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 1.6256262658565186, | |
| "grad_norm": 16788.51953125, | |
| "learning_rate": 1.8651625510747916e-05, | |
| "loss": 0.1928, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.6256262658565186, | |
| "eval_dev_accuracy": 0.9702820267326061, | |
| "eval_dev_accuracy_threshold": 0.9588229656219482, | |
| "eval_dev_average_precision": 0.8578683942622316, | |
| "eval_dev_f1": 0.78329335697153, | |
| "eval_dev_f1_threshold": 0.8013461232185364, | |
| "eval_dev_precision": 0.7472888269823899, | |
| "eval_dev_recall": 0.8229429166210146, | |
| "eval_loss": 0.21942387521266937, | |
| "eval_runtime": 911.7434, | |
| "eval_samples_per_second": 145.487, | |
| "eval_steps_per_second": 2.274, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.6282912269480865, | |
| "grad_norm": 1664.8751220703125, | |
| "learning_rate": 1.8829276958607214e-05, | |
| "loss": 0.166, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 1.6309561880396546, | |
| "grad_norm": 21448.6796875, | |
| "learning_rate": 1.9006928406466513e-05, | |
| "loss": 0.1774, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 1.6336211491312227, | |
| "grad_norm": 18060.765625, | |
| "learning_rate": 1.9184579854325815e-05, | |
| "loss": 0.1319, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 1.6362861102227908, | |
| "grad_norm": 7385.87353515625, | |
| "learning_rate": 1.9362231302185113e-05, | |
| "loss": 0.1971, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 1.638951071314359, | |
| "grad_norm": 5024.80078125, | |
| "learning_rate": 1.9539882750044415e-05, | |
| "loss": 0.1728, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.638951071314359, | |
| "eval_dev_accuracy": 0.9713073043491371, | |
| "eval_dev_accuracy_threshold": 0.9408199787139893, | |
| "eval_dev_average_precision": 0.8671213714406215, | |
| "eval_dev_f1": 0.7911789297658863, | |
| "eval_dev_f1_threshold": 0.6503252983093262, | |
| "eval_dev_precision": 0.7563193126186433, | |
| "eval_dev_recall": 0.829407253204777, | |
| "eval_loss": 0.23295743763446808, | |
| "eval_runtime": 911.9086, | |
| "eval_samples_per_second": 145.461, | |
| "eval_steps_per_second": 2.273, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.6416160324059268, | |
| "grad_norm": 8569.271484375, | |
| "learning_rate": 1.9717534197903714e-05, | |
| "loss": 0.1703, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 1.644280993497495, | |
| "grad_norm": 20367.513671875, | |
| "learning_rate": 1.9895185645763016e-05, | |
| "loss": 0.1624, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 1.646945954589063, | |
| "grad_norm": 1712.7371826171875, | |
| "learning_rate": 1.9991906350553724e-05, | |
| "loss": 0.1526, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.6496109156806311, | |
| "grad_norm": 408.11163330078125, | |
| "learning_rate": 1.9972165742148174e-05, | |
| "loss": 0.1611, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.6522758767721992, | |
| "grad_norm": 6086.27587890625, | |
| "learning_rate": 1.9952425133742624e-05, | |
| "loss": 0.1603, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.6522758767721992, | |
| "eval_dev_accuracy": 0.971164067035063, | |
| "eval_dev_accuracy_threshold": 0.9526249170303345, | |
| "eval_dev_average_precision": 0.8575865091547995, | |
| "eval_dev_f1": 0.7882105728821057, | |
| "eval_dev_f1_threshold": 0.9510890245437622, | |
| "eval_dev_precision": 0.7956905213799264, | |
| "eval_dev_recall": 0.7808699463131369, | |
| "eval_loss": 0.2797718644142151, | |
| "eval_runtime": 911.0244, | |
| "eval_samples_per_second": 145.602, | |
| "eval_steps_per_second": 2.275, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.6549408378637671, | |
| "grad_norm": 448.80615234375, | |
| "learning_rate": 1.993268452533707e-05, | |
| "loss": 0.1546, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 1.6576057989553352, | |
| "grad_norm": 31734.08984375, | |
| "learning_rate": 1.991294391693152e-05, | |
| "loss": 0.2302, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 1.6602707600469033, | |
| "grad_norm": 18211.0, | |
| "learning_rate": 1.989320330852597e-05, | |
| "loss": 0.1694, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 1.6629357211384712, | |
| "grad_norm": 8841.400390625, | |
| "learning_rate": 1.9873462700120417e-05, | |
| "loss": 0.1705, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.6656006822300395, | |
| "grad_norm": 24008.82421875, | |
| "learning_rate": 1.985372209171487e-05, | |
| "loss": 0.1606, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.6656006822300395, | |
| "eval_dev_accuracy": 0.9708549759889029, | |
| "eval_dev_accuracy_threshold": 0.9542537927627563, | |
| "eval_dev_average_precision": 0.841409192198319, | |
| "eval_dev_f1": 0.7890381515314348, | |
| "eval_dev_f1_threshold": 0.8909753561019897, | |
| "eval_dev_precision": 0.7742275651165244, | |
| "eval_dev_recall": 0.8044264270844746, | |
| "eval_loss": 0.2822663486003876, | |
| "eval_runtime": 911.9083, | |
| "eval_samples_per_second": 145.461, | |
| "eval_steps_per_second": 2.273, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.6682656433216074, | |
| "grad_norm": 2424.1279296875, | |
| "learning_rate": 1.9833981483309317e-05, | |
| "loss": 0.1887, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 1.6709306044131755, | |
| "grad_norm": 45195.04296875, | |
| "learning_rate": 1.9814240874903764e-05, | |
| "loss": 0.1918, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 1.6735955655047436, | |
| "grad_norm": 2223.521728515625, | |
| "learning_rate": 1.9794500266498217e-05, | |
| "loss": 0.1475, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 1.6762605265963115, | |
| "grad_norm": 2829.02099609375, | |
| "learning_rate": 1.9774759658092664e-05, | |
| "loss": 0.1995, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 1.6789254876878799, | |
| "grad_norm": 11702.283203125, | |
| "learning_rate": 1.975501904968711e-05, | |
| "loss": 0.1648, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.6789254876878799, | |
| "eval_dev_accuracy": 0.9710359073329966, | |
| "eval_dev_accuracy_threshold": 0.910698652267456, | |
| "eval_dev_average_precision": 0.849610869643878, | |
| "eval_dev_f1": 0.7900427192658614, | |
| "eval_dev_f1_threshold": 0.4727928936481476, | |
| "eval_dev_precision": 0.7616432784218019, | |
| "eval_dev_recall": 0.8206420510573025, | |
| "eval_loss": 0.25969284772872925, | |
| "eval_runtime": 910.5015, | |
| "eval_samples_per_second": 145.686, | |
| "eval_steps_per_second": 2.277, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.6815904487794477, | |
| "grad_norm": 21649.341796875, | |
| "learning_rate": 1.9735278441281564e-05, | |
| "loss": 0.1788, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 1.6842554098710159, | |
| "grad_norm": 86422.7421875, | |
| "learning_rate": 1.971553783287601e-05, | |
| "loss": 0.2286, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 1.686920370962584, | |
| "grad_norm": 45808.265625, | |
| "learning_rate": 1.969579722447046e-05, | |
| "loss": 0.1611, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 1.6895853320541518, | |
| "grad_norm": 13495.0380859375, | |
| "learning_rate": 1.967605661606491e-05, | |
| "loss": 0.1962, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 1.6922502931457202, | |
| "grad_norm": 22458.46484375, | |
| "learning_rate": 1.9656316007659357e-05, | |
| "loss": 0.1825, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 1.6922502931457202, | |
| "eval_dev_accuracy": 0.9710283685269927, | |
| "eval_dev_accuracy_threshold": 0.932883620262146, | |
| "eval_dev_average_precision": 0.8574042822114104, | |
| "eval_dev_f1": 0.7900720576461169, | |
| "eval_dev_f1_threshold": 0.9062104225158691, | |
| "eval_dev_precision": 0.7702955870108243, | |
| "eval_dev_recall": 0.8108907636682371, | |
| "eval_loss": 0.20927684009075165, | |
| "eval_runtime": 911.9738, | |
| "eval_samples_per_second": 145.45, | |
| "eval_steps_per_second": 2.273, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 4333.6484375, | |
| "learning_rate": 1.9636575399253807e-05, | |
| "loss": 0.1795, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 1.6975802153288562, | |
| "grad_norm": 51141.83203125, | |
| "learning_rate": 1.9616834790848257e-05, | |
| "loss": 0.1944, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 1.7002451764204243, | |
| "grad_norm": 24413.966796875, | |
| "learning_rate": 1.9597094182442704e-05, | |
| "loss": 0.196, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 1.7029101375119922, | |
| "grad_norm": 11386.5224609375, | |
| "learning_rate": 1.9577353574037154e-05, | |
| "loss": 0.1851, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 1.7055750986035605, | |
| "grad_norm": 1291.42236328125, | |
| "learning_rate": 1.9557612965631604e-05, | |
| "loss": 0.1787, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.7055750986035605, | |
| "eval_dev_accuracy": 0.9717747103213793, | |
| "eval_dev_accuracy_threshold": 0.9631803035736084, | |
| "eval_dev_average_precision": 0.8630868871875782, | |
| "eval_dev_f1": 0.7981506777345803, | |
| "eval_dev_f1_threshold": 0.9355161786079407, | |
| "eval_dev_precision": 0.7667305945291208, | |
| "eval_dev_recall": 0.8322559439027063, | |
| "eval_loss": 0.23051400482654572, | |
| "eval_runtime": 912.7714, | |
| "eval_samples_per_second": 145.323, | |
| "eval_steps_per_second": 2.271, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.7082400596951284, | |
| "grad_norm": 689.987060546875, | |
| "learning_rate": 1.953787235722605e-05, | |
| "loss": 0.1924, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 1.7109050207866965, | |
| "grad_norm": 11370.0517578125, | |
| "learning_rate": 1.95181317488205e-05, | |
| "loss": 0.1611, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 1.7135699818782646, | |
| "grad_norm": 15404.4140625, | |
| "learning_rate": 1.949839114041495e-05, | |
| "loss": 0.1799, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 1.7162349429698325, | |
| "grad_norm": 14026.65234375, | |
| "learning_rate": 1.9478650532009397e-05, | |
| "loss": 0.1977, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 1.7188999040614008, | |
| "grad_norm": 1225.2841796875, | |
| "learning_rate": 1.9458909923603847e-05, | |
| "loss": 0.1672, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 1.7188999040614008, | |
| "eval_dev_accuracy": 0.9720687237555315, | |
| "eval_dev_accuracy_threshold": 0.920991063117981, | |
| "eval_dev_average_precision": 0.845700489229083, | |
| "eval_dev_f1": 0.7995607383778697, | |
| "eval_dev_f1_threshold": 0.6048256158828735, | |
| "eval_dev_precision": 0.7648059223689476, | |
| "eval_dev_recall": 0.8376246302180343, | |
| "eval_loss": 0.21997055411338806, | |
| "eval_runtime": 912.741, | |
| "eval_samples_per_second": 145.328, | |
| "eval_steps_per_second": 2.271, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 1.7215648651529687, | |
| "grad_norm": 18876.72265625, | |
| "learning_rate": 1.9439169315198297e-05, | |
| "loss": 0.1812, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 1.7242298262445368, | |
| "grad_norm": 44768.2578125, | |
| "learning_rate": 1.9419428706792744e-05, | |
| "loss": 0.1641, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 1.726894787336105, | |
| "grad_norm": 1987.0482177734375, | |
| "learning_rate": 1.9399688098387194e-05, | |
| "loss": 0.1526, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 1.7295597484276728, | |
| "grad_norm": 1468.9228515625, | |
| "learning_rate": 1.9379947489981644e-05, | |
| "loss": 0.1745, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 1.7322247095192411, | |
| "grad_norm": 2461.248291015625, | |
| "learning_rate": 1.936020688157609e-05, | |
| "loss": 0.2017, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.7322247095192411, | |
| "eval_dev_accuracy": 0.9716842446493325, | |
| "eval_dev_accuracy_threshold": 0.8851553201675415, | |
| "eval_dev_average_precision": 0.8642482817005424, | |
| "eval_dev_f1": 0.7979695431472081, | |
| "eval_dev_f1_threshold": 0.674056887626648, | |
| "eval_dev_precision": 0.7787859824780976, | |
| "eval_dev_recall": 0.8181220554399036, | |
| "eval_loss": 0.25105008482933044, | |
| "eval_runtime": 933.219, | |
| "eval_samples_per_second": 142.139, | |
| "eval_steps_per_second": 2.221, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.734889670610809, | |
| "grad_norm": 1901.36474609375, | |
| "learning_rate": 1.934046627317054e-05, | |
| "loss": 0.2092, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 1.7375546317023771, | |
| "grad_norm": 25123.84375, | |
| "learning_rate": 1.932072566476499e-05, | |
| "loss": 0.1807, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 1.7402195927939452, | |
| "grad_norm": 21136.314453125, | |
| "learning_rate": 1.9300985056359437e-05, | |
| "loss": 0.1627, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 1.742884553885513, | |
| "grad_norm": 14610.0068359375, | |
| "learning_rate": 1.9281244447953887e-05, | |
| "loss": 0.1809, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 1.7455495149770814, | |
| "grad_norm": 5105.17529296875, | |
| "learning_rate": 1.9261503839548337e-05, | |
| "loss": 0.1774, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 1.7455495149770814, | |
| "eval_dev_accuracy": 0.9722270386816136, | |
| "eval_dev_accuracy_threshold": 0.9311126470565796, | |
| "eval_dev_average_precision": 0.8672414882858807, | |
| "eval_dev_f1": 0.801593625498008, | |
| "eval_dev_f1_threshold": 0.841367244720459, | |
| "eval_dev_precision": 0.7779954629820581, | |
| "eval_dev_recall": 0.8266681275336912, | |
| "eval_loss": 0.2049088478088379, | |
| "eval_runtime": 933.0893, | |
| "eval_samples_per_second": 142.159, | |
| "eval_steps_per_second": 2.222, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 1.7482144760686493, | |
| "grad_norm": 52553.86328125, | |
| "learning_rate": 1.9241763231142784e-05, | |
| "loss": 0.154, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 1.7508794371602174, | |
| "grad_norm": 8918.7666015625, | |
| "learning_rate": 1.9222022622737234e-05, | |
| "loss": 0.1871, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 1.7535443982517855, | |
| "grad_norm": 1728.83984375, | |
| "learning_rate": 1.9202282014331684e-05, | |
| "loss": 0.1929, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 1.7562093593433534, | |
| "grad_norm": 8542.5439453125, | |
| "learning_rate": 1.918254140592613e-05, | |
| "loss": 0.1519, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 1.7588743204349218, | |
| "grad_norm": 40360.875, | |
| "learning_rate": 1.916280079752058e-05, | |
| "loss": 0.2105, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.7588743204349218, | |
| "eval_dev_accuracy": 0.9723099655476566, | |
| "eval_dev_accuracy_threshold": 0.82029128074646, | |
| "eval_dev_average_precision": 0.8667448997071003, | |
| "eval_dev_f1": 0.801227852873068, | |
| "eval_dev_f1_threshold": 0.5722821354866028, | |
| "eval_dev_precision": 0.7878627409447151, | |
| "eval_dev_recall": 0.8150542346882875, | |
| "eval_loss": 0.2998444736003876, | |
| "eval_runtime": 935.7172, | |
| "eval_samples_per_second": 141.76, | |
| "eval_steps_per_second": 2.215, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.7615392815264896, | |
| "grad_norm": 46394.6875, | |
| "learning_rate": 1.914306018911503e-05, | |
| "loss": 0.1664, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 1.7642042426180577, | |
| "grad_norm": 3412.559814453125, | |
| "learning_rate": 1.9123319580709477e-05, | |
| "loss": 0.1806, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 1.7668692037096259, | |
| "grad_norm": 5545.865234375, | |
| "learning_rate": 1.910357897230393e-05, | |
| "loss": 0.1881, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 1.7695341648011937, | |
| "grad_norm": 85940.0234375, | |
| "learning_rate": 1.9083838363898377e-05, | |
| "loss": 0.1881, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 1.772199125892762, | |
| "grad_norm": 15622.53125, | |
| "learning_rate": 1.9064097755492824e-05, | |
| "loss": 0.1889, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 1.772199125892762, | |
| "eval_dev_accuracy": 0.9718651759934261, | |
| "eval_dev_accuracy_threshold": 0.9094328880310059, | |
| "eval_dev_average_precision": 0.8682256601471484, | |
| "eval_dev_f1": 0.7982062780269058, | |
| "eval_dev_f1_threshold": 0.6328648328781128, | |
| "eval_dev_precision": 0.7697395197395197, | |
| "eval_dev_recall": 0.8288594280705599, | |
| "eval_loss": 0.19647949934005737, | |
| "eval_runtime": 933.2357, | |
| "eval_samples_per_second": 142.137, | |
| "eval_steps_per_second": 2.221, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 1.77486408698433, | |
| "grad_norm": 1002.01220703125, | |
| "learning_rate": 1.9044357147087277e-05, | |
| "loss": 0.1722, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 1.777529048075898, | |
| "grad_norm": 45076.7421875, | |
| "learning_rate": 1.9024616538681724e-05, | |
| "loss": 0.1999, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 1.7801940091674662, | |
| "grad_norm": 2053.866455078125, | |
| "learning_rate": 1.900487593027617e-05, | |
| "loss": 0.1894, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 1.782858970259034, | |
| "grad_norm": 3085.87451171875, | |
| "learning_rate": 1.8985135321870624e-05, | |
| "loss": 0.1702, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 1.7855239313506024, | |
| "grad_norm": 1689.106201171875, | |
| "learning_rate": 1.896539471346507e-05, | |
| "loss": 0.1905, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.7855239313506024, | |
| "eval_dev_accuracy": 0.97235519838368, | |
| "eval_dev_accuracy_threshold": 0.8816102743148804, | |
| "eval_dev_average_precision": 0.8719513025342801, | |
| "eval_dev_f1": 0.8005663642561224, | |
| "eval_dev_f1_threshold": 0.595874547958374, | |
| "eval_dev_precision": 0.7677529672098169, | |
| "eval_dev_recall": 0.8363098498959132, | |
| "eval_loss": 0.22260619699954987, | |
| "eval_runtime": 935.4859, | |
| "eval_samples_per_second": 141.795, | |
| "eval_steps_per_second": 2.216, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.7881888924421703, | |
| "grad_norm": 24842.880859375, | |
| "learning_rate": 1.8945654105059517e-05, | |
| "loss": 0.1809, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 1.7908538535337384, | |
| "grad_norm": 60853.56640625, | |
| "learning_rate": 1.892591349665397e-05, | |
| "loss": 0.1825, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 1.7935188146253065, | |
| "grad_norm": 6448.2060546875, | |
| "learning_rate": 1.8906172888248417e-05, | |
| "loss": 0.1912, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 1.7961837757168744, | |
| "grad_norm": 28209.67578125, | |
| "learning_rate": 1.8886432279842867e-05, | |
| "loss": 0.1849, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 1.7988487368084427, | |
| "grad_norm": 1441.7255859375, | |
| "learning_rate": 1.8866691671437317e-05, | |
| "loss": 0.1812, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.7988487368084427, | |
| "eval_dev_accuracy": 0.9728678371919456, | |
| "eval_dev_accuracy_threshold": 0.8422494530677795, | |
| "eval_dev_average_precision": 0.8713666080730428, | |
| "eval_dev_f1": 0.8021557531662624, | |
| "eval_dev_f1_threshold": 0.6560682058334351, | |
| "eval_dev_precision": 0.7893508697496818, | |
| "eval_dev_recall": 0.8153829297688178, | |
| "eval_loss": 0.21360942721366882, | |
| "eval_runtime": 935.6466, | |
| "eval_samples_per_second": 141.77, | |
| "eval_steps_per_second": 2.216, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.8015136979000106, | |
| "grad_norm": 19593.896484375, | |
| "learning_rate": 1.8846951063031764e-05, | |
| "loss": 0.1729, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 1.8041786589915787, | |
| "grad_norm": 49532.5390625, | |
| "learning_rate": 1.8827210454626214e-05, | |
| "loss": 0.1981, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 1.8068436200831468, | |
| "grad_norm": 2939.565185546875, | |
| "learning_rate": 1.8807469846220664e-05, | |
| "loss": 0.172, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 1.8095085811747147, | |
| "grad_norm": 18294.060546875, | |
| "learning_rate": 1.878772923781511e-05, | |
| "loss": 0.1609, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 1.812173542266283, | |
| "grad_norm": 67081.5234375, | |
| "learning_rate": 1.876798862940956e-05, | |
| "loss": 0.18, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.812173542266283, | |
| "eval_dev_accuracy": 0.972196883457598, | |
| "eval_dev_accuracy_threshold": 0.9536248445510864, | |
| "eval_dev_average_precision": 0.8677887820499237, | |
| "eval_dev_f1": 0.7948606271777002, | |
| "eval_dev_f1_threshold": 0.8924222588539124, | |
| "eval_dev_precision": 0.7899577967752408, | |
| "eval_dev_recall": 0.7998246959570505, | |
| "eval_loss": 0.22794483602046967, | |
| "eval_runtime": 934.1595, | |
| "eval_samples_per_second": 141.996, | |
| "eval_steps_per_second": 2.219, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.814838503357851, | |
| "grad_norm": 3441.131103515625, | |
| "learning_rate": 1.874824802100401e-05, | |
| "loss": 0.1618, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 1.817503464449419, | |
| "grad_norm": 40774.67578125, | |
| "learning_rate": 1.8728507412598457e-05, | |
| "loss": 0.1673, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 1.8201684255409871, | |
| "grad_norm": 23139.685546875, | |
| "learning_rate": 1.8708766804192907e-05, | |
| "loss": 0.1793, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 1.822833386632555, | |
| "grad_norm": 8400.26171875, | |
| "learning_rate": 1.8689026195787357e-05, | |
| "loss": 0.219, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 1.8254983477241233, | |
| "grad_norm": 874.6626586914062, | |
| "learning_rate": 1.8669285587381804e-05, | |
| "loss": 0.1714, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.8254983477241233, | |
| "eval_dev_accuracy": 0.9731241565960783, | |
| "eval_dev_accuracy_threshold": 0.939326286315918, | |
| "eval_dev_average_precision": 0.872717385393903, | |
| "eval_dev_f1": 0.803395225464191, | |
| "eval_dev_f1_threshold": 0.7294609546661377, | |
| "eval_dev_precision": 0.7787719839555692, | |
| "eval_dev_recall": 0.8296263832584639, | |
| "eval_loss": 0.22690728306770325, | |
| "eval_runtime": 931.9708, | |
| "eval_samples_per_second": 142.33, | |
| "eval_steps_per_second": 2.224, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.8281633088156912, | |
| "grad_norm": 339.2591552734375, | |
| "learning_rate": 1.8649544978976254e-05, | |
| "loss": 0.1969, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 1.8308282699072593, | |
| "grad_norm": 48369.09375, | |
| "learning_rate": 1.8629804370570704e-05, | |
| "loss": 0.1715, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 1.8334932309988274, | |
| "grad_norm": 1295.3619384765625, | |
| "learning_rate": 1.861006376216515e-05, | |
| "loss": 0.1728, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 1.8361581920903953, | |
| "grad_norm": 13706.5322265625, | |
| "learning_rate": 1.85903231537596e-05, | |
| "loss": 0.1768, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 1.8388231531819637, | |
| "grad_norm": 36329.11328125, | |
| "learning_rate": 1.857058254535405e-05, | |
| "loss": 0.1821, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.8388231531819637, | |
| "eval_dev_accuracy": 0.9732900103281642, | |
| "eval_dev_accuracy_threshold": 0.9531596899032593, | |
| "eval_dev_average_precision": 0.8750664616109699, | |
| "eval_dev_f1": 0.8036220816059348, | |
| "eval_dev_f1_threshold": 0.925843358039856, | |
| "eval_dev_precision": 0.8002172732210755, | |
| "eval_dev_recall": 0.807055987728717, | |
| "eval_loss": 0.22201138734817505, | |
| "eval_runtime": 933.9653, | |
| "eval_samples_per_second": 142.026, | |
| "eval_steps_per_second": 2.22, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.8414881142735315, | |
| "grad_norm": 21184.15234375, | |
| "learning_rate": 1.8550841936948497e-05, | |
| "loss": 0.1925, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 1.8441530753650996, | |
| "grad_norm": 1523.7003173828125, | |
| "learning_rate": 1.8531101328542947e-05, | |
| "loss": 0.1761, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 1.8468180364566678, | |
| "grad_norm": 18345.251953125, | |
| "learning_rate": 1.8511360720137397e-05, | |
| "loss": 0.1656, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 1.8494829975482356, | |
| "grad_norm": 3282.25830078125, | |
| "learning_rate": 1.8491620111731844e-05, | |
| "loss": 0.2208, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 1.852147958639804, | |
| "grad_norm": 10842.587890625, | |
| "learning_rate": 1.8471879503326294e-05, | |
| "loss": 0.1579, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.852147958639804, | |
| "eval_dev_accuracy": 0.9733653983882032, | |
| "eval_dev_accuracy_threshold": 0.9553133249282837, | |
| "eval_dev_average_precision": 0.8719043392702807, | |
| "eval_dev_f1": 0.8058681249342727, | |
| "eval_dev_f1_threshold": 0.8481921553611755, | |
| "eval_dev_precision": 0.7747447174198766, | |
| "eval_dev_recall": 0.8395968007012161, | |
| "eval_loss": 0.2123890370130539, | |
| "eval_runtime": 932.9373, | |
| "eval_samples_per_second": 142.182, | |
| "eval_steps_per_second": 2.222, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.8548129197313719, | |
| "grad_norm": 14122.6357421875, | |
| "learning_rate": 1.8452138894920744e-05, | |
| "loss": 0.1684, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 1.85747788082294, | |
| "grad_norm": 22713.14453125, | |
| "learning_rate": 1.843239828651519e-05, | |
| "loss": 0.207, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 1.860142841914508, | |
| "grad_norm": 21279.48828125, | |
| "learning_rate": 1.841265767810964e-05, | |
| "loss": 0.1679, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 1.862807803006076, | |
| "grad_norm": 1724.1683349609375, | |
| "learning_rate": 1.839291706970409e-05, | |
| "loss": 0.1658, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 1.8654727640976443, | |
| "grad_norm": 25310.3359375, | |
| "learning_rate": 1.8373176461298537e-05, | |
| "loss": 0.2035, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.8654727640976443, | |
| "eval_dev_accuracy": 0.9732146222681252, | |
| "eval_dev_accuracy_threshold": 0.9318354725837708, | |
| "eval_dev_average_precision": 0.8761383143347535, | |
| "eval_dev_f1": 0.8027572731220147, | |
| "eval_dev_f1_threshold": 0.7808271646499634, | |
| "eval_dev_precision": 0.7954178767344304, | |
| "eval_dev_recall": 0.8102333735071765, | |
| "eval_loss": 0.2241707593202591, | |
| "eval_runtime": 934.0769, | |
| "eval_samples_per_second": 142.009, | |
| "eval_steps_per_second": 2.219, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.8681377251892122, | |
| "grad_norm": 1192.052978515625, | |
| "learning_rate": 1.8353435852892987e-05, | |
| "loss": 0.1671, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 1.8708026862807803, | |
| "grad_norm": 3381.109375, | |
| "learning_rate": 1.8333695244487437e-05, | |
| "loss": 0.1777, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 1.8734676473723484, | |
| "grad_norm": 2287.74267578125, | |
| "learning_rate": 1.8313954636081884e-05, | |
| "loss": 0.1894, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 1.8761326084639163, | |
| "grad_norm": 5671.9111328125, | |
| "learning_rate": 1.8294214027676334e-05, | |
| "loss": 0.2227, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 1.8787975695554846, | |
| "grad_norm": 8669.9560546875, | |
| "learning_rate": 1.8274473419270784e-05, | |
| "loss": 0.1754, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.8787975695554846, | |
| "eval_dev_accuracy": 0.9732221610741291, | |
| "eval_dev_accuracy_threshold": 0.9238910675048828, | |
| "eval_dev_average_precision": 0.8787022531852614, | |
| "eval_dev_f1": 0.8059863355384449, | |
| "eval_dev_f1_threshold": 0.760931134223938, | |
| "eval_dev_precision": 0.7978529253891573, | |
| "eval_dev_recall": 0.8142872795003835, | |
| "eval_loss": 0.1879546046257019, | |
| "eval_runtime": 934.0481, | |
| "eval_samples_per_second": 142.013, | |
| "eval_steps_per_second": 2.219, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.8814625306470525, | |
| "grad_norm": 21830.791015625, | |
| "learning_rate": 1.825473281086523e-05, | |
| "loss": 0.1683, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 1.8841274917386206, | |
| "grad_norm": 5870.6396484375, | |
| "learning_rate": 1.823499220245968e-05, | |
| "loss": 0.1618, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 1.8867924528301887, | |
| "grad_norm": 9237.384765625, | |
| "learning_rate": 1.821525159405413e-05, | |
| "loss": 0.1806, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 1.8894574139217566, | |
| "grad_norm": 5946.40380859375, | |
| "learning_rate": 1.8195510985648577e-05, | |
| "loss": 0.1701, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 1.892122375013325, | |
| "grad_norm": 4265.1650390625, | |
| "learning_rate": 1.8175770377243027e-05, | |
| "loss": 0.1752, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.892122375013325, | |
| "eval_dev_accuracy": 0.9730336909240315, | |
| "eval_dev_accuracy_threshold": 0.9348808526992798, | |
| "eval_dev_average_precision": 0.8700561831987852, | |
| "eval_dev_f1": 0.8034291366708798, | |
| "eval_dev_f1_threshold": 0.9348808526992798, | |
| "eval_dev_precision": 0.8059536934950385, | |
| "eval_dev_recall": 0.8009203462254848, | |
| "eval_loss": 0.201664537191391, | |
| "eval_runtime": 931.3245, | |
| "eval_samples_per_second": 142.428, | |
| "eval_steps_per_second": 2.226, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.8947873361048928, | |
| "grad_norm": 2272.4169921875, | |
| "learning_rate": 1.8156029768837477e-05, | |
| "loss": 0.1688, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 1.897452297196461, | |
| "grad_norm": 11893.5654296875, | |
| "learning_rate": 1.8136289160431924e-05, | |
| "loss": 0.184, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 1.900117258288029, | |
| "grad_norm": 3861.369384765625, | |
| "learning_rate": 1.8116548552026374e-05, | |
| "loss": 0.1665, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 1.902782219379597, | |
| "grad_norm": 35609.0, | |
| "learning_rate": 1.8096807943620824e-05, | |
| "loss": 0.1749, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 1.9054471804711652, | |
| "grad_norm": 11618.3125, | |
| "learning_rate": 1.8077067335215274e-05, | |
| "loss": 0.1899, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.9054471804711652, | |
| "eval_dev_accuracy": 0.9734483252542462, | |
| "eval_dev_accuracy_threshold": 0.943538248538971, | |
| "eval_dev_average_precision": 0.8746432264035248, | |
| "eval_dev_f1": 0.8067354698533405, | |
| "eval_dev_f1_threshold": 0.9360702037811279, | |
| "eval_dev_precision": 0.7999569104815254, | |
| "eval_dev_recall": 0.8136298893393229, | |
| "eval_loss": 0.20475232601165771, | |
| "eval_runtime": 860.459, | |
| "eval_samples_per_second": 154.158, | |
| "eval_steps_per_second": 2.409, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.9081121415627331, | |
| "grad_norm": 8260.7607421875, | |
| "learning_rate": 1.805732672680972e-05, | |
| "loss": 0.1886, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 1.9107771026543012, | |
| "grad_norm": 47676.78125, | |
| "learning_rate": 1.803758611840417e-05, | |
| "loss": 0.1858, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 1.9134420637458693, | |
| "grad_norm": 554.1092529296875, | |
| "learning_rate": 1.801784550999862e-05, | |
| "loss": 0.165, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 1.9161070248374372, | |
| "grad_norm": 12699.4365234375, | |
| "learning_rate": 1.7998104901593067e-05, | |
| "loss": 0.1784, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 1.9187719859290056, | |
| "grad_norm": 4534.798828125, | |
| "learning_rate": 1.7978364293187517e-05, | |
| "loss": 0.1767, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.9187719859290056, | |
| "eval_dev_accuracy": 0.9739835804805235, | |
| "eval_dev_accuracy_threshold": 0.9395354986190796, | |
| "eval_dev_average_precision": 0.8772612012666982, | |
| "eval_dev_f1": 0.8093941820122765, | |
| "eval_dev_f1_threshold": 0.875823974609375, | |
| "eval_dev_precision": 0.7891340549542049, | |
| "eval_dev_recall": 0.8307220335268982, | |
| "eval_loss": 0.20605036616325378, | |
| "eval_runtime": 861.3232, | |
| "eval_samples_per_second": 154.004, | |
| "eval_steps_per_second": 2.407, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.9214369470205734, | |
| "grad_norm": 65605.9375, | |
| "learning_rate": 1.7958623684781968e-05, | |
| "loss": 0.1687, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 1.9241019081121415, | |
| "grad_norm": 11532.1455078125, | |
| "learning_rate": 1.7938883076376414e-05, | |
| "loss": 0.1664, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 1.9267668692037097, | |
| "grad_norm": 11916.1513671875, | |
| "learning_rate": 1.7919142467970864e-05, | |
| "loss": 0.1669, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 1.9294318302952775, | |
| "grad_norm": 2029.2286376953125, | |
| "learning_rate": 1.7899401859565314e-05, | |
| "loss": 0.1787, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 1.9320967913868459, | |
| "grad_norm": 6753.46142578125, | |
| "learning_rate": 1.787966125115976e-05, | |
| "loss": 0.1728, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 1.9320967913868459, | |
| "eval_dev_accuracy": 0.9743379043627071, | |
| "eval_dev_accuracy_threshold": 0.8970457315444946, | |
| "eval_dev_average_precision": 0.8806920275415929, | |
| "eval_dev_f1": 0.8153239556692241, | |
| "eval_dev_f1_threshold": 0.7824004888534546, | |
| "eval_dev_precision": 0.7935898765688206, | |
| "eval_dev_recall": 0.8382820203790949, | |
| "eval_loss": 0.19223952293395996, | |
| "eval_runtime": 862.5657, | |
| "eval_samples_per_second": 153.782, | |
| "eval_steps_per_second": 2.403, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 1.9347617524784138, | |
| "grad_norm": 27343.193359375, | |
| "learning_rate": 1.785992064275421e-05, | |
| "loss": 0.1443, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 1.9374267135699819, | |
| "grad_norm": 13309.6455078125, | |
| "learning_rate": 1.784018003434866e-05, | |
| "loss": 0.1569, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 1.94009167466155, | |
| "grad_norm": 1874.899169921875, | |
| "learning_rate": 1.7820439425943108e-05, | |
| "loss": 0.1931, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 1.9427566357531179, | |
| "grad_norm": 31156.685546875, | |
| "learning_rate": 1.7800698817537558e-05, | |
| "loss": 0.1811, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 1.9454215968446862, | |
| "grad_norm": 4346.09912109375, | |
| "learning_rate": 1.7780958209132008e-05, | |
| "loss": 0.1836, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.9454215968446862, | |
| "eval_dev_accuracy": 0.9730563073420432, | |
| "eval_dev_accuracy_threshold": 0.9250275492668152, | |
| "eval_dev_average_precision": 0.8743046594125137, | |
| "eval_dev_f1": 0.8057607880929436, | |
| "eval_dev_f1_threshold": 0.8426618576049805, | |
| "eval_dev_precision": 0.7878756151188357, | |
| "eval_dev_recall": 0.8244768269968226, | |
| "eval_loss": 0.207134410738945, | |
| "eval_runtime": 861.6487, | |
| "eval_samples_per_second": 153.946, | |
| "eval_steps_per_second": 2.406, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.948086557936254, | |
| "grad_norm": 5061.1884765625, | |
| "learning_rate": 1.7761217600726454e-05, | |
| "loss": 0.1739, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 1.9507515190278222, | |
| "grad_norm": 103200.015625, | |
| "learning_rate": 1.7741476992320904e-05, | |
| "loss": 0.1966, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 1.9534164801193903, | |
| "grad_norm": 18783.486328125, | |
| "learning_rate": 1.7721736383915354e-05, | |
| "loss": 0.1723, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 1.9560814412109582, | |
| "grad_norm": 13243.9150390625, | |
| "learning_rate": 1.7701995775509804e-05, | |
| "loss": 0.1698, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 1.9587464023025265, | |
| "grad_norm": 4332.658203125, | |
| "learning_rate": 1.768225516710425e-05, | |
| "loss": 0.1801, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 1.9587464023025265, | |
| "eval_dev_accuracy": 0.972988458088008, | |
| "eval_dev_accuracy_threshold": 0.9180799126625061, | |
| "eval_dev_average_precision": 0.8762342719828209, | |
| "eval_dev_f1": 0.8045175392942646, | |
| "eval_dev_f1_threshold": 0.7035636901855469, | |
| "eval_dev_precision": 0.7662337662337663, | |
| "eval_dev_recall": 0.8468280924728827, | |
| "eval_loss": 0.18561449646949768, | |
| "eval_runtime": 862.9273, | |
| "eval_samples_per_second": 153.717, | |
| "eval_steps_per_second": 2.402, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 1.9614113633940944, | |
| "grad_norm": 13960.3876953125, | |
| "learning_rate": 1.76625145586987e-05, | |
| "loss": 0.1599, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 1.9640763244856625, | |
| "grad_norm": 12248.2890625, | |
| "learning_rate": 1.764277395029315e-05, | |
| "loss": 0.1722, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 1.9667412855772306, | |
| "grad_norm": 20745.55859375, | |
| "learning_rate": 1.7623033341887598e-05, | |
| "loss": 0.1708, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 1.9694062466687985, | |
| "grad_norm": 13722.9697265625, | |
| "learning_rate": 1.7603292733482048e-05, | |
| "loss": 0.1662, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 1.9720712077603668, | |
| "grad_norm": 18372.69140625, | |
| "learning_rate": 1.7583552125076498e-05, | |
| "loss": 0.1716, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.9720712077603668, | |
| "eval_dev_accuracy": 0.9739232700324922, | |
| "eval_dev_accuracy_threshold": 0.8308413624763489, | |
| "eval_dev_average_precision": 0.8841492699463087, | |
| "eval_dev_f1": 0.8137931034482759, | |
| "eval_dev_f1_threshold": 0.6751728057861328, | |
| "eval_dev_precision": 0.7832387515200648, | |
| "eval_dev_recall": 0.8468280924728827, | |
| "eval_loss": 0.2016657292842865, | |
| "eval_runtime": 862.1524, | |
| "eval_samples_per_second": 153.856, | |
| "eval_steps_per_second": 2.404, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.9747361688519347, | |
| "grad_norm": 22373.701171875, | |
| "learning_rate": 1.7563811516670944e-05, | |
| "loss": 0.1741, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 1.9774011299435028, | |
| "grad_norm": 1855.767822265625, | |
| "learning_rate": 1.7544070908265394e-05, | |
| "loss": 0.1318, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 1.980066091035071, | |
| "grad_norm": 20893.662109375, | |
| "learning_rate": 1.7524330299859844e-05, | |
| "loss": 0.1782, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 1.9827310521266388, | |
| "grad_norm": 1626.1358642578125, | |
| "learning_rate": 1.750458969145429e-05, | |
| "loss": 0.1842, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 1.9853960132182071, | |
| "grad_norm": 8638.869140625, | |
| "learning_rate": 1.748484908304874e-05, | |
| "loss": 0.1545, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 1.9853960132182071, | |
| "eval_dev_accuracy": 0.9740815849585742, | |
| "eval_dev_accuracy_threshold": 0.7622551918029785, | |
| "eval_dev_average_precision": 0.8838940929517627, | |
| "eval_dev_f1": 0.8130659767141011, | |
| "eval_dev_f1_threshold": 0.6812475919723511, | |
| "eval_dev_precision": 0.800212201591512, | |
| "eval_dev_recall": 0.826339432453161, | |
| "eval_loss": 0.21240267157554626, | |
| "eval_runtime": 862.2203, | |
| "eval_samples_per_second": 153.844, | |
| "eval_steps_per_second": 2.404, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 1.988060974309775, | |
| "grad_norm": 12036.10546875, | |
| "learning_rate": 1.746510847464319e-05, | |
| "loss": 0.1786, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 1.9907259354013431, | |
| "grad_norm": 3197.989013671875, | |
| "learning_rate": 1.7445367866237638e-05, | |
| "loss": 0.1589, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 1.9933908964929112, | |
| "grad_norm": 2326.903564453125, | |
| "learning_rate": 1.7425627257832088e-05, | |
| "loss": 0.1712, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 1.9960558575844791, | |
| "grad_norm": 13623.826171875, | |
| "learning_rate": 1.7405886649426538e-05, | |
| "loss": 0.1761, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 1.9987208186760475, | |
| "grad_norm": 7701.57861328125, | |
| "learning_rate": 1.7386146041020984e-05, | |
| "loss": 0.1958, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 1.9987208186760475, | |
| "eval_dev_accuracy": 0.9734558640602501, | |
| "eval_dev_accuracy_threshold": 0.957332968711853, | |
| "eval_dev_average_precision": 0.8773248937578426, | |
| "eval_dev_f1": 0.8058651661075641, | |
| "eval_dev_f1_threshold": 0.763139009475708, | |
| "eval_dev_precision": 0.796044895777659, | |
| "eval_dev_recall": 0.8159307549030349, | |
| "eval_loss": 0.25920844078063965, | |
| "eval_runtime": 862.3734, | |
| "eval_samples_per_second": 153.816, | |
| "eval_steps_per_second": 2.404, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 2.0013857797676153, | |
| "grad_norm": 19200.3828125, | |
| "learning_rate": 1.7366405432615434e-05, | |
| "loss": 0.1859, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 2.0040507408591837, | |
| "grad_norm": 27715.55859375, | |
| "learning_rate": 1.7346664824209884e-05, | |
| "loss": 0.215, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 2.0067157019507516, | |
| "grad_norm": 14230.0625, | |
| "learning_rate": 1.7326924215804334e-05, | |
| "loss": 0.1883, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 2.0093806630423194, | |
| "grad_norm": 214.24032592773438, | |
| "learning_rate": 1.730718360739878e-05, | |
| "loss": 0.1771, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 2.0120456241338878, | |
| "grad_norm": 11949.2451171875, | |
| "learning_rate": 1.728744299899323e-05, | |
| "loss": 0.1568, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 2.0120456241338878, | |
| "eval_dev_accuracy": 0.9732749327161564, | |
| "eval_dev_accuracy_threshold": 0.9531142115592957, | |
| "eval_dev_average_precision": 0.8772400052614694, | |
| "eval_dev_f1": 0.8078490242333263, | |
| "eval_dev_f1_threshold": 0.9034242630004883, | |
| "eval_dev_precision": 0.7909711286089239, | |
| "eval_dev_recall": 0.8254629122384135, | |
| "eval_loss": 0.27995508909225464, | |
| "eval_runtime": 861.968, | |
| "eval_samples_per_second": 153.889, | |
| "eval_steps_per_second": 2.405, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 2.0147105852254557, | |
| "grad_norm": 1409.49951171875, | |
| "learning_rate": 1.726770239058768e-05, | |
| "loss": 0.1797, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 2.017375546317024, | |
| "grad_norm": 5395.6484375, | |
| "learning_rate": 1.7247961782182128e-05, | |
| "loss": 0.1659, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 2.020040507408592, | |
| "grad_norm": 49720.015625, | |
| "learning_rate": 1.7228221173776578e-05, | |
| "loss": 0.1519, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 2.0227054685001598, | |
| "grad_norm": 39423.91015625, | |
| "learning_rate": 1.7208480565371028e-05, | |
| "loss": 0.1366, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 2.025370429591728, | |
| "grad_norm": 1205.4697265625, | |
| "learning_rate": 1.7188739956965474e-05, | |
| "loss": 0.1641, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 2.025370429591728, | |
| "eval_dev_accuracy": 0.9739760416745196, | |
| "eval_dev_accuracy_threshold": 0.9528675079345703, | |
| "eval_dev_average_precision": 0.8829642344114682, | |
| "eval_dev_f1": 0.8102727032036007, | |
| "eval_dev_f1_threshold": 0.8193379640579224, | |
| "eval_dev_precision": 0.7840746054519369, | |
| "eval_dev_recall": 0.8382820203790949, | |
| "eval_loss": 0.22183284163475037, | |
| "eval_runtime": 861.7487, | |
| "eval_samples_per_second": 153.928, | |
| "eval_steps_per_second": 2.406, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 2.028035390683296, | |
| "grad_norm": 143011.90625, | |
| "learning_rate": 1.7168999348559924e-05, | |
| "loss": 0.1551, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 2.0307003517748643, | |
| "grad_norm": 3733.740234375, | |
| "learning_rate": 1.7149258740154374e-05, | |
| "loss": 0.1612, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 2.033365312866432, | |
| "grad_norm": 13346.1015625, | |
| "learning_rate": 1.712951813174882e-05, | |
| "loss": 0.1643, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 2.036030273958, | |
| "grad_norm": 10167.767578125, | |
| "learning_rate": 1.710977752334327e-05, | |
| "loss": 0.1692, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 2.0386952350495684, | |
| "grad_norm": 26428.076171875, | |
| "learning_rate": 1.709003691493772e-05, | |
| "loss": 0.1708, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 2.0386952350495684, | |
| "eval_dev_accuracy": 0.9733277043581837, | |
| "eval_dev_accuracy_threshold": 0.9573899507522583, | |
| "eval_dev_average_precision": 0.8690568245333676, | |
| "eval_dev_f1": 0.8137024870952604, | |
| "eval_dev_f1_threshold": 0.8371973037719727, | |
| "eval_dev_precision": 0.7762634301631516, | |
| "eval_dev_recall": 0.8549359044592966, | |
| "eval_loss": 0.21817246079444885, | |
| "eval_runtime": 861.7458, | |
| "eval_samples_per_second": 153.928, | |
| "eval_steps_per_second": 2.406, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 2.0413601961411363, | |
| "grad_norm": 22541.1796875, | |
| "learning_rate": 1.7070296306532168e-05, | |
| "loss": 0.165, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 2.0440251572327046, | |
| "grad_norm": 49104.6015625, | |
| "learning_rate": 1.7050555698126618e-05, | |
| "loss": 0.1445, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 2.0466901183242725, | |
| "grad_norm": 47796.04296875, | |
| "learning_rate": 1.7030815089721068e-05, | |
| "loss": 0.1354, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 2.0493550794158404, | |
| "grad_norm": 21167.962890625, | |
| "learning_rate": 1.7011074481315514e-05, | |
| "loss": 0.1787, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 2.0520200405074087, | |
| "grad_norm": 75447.2890625, | |
| "learning_rate": 1.6991333872909964e-05, | |
| "loss": 0.1626, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 2.0520200405074087, | |
| "eval_dev_accuracy": 0.9745339133188086, | |
| "eval_dev_accuracy_threshold": 0.9593422412872314, | |
| "eval_dev_average_precision": 0.8806603026145806, | |
| "eval_dev_f1": 0.8148537765621713, | |
| "eval_dev_f1_threshold": 0.781623363494873, | |
| "eval_dev_precision": 0.7836115326251897, | |
| "eval_dev_recall": 0.848690697929221, | |
| "eval_loss": 0.2216637134552002, | |
| "eval_runtime": 862.3061, | |
| "eval_samples_per_second": 153.828, | |
| "eval_steps_per_second": 2.404, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 2.0546850015989766, | |
| "grad_norm": 4420.5458984375, | |
| "learning_rate": 1.6971593264504414e-05, | |
| "loss": 0.1418, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 2.057349962690545, | |
| "grad_norm": 14327.546875, | |
| "learning_rate": 1.695185265609886e-05, | |
| "loss": 0.2011, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 2.060014923782113, | |
| "grad_norm": 19713.06640625, | |
| "learning_rate": 1.693211204769331e-05, | |
| "loss": 0.1593, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 2.0626798848736807, | |
| "grad_norm": 5675.8125, | |
| "learning_rate": 1.691237143928776e-05, | |
| "loss": 0.1546, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 2.065344845965249, | |
| "grad_norm": 7002.0654296875, | |
| "learning_rate": 1.6892630830882208e-05, | |
| "loss": 0.177, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 2.065344845965249, | |
| "eval_dev_accuracy": 0.9752048670531561, | |
| "eval_dev_accuracy_threshold": 0.8865873217582703, | |
| "eval_dev_average_precision": 0.8890707955101652, | |
| "eval_dev_f1": 0.8212508115126596, | |
| "eval_dev_f1_threshold": 0.8439962863922119, | |
| "eval_dev_precision": 0.8111574222507214, | |
| "eval_dev_recall": 0.8315985537416457, | |
| "eval_loss": 0.21185144782066345, | |
| "eval_runtime": 860.1662, | |
| "eval_samples_per_second": 154.211, | |
| "eval_steps_per_second": 2.41, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 2.068009807056817, | |
| "grad_norm": 418.3937683105469, | |
| "learning_rate": 1.6872890222476658e-05, | |
| "loss": 0.1546, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 2.0706747681483852, | |
| "grad_norm": 47829.74609375, | |
| "learning_rate": 1.6853149614071108e-05, | |
| "loss": 0.1766, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 2.073339729239953, | |
| "grad_norm": 395.5926208496094, | |
| "learning_rate": 1.6833409005665554e-05, | |
| "loss": 0.1879, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 2.076004690331521, | |
| "grad_norm": 13378.1806640625, | |
| "learning_rate": 1.6813668397260004e-05, | |
| "loss": 0.1694, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 2.0786696514230893, | |
| "grad_norm": 4878.7451171875, | |
| "learning_rate": 1.6793927788854454e-05, | |
| "loss": 0.1546, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 2.0786696514230893, | |
| "eval_dev_accuracy": 0.9736971058523751, | |
| "eval_dev_accuracy_threshold": 0.9617332220077515, | |
| "eval_dev_average_precision": 0.8737670860803924, | |
| "eval_dev_f1": 0.8101625374783019, | |
| "eval_dev_f1_threshold": 0.8637624979019165, | |
| "eval_dev_precision": 0.7791380008093889, | |
| "eval_dev_recall": 0.8437602717212666, | |
| "eval_loss": 0.24948453903198242, | |
| "eval_runtime": 861.1759, | |
| "eval_samples_per_second": 154.03, | |
| "eval_steps_per_second": 2.407, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 2.0813346125146572, | |
| "grad_norm": 26331.390625, | |
| "learning_rate": 1.67741871804489e-05, | |
| "loss": 0.1742, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 2.0839995736062256, | |
| "grad_norm": 5203.9365234375, | |
| "learning_rate": 1.675444657204335e-05, | |
| "loss": 0.2024, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 2.0866645346977934, | |
| "grad_norm": 27641.3671875, | |
| "learning_rate": 1.67347059636378e-05, | |
| "loss": 0.2126, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 2.0893294957893613, | |
| "grad_norm": 3783.3671875, | |
| "learning_rate": 1.6714965355232248e-05, | |
| "loss": 0.1747, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 2.0919944568809297, | |
| "grad_norm": 20038.98046875, | |
| "learning_rate": 1.6695224746826698e-05, | |
| "loss": 0.1807, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 2.0919944568809297, | |
| "eval_dev_accuracy": 0.9743303655567032, | |
| "eval_dev_accuracy_threshold": 0.9270470142364502, | |
| "eval_dev_average_precision": 0.8818386397835865, | |
| "eval_dev_f1": 0.816217350257002, | |
| "eval_dev_f1_threshold": 0.7469815015792847, | |
| "eval_dev_precision": 0.7828755407988731, | |
| "eval_dev_recall": 0.8525254738687411, | |
| "eval_loss": 0.21055419743061066, | |
| "eval_runtime": 861.287, | |
| "eval_samples_per_second": 154.01, | |
| "eval_steps_per_second": 2.407, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 2.0946594179724976, | |
| "grad_norm": 18032.57421875, | |
| "learning_rate": 1.6675484138421148e-05, | |
| "loss": 0.1805, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 2.097324379064066, | |
| "grad_norm": 13172.416015625, | |
| "learning_rate": 1.6655743530015594e-05, | |
| "loss": 0.1498, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 2.0999893401556338, | |
| "grad_norm": 10491.02734375, | |
| "learning_rate": 1.6636002921610045e-05, | |
| "loss": 0.1899, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 2.1026543012472017, | |
| "grad_norm": 3893.85107421875, | |
| "learning_rate": 1.6616262313204495e-05, | |
| "loss": 0.1924, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 2.10531926233877, | |
| "grad_norm": 1639.23486328125, | |
| "learning_rate": 1.659652170479894e-05, | |
| "loss": 0.1521, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 2.10531926233877, | |
| "eval_dev_accuracy": 0.9743982148107383, | |
| "eval_dev_accuracy_threshold": 0.9525002837181091, | |
| "eval_dev_average_precision": 0.883524287942099, | |
| "eval_dev_f1": 0.8129610403803071, | |
| "eval_dev_f1_threshold": 0.9087203145027161, | |
| "eval_dev_precision": 0.8108785698713756, | |
| "eval_dev_recall": 0.8150542346882875, | |
| "eval_loss": 0.24836769700050354, | |
| "eval_runtime": 952.3381, | |
| "eval_samples_per_second": 139.286, | |
| "eval_steps_per_second": 2.177, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 2.107984223430338, | |
| "grad_norm": 7783.5283203125, | |
| "learning_rate": 1.657678109639339e-05, | |
| "loss": 0.1988, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 2.1106491845219058, | |
| "grad_norm": 1583.300537109375, | |
| "learning_rate": 1.655704048798784e-05, | |
| "loss": 0.1702, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 2.113314145613474, | |
| "grad_norm": 1492.0706787109375, | |
| "learning_rate": 1.6537299879582288e-05, | |
| "loss": 0.1824, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 2.115979106705042, | |
| "grad_norm": 18683.794921875, | |
| "learning_rate": 1.651755927117674e-05, | |
| "loss": 0.1688, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 2.1186440677966103, | |
| "grad_norm": 8736.2275390625, | |
| "learning_rate": 1.6497818662771188e-05, | |
| "loss": 0.1809, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 2.1186440677966103, | |
| "eval_dev_accuracy": 0.9739685028685157, | |
| "eval_dev_accuracy_threshold": 0.9717953205108643, | |
| "eval_dev_average_precision": 0.8798479877006415, | |
| "eval_dev_f1": 0.8135902528044657, | |
| "eval_dev_f1_threshold": 0.9465633630752563, | |
| "eval_dev_precision": 0.7974537037037037, | |
| "eval_dev_recall": 0.8303933384463679, | |
| "eval_loss": 0.22024385631084442, | |
| "eval_runtime": 951.2023, | |
| "eval_samples_per_second": 139.452, | |
| "eval_steps_per_second": 2.179, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 2.121309028888178, | |
| "grad_norm": 54950.51953125, | |
| "learning_rate": 1.6478078054365635e-05, | |
| "loss": 0.1858, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 2.1239739899797465, | |
| "grad_norm": 19716.146484375, | |
| "learning_rate": 1.6458337445960088e-05, | |
| "loss": 0.1642, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 2.1266389510713144, | |
| "grad_norm": 18239.75, | |
| "learning_rate": 1.6438596837554535e-05, | |
| "loss": 0.191, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 2.1293039121628823, | |
| "grad_norm": 41301.21875, | |
| "learning_rate": 1.641885622914898e-05, | |
| "loss": 0.1655, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 2.1319688732544506, | |
| "grad_norm": 1119.526123046875, | |
| "learning_rate": 1.6399115620743435e-05, | |
| "loss": 0.1789, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.1319688732544506, | |
| "eval_dev_accuracy": 0.9743152879446954, | |
| "eval_dev_accuracy_threshold": 0.8854852914810181, | |
| "eval_dev_average_precision": 0.8771901487923467, | |
| "eval_dev_f1": 0.813726025900224, | |
| "eval_dev_f1_threshold": 0.8826526403427124, | |
| "eval_dev_precision": 0.8116415958142577, | |
| "eval_dev_recall": 0.8158211898761916, | |
| "eval_loss": 0.1959654837846756, | |
| "eval_runtime": 952.4132, | |
| "eval_samples_per_second": 139.275, | |
| "eval_steps_per_second": 2.177, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.1346338343460185, | |
| "grad_norm": 3469.789794921875, | |
| "learning_rate": 1.637937501233788e-05, | |
| "loss": 0.2002, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 2.1372987954375864, | |
| "grad_norm": 15840.623046875, | |
| "learning_rate": 1.635963440393233e-05, | |
| "loss": 0.2139, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 2.1399637565291547, | |
| "grad_norm": 24576.1328125, | |
| "learning_rate": 1.633989379552678e-05, | |
| "loss": 0.199, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 2.1426287176207226, | |
| "grad_norm": 9852.4111328125, | |
| "learning_rate": 1.6320153187121228e-05, | |
| "loss": 0.165, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 2.145293678712291, | |
| "grad_norm": 280.64031982421875, | |
| "learning_rate": 1.6300412578715678e-05, | |
| "loss": 0.1848, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 2.145293678712291, | |
| "eval_dev_accuracy": 0.9742022058546368, | |
| "eval_dev_accuracy_threshold": 0.9753606915473938, | |
| "eval_dev_average_precision": 0.8782336024461705, | |
| "eval_dev_f1": 0.8096592433592701, | |
| "eval_dev_f1_threshold": 0.8103638887405396, | |
| "eval_dev_precision": 0.7934371055952881, | |
| "eval_dev_recall": 0.8265585625068478, | |
| "eval_loss": 0.26615819334983826, | |
| "eval_runtime": 951.0255, | |
| "eval_samples_per_second": 139.478, | |
| "eval_steps_per_second": 2.18, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 2.147958639803859, | |
| "grad_norm": 3749.137939453125, | |
| "learning_rate": 1.6280671970310128e-05, | |
| "loss": 0.2118, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 2.150623600895427, | |
| "grad_norm": 16408.94140625, | |
| "learning_rate": 1.6260931361904575e-05, | |
| "loss": 0.177, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 2.153288561986995, | |
| "grad_norm": 122466.71875, | |
| "learning_rate": 1.6241190753499025e-05, | |
| "loss": 0.169, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 2.155953523078563, | |
| "grad_norm": 35088.30078125, | |
| "learning_rate": 1.6221450145093475e-05, | |
| "loss": 0.1748, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 2.1586184841701312, | |
| "grad_norm": 2193.1103515625, | |
| "learning_rate": 1.620170953668792e-05, | |
| "loss": 0.1532, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 2.1586184841701312, | |
| "eval_dev_accuracy": 0.9747299222749101, | |
| "eval_dev_accuracy_threshold": 0.7087757587432861, | |
| "eval_dev_average_precision": 0.8839240203558189, | |
| "eval_dev_f1": 0.8178559791463017, | |
| "eval_dev_f1_threshold": 0.6686054468154907, | |
| "eval_dev_precision": 0.8108108108108109, | |
| "eval_dev_recall": 0.8250246521310398, | |
| "eval_loss": 0.2607557475566864, | |
| "eval_runtime": 952.9522, | |
| "eval_samples_per_second": 139.196, | |
| "eval_steps_per_second": 2.175, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 2.161283445261699, | |
| "grad_norm": 2420.868896484375, | |
| "learning_rate": 1.618196892828237e-05, | |
| "loss": 0.1618, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 2.163948406353267, | |
| "grad_norm": 706.0858764648438, | |
| "learning_rate": 1.616222831987682e-05, | |
| "loss": 0.1679, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 2.1666133674448353, | |
| "grad_norm": 23174.521484375, | |
| "learning_rate": 1.6142487711471268e-05, | |
| "loss": 0.1808, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 2.1692783285364032, | |
| "grad_norm": 15347.12890625, | |
| "learning_rate": 1.6122747103065718e-05, | |
| "loss": 0.1685, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 2.1719432896279716, | |
| "grad_norm": 19526.70703125, | |
| "learning_rate": 1.6103006494660168e-05, | |
| "loss": 0.1901, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 2.1719432896279716, | |
| "eval_dev_accuracy": 0.9744434476467617, | |
| "eval_dev_accuracy_threshold": 0.9750630855560303, | |
| "eval_dev_average_precision": 0.8830413621285588, | |
| "eval_dev_f1": 0.8129956790461085, | |
| "eval_dev_f1_threshold": 0.9695107936859131, | |
| "eval_dev_precision": 0.8117081695063346, | |
| "eval_dev_recall": 0.8142872795003835, | |
| "eval_loss": 0.23483458161354065, | |
| "eval_runtime": 950.8404, | |
| "eval_samples_per_second": 139.505, | |
| "eval_steps_per_second": 2.18, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 2.1746082507195394, | |
| "grad_norm": 1307.5916748046875, | |
| "learning_rate": 1.6083265886254615e-05, | |
| "loss": 0.184, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 2.177273211811108, | |
| "grad_norm": 40642.421875, | |
| "learning_rate": 1.6063525277849065e-05, | |
| "loss": 0.1667, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 2.1799381729026757, | |
| "grad_norm": 1084.0020751953125, | |
| "learning_rate": 1.6043784669443515e-05, | |
| "loss": 0.1816, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 2.1826031339942435, | |
| "grad_norm": 14024.021484375, | |
| "learning_rate": 1.602404406103796e-05, | |
| "loss": 0.159, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 2.185268095085812, | |
| "grad_norm": 8854.5498046875, | |
| "learning_rate": 1.600430345263241e-05, | |
| "loss": 0.1553, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 2.185268095085812, | |
| "eval_dev_accuracy": 0.9748957760069961, | |
| "eval_dev_accuracy_threshold": 0.8661369681358337, | |
| "eval_dev_average_precision": 0.8869519261803035, | |
| "eval_dev_f1": 0.8157429896224332, | |
| "eval_dev_f1_threshold": 0.8445290327072144, | |
| "eval_dev_precision": 0.8220046723773501, | |
| "eval_dev_recall": 0.8095759833461159, | |
| "eval_loss": 0.23748071491718292, | |
| "eval_runtime": 951.5083, | |
| "eval_samples_per_second": 139.407, | |
| "eval_steps_per_second": 2.179, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 2.1879330561773798, | |
| "grad_norm": 44325.265625, | |
| "learning_rate": 1.598456284422686e-05, | |
| "loss": 0.1572, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 2.1905980172689477, | |
| "grad_norm": 1203.1580810546875, | |
| "learning_rate": 1.5964822235821308e-05, | |
| "loss": 0.1629, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 2.193262978360516, | |
| "grad_norm": 745.87353515625, | |
| "learning_rate": 1.5945081627415758e-05, | |
| "loss": 0.194, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 2.195927939452084, | |
| "grad_norm": 17854.037109375, | |
| "learning_rate": 1.5925341019010208e-05, | |
| "loss": 0.1685, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 2.198592900543652, | |
| "grad_norm": 44721.08203125, | |
| "learning_rate": 1.5905600410604655e-05, | |
| "loss": 0.1859, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 2.198592900543652, | |
| "eval_dev_accuracy": 0.974345443168711, | |
| "eval_dev_accuracy_threshold": 0.9739015102386475, | |
| "eval_dev_average_precision": 0.8797686946603407, | |
| "eval_dev_f1": 0.8160733549083065, | |
| "eval_dev_f1_threshold": 0.9577875137329102, | |
| "eval_dev_precision": 0.8036757675555083, | |
| "eval_dev_recall": 0.8288594280705599, | |
| "eval_loss": 0.2292918860912323, | |
| "eval_runtime": 950.1815, | |
| "eval_samples_per_second": 139.602, | |
| "eval_steps_per_second": 2.182, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 2.20125786163522, | |
| "grad_norm": 170.60641479492188, | |
| "learning_rate": 1.5885859802199105e-05, | |
| "loss": 0.1483, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 2.2039228227267884, | |
| "grad_norm": 27626.072265625, | |
| "learning_rate": 1.5866119193793555e-05, | |
| "loss": 0.2056, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 2.2065877838183563, | |
| "grad_norm": 731.1361083984375, | |
| "learning_rate": 1.5846378585388e-05, | |
| "loss": 0.1799, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 2.209252744909924, | |
| "grad_norm": 36164.07421875, | |
| "learning_rate": 1.582663797698245e-05, | |
| "loss": 0.1645, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 2.2119177060014925, | |
| "grad_norm": 6034.74853515625, | |
| "learning_rate": 1.58068973685769e-05, | |
| "loss": 0.1633, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 2.2119177060014925, | |
| "eval_dev_accuracy": 0.9737423386883985, | |
| "eval_dev_accuracy_threshold": 0.950665295124054, | |
| "eval_dev_average_precision": 0.883874392367785, | |
| "eval_dev_f1": 0.8089262330859885, | |
| "eval_dev_f1_threshold": 0.9107600450515747, | |
| "eval_dev_precision": 0.8056732963808282, | |
| "eval_dev_recall": 0.8122055439903583, | |
| "eval_loss": 0.23654605448246002, | |
| "eval_runtime": 951.9974, | |
| "eval_samples_per_second": 139.335, | |
| "eval_steps_per_second": 2.178, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 2.2145826670930604, | |
| "grad_norm": 10695.716796875, | |
| "learning_rate": 1.5787156760171348e-05, | |
| "loss": 0.1714, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 2.2172476281846283, | |
| "grad_norm": 63246.39453125, | |
| "learning_rate": 1.5767416151765798e-05, | |
| "loss": 0.1793, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 2.2199125892761966, | |
| "grad_norm": 1381.2412109375, | |
| "learning_rate": 1.5747675543360248e-05, | |
| "loss": 0.154, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 2.2225775503677645, | |
| "grad_norm": 31067.8828125, | |
| "learning_rate": 1.5727934934954695e-05, | |
| "loss": 0.151, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 2.225242511459333, | |
| "grad_norm": 33396.78125, | |
| "learning_rate": 1.5708194326549148e-05, | |
| "loss": 0.1841, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 2.225242511459333, | |
| "eval_dev_accuracy": 0.9747902327229413, | |
| "eval_dev_accuracy_threshold": 0.9669053554534912, | |
| "eval_dev_average_precision": 0.8854411022874333, | |
| "eval_dev_f1": 0.8149101635827299, | |
| "eval_dev_f1_threshold": 0.9264481067657471, | |
| "eval_dev_precision": 0.7981718848497583, | |
| "eval_dev_recall": 0.8323655089295496, | |
| "eval_loss": 0.20306049287319183, | |
| "eval_runtime": 951.0541, | |
| "eval_samples_per_second": 139.474, | |
| "eval_steps_per_second": 2.18, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 2.2279074725509007, | |
| "grad_norm": 1647.4901123046875, | |
| "learning_rate": 1.5688453718143595e-05, | |
| "loss": 0.1709, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 2.230572433642469, | |
| "grad_norm": 310.0802307128906, | |
| "learning_rate": 1.566871310973804e-05, | |
| "loss": 0.1875, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 2.233237394734037, | |
| "grad_norm": 14275.015625, | |
| "learning_rate": 1.5648972501332495e-05, | |
| "loss": 0.2041, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 2.235902355825605, | |
| "grad_norm": 28323.603515625, | |
| "learning_rate": 1.562923189292694e-05, | |
| "loss": 0.1812, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 2.238567316917173, | |
| "grad_norm": 25161.5546875, | |
| "learning_rate": 1.5609491284521388e-05, | |
| "loss": 0.1779, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 2.238567316917173, | |
| "eval_dev_accuracy": 0.975137017799121, | |
| "eval_dev_accuracy_threshold": 0.9797601699829102, | |
| "eval_dev_average_precision": 0.8889116324411686, | |
| "eval_dev_f1": 0.8158041179744018, | |
| "eval_dev_f1_threshold": 0.9772592782974243, | |
| "eval_dev_precision": 0.8289042180255569, | |
| "eval_dev_recall": 0.8031116467623535, | |
| "eval_loss": 0.2351406365633011, | |
| "eval_runtime": 953.0513, | |
| "eval_samples_per_second": 139.181, | |
| "eval_steps_per_second": 2.175, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 2.241232278008741, | |
| "grad_norm": 1789.634033203125, | |
| "learning_rate": 1.558975067611584e-05, | |
| "loss": 0.224, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 2.243897239100309, | |
| "grad_norm": 5931.00048828125, | |
| "learning_rate": 1.5570010067710288e-05, | |
| "loss": 0.1624, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 2.2465622001918772, | |
| "grad_norm": 18578.33203125, | |
| "learning_rate": 1.5550269459304738e-05, | |
| "loss": 0.1361, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 2.249227161283445, | |
| "grad_norm": 1247.7115478515625, | |
| "learning_rate": 1.5530528850899188e-05, | |
| "loss": 0.1371, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 2.2518921223750135, | |
| "grad_norm": 713.0791625976562, | |
| "learning_rate": 1.5510788242493635e-05, | |
| "loss": 0.2314, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 2.2518921223750135, | |
| "eval_dev_accuracy": 0.974737461080914, | |
| "eval_dev_accuracy_threshold": 0.9574118256568909, | |
| "eval_dev_average_precision": 0.8861569751582977, | |
| "eval_dev_f1": 0.8148996509598603, | |
| "eval_dev_f1_threshold": 0.832693338394165, | |
| "eval_dev_precision": 0.8112715821478987, | |
| "eval_dev_recall": 0.8185603155472773, | |
| "eval_loss": 0.2932807505130768, | |
| "eval_runtime": 951.2089, | |
| "eval_samples_per_second": 139.451, | |
| "eval_steps_per_second": 2.179, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 2.2545570834665813, | |
| "grad_norm": 2987.9375, | |
| "learning_rate": 1.5491047634088085e-05, | |
| "loss": 0.1889, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 2.2572220445581497, | |
| "grad_norm": 2279.125, | |
| "learning_rate": 1.5471307025682535e-05, | |
| "loss": 0.2079, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 2.2598870056497176, | |
| "grad_norm": 1106.462890625, | |
| "learning_rate": 1.545156641727698e-05, | |
| "loss": 0.1783, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 2.2625519667412854, | |
| "grad_norm": 7212.99560546875, | |
| "learning_rate": 1.543182580887143e-05, | |
| "loss": 0.1551, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 2.2652169278328538, | |
| "grad_norm": 22761.849609375, | |
| "learning_rate": 1.541208520046588e-05, | |
| "loss": 0.1606, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.2652169278328538, | |
| "eval_dev_accuracy": 0.9748203879469569, | |
| "eval_dev_accuracy_threshold": 0.9453166723251343, | |
| "eval_dev_average_precision": 0.8915161607864528, | |
| "eval_dev_f1": 0.8188866156993647, | |
| "eval_dev_f1_threshold": 0.8840415477752686, | |
| "eval_dev_precision": 0.8050174658621785, | |
| "eval_dev_recall": 0.8332420291442971, | |
| "eval_loss": 0.2096114605665207, | |
| "eval_runtime": 952.4626, | |
| "eval_samples_per_second": 139.267, | |
| "eval_steps_per_second": 2.176, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.2678818889244217, | |
| "grad_norm": 661.853271484375, | |
| "learning_rate": 1.5392344592060328e-05, | |
| "loss": 0.1887, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 2.2705468500159895, | |
| "grad_norm": 26199.923828125, | |
| "learning_rate": 1.5372603983654778e-05, | |
| "loss": 0.1829, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 2.273211811107558, | |
| "grad_norm": 11920.501953125, | |
| "learning_rate": 1.5352863375249228e-05, | |
| "loss": 0.18, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 2.2758767721991258, | |
| "grad_norm": 13859.2724609375, | |
| "learning_rate": 1.5333122766843675e-05, | |
| "loss": 0.1935, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 2.278541733290694, | |
| "grad_norm": 476.45367431640625, | |
| "learning_rate": 1.5313382158438125e-05, | |
| "loss": 0.1934, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 2.278541733290694, | |
| "eval_dev_accuracy": 0.9749636252610312, | |
| "eval_dev_accuracy_threshold": 0.9518921375274658, | |
| "eval_dev_average_precision": 0.8866670870419442, | |
| "eval_dev_f1": 0.8237035470740602, | |
| "eval_dev_f1_threshold": 0.6820048093795776, | |
| "eval_dev_precision": 0.7958120531154239, | |
| "eval_dev_recall": 0.8536211241371754, | |
| "eval_loss": 0.23012706637382507, | |
| "eval_runtime": 952.604, | |
| "eval_samples_per_second": 139.247, | |
| "eval_steps_per_second": 2.176, | |
| "step": 85500 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 112572, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |