| { |
| "best_global_step": 169584, |
| "best_metric": 0.9357306547788711, |
| "best_model_checkpoint": "/workspace/cannopy/result/train/ner/ner.mm-bert.b32.lr2e-05/checkpoint-169584", |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 211980, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8071305155754089, |
| "learning_rate": 1.933342768185678e-05, |
| "loss": 0.0833, |
| "step": 7066 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9816472726923888, |
| "eval_f1": 0.9143601801750758, |
| "eval_loss": 0.06737913936376572, |
| "eval_precision": 0.9067209043611152, |
| "eval_recall": 0.9221292740903273, |
| "eval_runtime": 63.6285, |
| "eval_samples_per_second": 444.094, |
| "eval_steps_per_second": 13.893, |
| "step": 7066 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.6214423179626465, |
| "learning_rate": 1.8666761015190115e-05, |
| "loss": 0.041, |
| "step": 14132 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9832133401124307, |
| "eval_f1": 0.9247473576465666, |
| "eval_loss": 0.06235222890973091, |
| "eval_precision": 0.9153111601195607, |
| "eval_recall": 0.9343801426220516, |
| "eval_runtime": 26.8725, |
| "eval_samples_per_second": 1051.52, |
| "eval_steps_per_second": 32.896, |
| "step": 14132 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.07880270481109619, |
| "learning_rate": 1.8000094348523447e-05, |
| "loss": 0.0238, |
| "step": 21198 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9830166823353351, |
| "eval_f1": 0.9234717685487634, |
| "eval_loss": 0.07555678486824036, |
| "eval_precision": 0.91971390356147, |
| "eval_recall": 0.9272604680928872, |
| "eval_runtime": 25.0859, |
| "eval_samples_per_second": 1126.41, |
| "eval_steps_per_second": 35.239, |
| "step": 21198 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.9151579141616821, |
| "learning_rate": 1.733342768185678e-05, |
| "loss": 0.0146, |
| "step": 28264 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9836390322518754, |
| "eval_f1": 0.9253655132503873, |
| "eval_loss": 0.08953021466732025, |
| "eval_precision": 0.9188440087882372, |
| "eval_recall": 0.931980252331322, |
| "eval_runtime": 33.5139, |
| "eval_samples_per_second": 843.142, |
| "eval_steps_per_second": 26.377, |
| "step": 28264 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.006677389610558748, |
| "learning_rate": 1.6666761015190114e-05, |
| "loss": 0.0096, |
| "step": 35330 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.983770936858464, |
| "eval_f1": 0.9272781274631878, |
| "eval_loss": 0.08903905749320984, |
| "eval_precision": 0.9269075296320826, |
| "eval_recall": 0.9276490217590053, |
| "eval_runtime": 24.8287, |
| "eval_samples_per_second": 1138.077, |
| "eval_steps_per_second": 35.604, |
| "step": 35330 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.060630589723587036, |
| "learning_rate": 1.6000094348523446e-05, |
| "loss": 0.0069, |
| "step": 42396 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9842493908405441, |
| "eval_f1": 0.9304571065067249, |
| "eval_loss": 0.10069137066602707, |
| "eval_precision": 0.9249096657633243, |
| "eval_recall": 0.9360714938745658, |
| "eval_runtime": 24.4078, |
| "eval_samples_per_second": 1157.705, |
| "eval_steps_per_second": 36.218, |
| "step": 42396 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.6270243525505066, |
| "learning_rate": 1.533342768185678e-05, |
| "loss": 0.0055, |
| "step": 49462 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9841822394044627, |
| "eval_f1": 0.9297173805559187, |
| "eval_loss": 0.1080186516046524, |
| "eval_precision": 0.924709180731881, |
| "eval_recall": 0.9347801243371732, |
| "eval_runtime": 25.0447, |
| "eval_samples_per_second": 1128.262, |
| "eval_steps_per_second": 35.297, |
| "step": 49462 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.0023466802667826414, |
| "learning_rate": 1.4666761015190114e-05, |
| "loss": 0.0044, |
| "step": 56528 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9840215556109821, |
| "eval_f1": 0.9293982272654773, |
| "eval_loss": 0.13059474527835846, |
| "eval_precision": 0.9265480895915679, |
| "eval_recall": 0.9322659535564088, |
| "eval_runtime": 24.3355, |
| "eval_samples_per_second": 1161.141, |
| "eval_steps_per_second": 36.325, |
| "step": 56528 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.28039243817329407, |
| "learning_rate": 1.4000094348523448e-05, |
| "loss": 0.0038, |
| "step": 63594 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.983985581627367, |
| "eval_f1": 0.9288880061288766, |
| "eval_loss": 0.11633551865816116, |
| "eval_precision": 0.9225687907925736, |
| "eval_recall": 0.9352943865423295, |
| "eval_runtime": 25.2015, |
| "eval_samples_per_second": 1121.242, |
| "eval_steps_per_second": 35.077, |
| "step": 63594 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.08014234900474548, |
| "learning_rate": 1.3333427681856781e-05, |
| "loss": 0.0035, |
| "step": 70660 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9841786420061012, |
| "eval_f1": 0.9307753433987968, |
| "eval_loss": 0.12983541190624237, |
| "eval_precision": 0.9246244756191078, |
| "eval_recall": 0.9370085938928506, |
| "eval_runtime": 24.5312, |
| "eval_samples_per_second": 1151.881, |
| "eval_steps_per_second": 36.036, |
| "step": 70660 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.00041744596092030406, |
| "learning_rate": 1.2666761015190115e-05, |
| "loss": 0.0028, |
| "step": 77726 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.9843285336044972, |
| "eval_f1": 0.9306707629288274, |
| "eval_loss": 0.14403043687343597, |
| "eval_precision": 0.9266535245734484, |
| "eval_recall": 0.9347229840921558, |
| "eval_runtime": 25.1796, |
| "eval_samples_per_second": 1122.216, |
| "eval_steps_per_second": 35.108, |
| "step": 77726 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.00028503904468379915, |
| "learning_rate": 1.2000094348523445e-05, |
| "loss": 0.0025, |
| "step": 84792 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9844340572897681, |
| "eval_f1": 0.9317236158307861, |
| "eval_loss": 0.14104171097278595, |
| "eval_precision": 0.9281574978169406, |
| "eval_recall": 0.9353172426403364, |
| "eval_runtime": 24.4922, |
| "eval_samples_per_second": 1153.715, |
| "eval_steps_per_second": 36.093, |
| "step": 84792 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.0035878911148756742, |
| "learning_rate": 1.1333427681856779e-05, |
| "loss": 0.0022, |
| "step": 91858 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.9844544425471499, |
| "eval_f1": 0.9315798786508901, |
| "eval_loss": 0.13236555457115173, |
| "eval_precision": 0.9305759866808069, |
| "eval_recall": 0.9325859389285062, |
| "eval_runtime": 25.03, |
| "eval_samples_per_second": 1128.927, |
| "eval_steps_per_second": 35.318, |
| "step": 91858 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.00016986434638965875, |
| "learning_rate": 1.0666761015190112e-05, |
| "loss": 0.0021, |
| "step": 98924 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9844880182651906, |
| "eval_f1": 0.9330711130112583, |
| "eval_loss": 0.13451294600963593, |
| "eval_precision": 0.9308583842312985, |
| "eval_recall": 0.9352943865423295, |
| "eval_runtime": 24.4183, |
| "eval_samples_per_second": 1157.205, |
| "eval_steps_per_second": 36.202, |
| "step": 98924 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.0010187036823481321, |
| "learning_rate": 1.0000094348523446e-05, |
| "loss": 0.002, |
| "step": 105990 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9843788971815582, |
| "eval_f1": 0.9319383071528353, |
| "eval_loss": 0.13619445264339447, |
| "eval_precision": 0.929939349802574, |
| "eval_recall": 0.9339458767599196, |
| "eval_runtime": 25.0941, |
| "eval_samples_per_second": 1126.042, |
| "eval_steps_per_second": 35.227, |
| "step": 105990 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.00028575636679306626, |
| "learning_rate": 9.333427681856779e-06, |
| "loss": 0.0017, |
| "step": 113056 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9843669058536866, |
| "eval_f1": 0.9323486068748073, |
| "eval_loss": 0.15387865900993347, |
| "eval_precision": 0.9319972593353888, |
| "eval_recall": 0.9327002194185409, |
| "eval_runtime": 24.3506, |
| "eval_samples_per_second": 1160.422, |
| "eval_steps_per_second": 36.303, |
| "step": 113056 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.00035141929402016103, |
| "learning_rate": 8.666761015190113e-06, |
| "loss": 0.0016, |
| "step": 120122 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9846175246062048, |
| "eval_f1": 0.9333576463482042, |
| "eval_loss": 0.14764520525932312, |
| "eval_precision": 0.9308063874524066, |
| "eval_recall": 0.9359229292375205, |
| "eval_runtime": 25.0122, |
| "eval_samples_per_second": 1129.728, |
| "eval_steps_per_second": 35.343, |
| "step": 120122 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.00019602595421019942, |
| "learning_rate": 8.000094348523446e-06, |
| "loss": 0.0014, |
| "step": 127188 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9846570959881813, |
| "eval_f1": 0.9343024019274798, |
| "eval_loss": 0.16399884223937988, |
| "eval_precision": 0.9335402238524992, |
| "eval_recall": 0.93506582556226, |
| "eval_runtime": 24.5918, |
| "eval_samples_per_second": 1149.04, |
| "eval_steps_per_second": 35.947, |
| "step": 127188 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.00016485151718370616, |
| "learning_rate": 7.33342768185678e-06, |
| "loss": 0.0013, |
| "step": 134254 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.984291360488095, |
| "eval_f1": 0.9317829016575214, |
| "eval_loss": 0.16133514046669006, |
| "eval_precision": 0.9272642908070302, |
| "eval_recall": 0.9363457670506491, |
| "eval_runtime": 36.0736, |
| "eval_samples_per_second": 783.315, |
| "eval_steps_per_second": 24.505, |
| "step": 134254 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 3.631131039583124e-05, |
| "learning_rate": 6.666761015190113e-06, |
| "loss": 0.0011, |
| "step": 141320 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9844688321405959, |
| "eval_f1": 0.933409899819197, |
| "eval_loss": 0.15595203638076782, |
| "eval_precision": 0.9287944963678744, |
| "eval_recall": 0.9380714024501737, |
| "eval_runtime": 24.4307, |
| "eval_samples_per_second": 1156.619, |
| "eval_steps_per_second": 36.184, |
| "step": 141320 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.00046227360144257545, |
| "learning_rate": 6.000094348523447e-06, |
| "loss": 0.001, |
| "step": 148386 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.9848237754455977, |
| "eval_f1": 0.9352061312288099, |
| "eval_loss": 0.15601831674575806, |
| "eval_precision": 0.9326181086702049, |
| "eval_recall": 0.9378085573230938, |
| "eval_runtime": 25.2528, |
| "eval_samples_per_second": 1118.963, |
| "eval_steps_per_second": 35.006, |
| "step": 148386 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 1.7225727333425311e-06, |
| "learning_rate": 5.33342768185678e-06, |
| "loss": 0.0009, |
| "step": 155452 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.9847434335488575, |
| "eval_f1": 0.9346046246074793, |
| "eval_loss": 0.17021338641643524, |
| "eval_precision": 0.9338475229902106, |
| "eval_recall": 0.9353629548363503, |
| "eval_runtime": 27.4692, |
| "eval_samples_per_second": 1028.681, |
| "eval_steps_per_second": 32.182, |
| "step": 155452 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.00017540222324896604, |
| "learning_rate": 4.666761015190113e-06, |
| "loss": 0.0009, |
| "step": 162518 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.9847638188062393, |
| "eval_f1": 0.9339199036024054, |
| "eval_loss": 0.16753220558166504, |
| "eval_precision": 0.9290092046045638, |
| "eval_recall": 0.9388827939294204, |
| "eval_runtime": 25.2471, |
| "eval_samples_per_second": 1119.219, |
| "eval_steps_per_second": 35.014, |
| "step": 162518 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 1.526959204056766e-05, |
| "learning_rate": 4.000094348523446e-06, |
| "loss": 0.0008, |
| "step": 169584 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.9849400913259531, |
| "eval_f1": 0.9357306547788711, |
| "eval_loss": 0.17812186479568481, |
| "eval_precision": 0.9335486953158769, |
| "eval_recall": 0.9379228378131286, |
| "eval_runtime": 24.4734, |
| "eval_samples_per_second": 1154.601, |
| "eval_steps_per_second": 36.121, |
| "step": 169584 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 8.847219760355074e-06, |
| "learning_rate": 3.333427681856779e-06, |
| "loss": 0.0007, |
| "step": 176650 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.9848741390226589, |
| "eval_f1": 0.935050123741204, |
| "eval_loss": 0.17935001850128174, |
| "eval_precision": 0.9331451594545993, |
| "eval_recall": 0.9369628816968367, |
| "eval_runtime": 25.1905, |
| "eval_samples_per_second": 1121.735, |
| "eval_steps_per_second": 35.093, |
| "step": 176650 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.00021123145415913314, |
| "learning_rate": 2.6667610151901125e-06, |
| "loss": 0.0007, |
| "step": 183716 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.9848621476947871, |
| "eval_f1": 0.9346344803971922, |
| "eval_loss": 0.17747123539447784, |
| "eval_precision": 0.9334518842760413, |
| "eval_recall": 0.9358200767964893, |
| "eval_runtime": 24.5312, |
| "eval_samples_per_second": 1151.881, |
| "eval_steps_per_second": 36.036, |
| "step": 183716 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 2.859743472072296e-05, |
| "learning_rate": 2.000094348523446e-06, |
| "loss": 0.0007, |
| "step": 190782 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.9849700696456323, |
| "eval_f1": 0.9355334631546582, |
| "eval_loss": 0.1866326779127121, |
| "eval_precision": 0.9322530128685231, |
| "eval_recall": 0.9388370817334065, |
| "eval_runtime": 25.0727, |
| "eval_samples_per_second": 1127.002, |
| "eval_steps_per_second": 35.257, |
| "step": 190782 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.0020877772476524115, |
| "learning_rate": 1.333427681856779e-06, |
| "loss": 0.0007, |
| "step": 197848 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.984804589321003, |
| "eval_f1": 0.9346563738215604, |
| "eval_loss": 0.1925719529390335, |
| "eval_precision": 0.9312210726926219, |
| "eval_recall": 0.9381171146461876, |
| "eval_runtime": 24.3867, |
| "eval_samples_per_second": 1158.706, |
| "eval_steps_per_second": 36.249, |
| "step": 197848 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 1.088813405658584e-06, |
| "learning_rate": 6.667610151901123e-07, |
| "loss": 0.0006, |
| "step": 204914 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.9849496843882504, |
| "eval_f1": 0.9355303716618523, |
| "eval_loss": 0.198269322514534, |
| "eval_precision": 0.9328674507130277, |
| "eval_recall": 0.9382085390382154, |
| "eval_runtime": 25.149, |
| "eval_samples_per_second": 1123.581, |
| "eval_steps_per_second": 35.15, |
| "step": 204914 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 5.2584637160180137e-05, |
| "learning_rate": 9.434852344560809e-11, |
| "loss": 0.0006, |
| "step": 211980 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.9849424895915274, |
| "eval_f1": 0.9356125356125355, |
| "eval_loss": 0.20442676544189453, |
| "eval_precision": 0.9329969544070185, |
| "eval_recall": 0.9382428231852258, |
| "eval_runtime": 24.5229, |
| "eval_samples_per_second": 1152.27, |
| "eval_steps_per_second": 36.048, |
| "step": 211980 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 211980, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.3987517048244486e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|