| { |
| "best_global_step": 4500, |
| "best_metric": 0.8526874352604716, |
| "best_model_checkpoint": "./camelbert-ner-author/checkpoint-4500", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 8236, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.24283632831471588, |
| "grad_norm": 19.655189514160156, |
| "learning_rate": 1.8788246721709567e-05, |
| "loss": 1.3015, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.24283632831471588, |
| "eval_accuracy": 0.7120519605484724, |
| "eval_f1_macro": 0.6183057931510966, |
| "eval_f1_micro": 0.7120519605484724, |
| "eval_loss": 0.9594210982322693, |
| "eval_precision_macro": 0.6361214235541449, |
| "eval_precision_micro": 0.7120519605484724, |
| "eval_recall_macro": 0.6773081159897201, |
| "eval_recall_micro": 0.7120519605484724, |
| "eval_runtime": 78.8319, |
| "eval_samples_per_second": 52.732, |
| "eval_steps_per_second": 3.298, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.48567265662943176, |
| "grad_norm": 12.739048957824707, |
| "learning_rate": 1.757406508013599e-05, |
| "loss": 0.4807, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.48567265662943176, |
| "eval_accuracy": 0.7986528746692326, |
| "eval_f1_macro": 0.7134381815266957, |
| "eval_f1_micro": 0.7986528746692327, |
| "eval_loss": 0.685612678527832, |
| "eval_precision_macro": 0.7383782436733511, |
| "eval_precision_micro": 0.7986528746692326, |
| "eval_recall_macro": 0.7399947161579059, |
| "eval_recall_micro": 0.7986528746692326, |
| "eval_runtime": 78.5622, |
| "eval_samples_per_second": 52.913, |
| "eval_steps_per_second": 3.309, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7285089849441476, |
| "grad_norm": 28.610925674438477, |
| "learning_rate": 1.635988343856241e-05, |
| "loss": 0.2896, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7285089849441476, |
| "eval_accuracy": 0.8135674765455858, |
| "eval_f1_macro": 0.7371793410016984, |
| "eval_f1_micro": 0.8135674765455858, |
| "eval_loss": 0.6776726841926575, |
| "eval_precision_macro": 0.7424769213783606, |
| "eval_precision_micro": 0.8135674765455858, |
| "eval_recall_macro": 0.7767083489723529, |
| "eval_recall_micro": 0.8135674765455858, |
| "eval_runtime": 79.2486, |
| "eval_samples_per_second": 52.455, |
| "eval_steps_per_second": 3.281, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9713453132588635, |
| "grad_norm": 3.9482295513153076, |
| "learning_rate": 1.514570179698883e-05, |
| "loss": 0.2291, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9713453132588635, |
| "eval_accuracy": 0.8166947317777243, |
| "eval_f1_macro": 0.7624936105293934, |
| "eval_f1_micro": 0.8166947317777243, |
| "eval_loss": 0.6716341972351074, |
| "eval_precision_macro": 0.7604827616055949, |
| "eval_precision_micro": 0.8166947317777243, |
| "eval_recall_macro": 0.8180693724015187, |
| "eval_recall_micro": 0.8166947317777243, |
| "eval_runtime": 78.8286, |
| "eval_samples_per_second": 52.735, |
| "eval_steps_per_second": 3.298, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2141816415735793, |
| "grad_norm": 0.23831093311309814, |
| "learning_rate": 1.393152015541525e-05, |
| "loss": 0.1293, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.2141816415735793, |
| "eval_accuracy": 0.8611979793120038, |
| "eval_f1_macro": 0.8086524377893386, |
| "eval_f1_micro": 0.8611979793120037, |
| "eval_loss": 0.5560595393180847, |
| "eval_precision_macro": 0.8065670541564225, |
| "eval_precision_micro": 0.8611979793120038, |
| "eval_recall_macro": 0.827913254542162, |
| "eval_recall_micro": 0.8611979793120038, |
| "eval_runtime": 79.2647, |
| "eval_samples_per_second": 52.445, |
| "eval_steps_per_second": 3.28, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4570179698882952, |
| "grad_norm": 14.096162796020508, |
| "learning_rate": 1.2717338513841673e-05, |
| "loss": 0.0999, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.4570179698882952, |
| "eval_accuracy": 0.863122444070243, |
| "eval_f1_macro": 0.7992144552019207, |
| "eval_f1_micro": 0.863122444070243, |
| "eval_loss": 0.5559478998184204, |
| "eval_precision_macro": 0.8206342176388387, |
| "eval_precision_micro": 0.863122444070243, |
| "eval_recall_macro": 0.8136954515050157, |
| "eval_recall_micro": 0.863122444070243, |
| "eval_runtime": 78.8108, |
| "eval_samples_per_second": 52.747, |
| "eval_steps_per_second": 3.299, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6998542982030111, |
| "grad_norm": 17.986045837402344, |
| "learning_rate": 1.1503156872268093e-05, |
| "loss": 0.0774, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.6998542982030111, |
| "eval_accuracy": 0.8874188116430118, |
| "eval_f1_macro": 0.8277841962841266, |
| "eval_f1_micro": 0.8874188116430118, |
| "eval_loss": 0.47720664739608765, |
| "eval_precision_macro": 0.8360578643228223, |
| "eval_precision_micro": 0.8874188116430118, |
| "eval_recall_macro": 0.8389572760941908, |
| "eval_recall_micro": 0.8874188116430118, |
| "eval_runtime": 79.1831, |
| "eval_samples_per_second": 52.499, |
| "eval_steps_per_second": 3.284, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.942690626517727, |
| "grad_norm": 0.42851337790489197, |
| "learning_rate": 1.0288975230694513e-05, |
| "loss": 0.0664, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.942690626517727, |
| "eval_accuracy": 0.8676930478710608, |
| "eval_f1_macro": 0.8128736306359143, |
| "eval_f1_micro": 0.8676930478710608, |
| "eval_loss": 0.6257432103157043, |
| "eval_precision_macro": 0.8270757594085362, |
| "eval_precision_micro": 0.8676930478710608, |
| "eval_recall_macro": 0.8277626111904449, |
| "eval_recall_micro": 0.8676930478710608, |
| "eval_runtime": 78.863, |
| "eval_samples_per_second": 52.712, |
| "eval_steps_per_second": 3.297, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.1855269548324427, |
| "grad_norm": 0.35300347208976746, |
| "learning_rate": 9.074793589120935e-06, |
| "loss": 0.0358, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.1855269548324427, |
| "eval_accuracy": 0.8982439259081068, |
| "eval_f1_macro": 0.8526874352604716, |
| "eval_f1_micro": 0.8982439259081068, |
| "eval_loss": 0.5168837308883667, |
| "eval_precision_macro": 0.8595883535398947, |
| "eval_precision_micro": 0.8982439259081068, |
| "eval_recall_macro": 0.8610977340509225, |
| "eval_recall_micro": 0.8982439259081068, |
| "eval_runtime": 78.7961, |
| "eval_samples_per_second": 52.756, |
| "eval_steps_per_second": 3.3, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.4283632831471587, |
| "grad_norm": 0.004080391488969326, |
| "learning_rate": 7.860611947547354e-06, |
| "loss": 0.0257, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.4283632831471587, |
| "eval_accuracy": 0.8811643011787347, |
| "eval_f1_macro": 0.8297981700465746, |
| "eval_f1_micro": 0.8811643011787347, |
| "eval_loss": 0.5981627106666565, |
| "eval_precision_macro": 0.8386903015490977, |
| "eval_precision_micro": 0.8811643011787347, |
| "eval_recall_macro": 0.8494999643786711, |
| "eval_recall_micro": 0.8811643011787347, |
| "eval_runtime": 79.1803, |
| "eval_samples_per_second": 52.5, |
| "eval_steps_per_second": 3.284, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.6711996114618746, |
| "grad_norm": 0.025048088282346725, |
| "learning_rate": 6.646430305973774e-06, |
| "loss": 0.0341, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.6711996114618746, |
| "eval_accuracy": 0.8842915564108732, |
| "eval_f1_macro": 0.8331171832679176, |
| "eval_f1_micro": 0.8842915564108732, |
| "eval_loss": 0.5772708654403687, |
| "eval_precision_macro": 0.8494868074895356, |
| "eval_precision_micro": 0.8842915564108732, |
| "eval_recall_macro": 0.8483270001882943, |
| "eval_recall_micro": 0.8842915564108732, |
| "eval_runtime": 78.6231, |
| "eval_samples_per_second": 52.873, |
| "eval_steps_per_second": 3.307, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.9140359397765905, |
| "grad_norm": 0.02463502809405327, |
| "learning_rate": 5.432248664400195e-06, |
| "loss": 0.0189, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.9140359397765905, |
| "eval_accuracy": 0.89126774115949, |
| "eval_f1_macro": 0.8419632069460932, |
| "eval_f1_micro": 0.89126774115949, |
| "eval_loss": 0.5399471521377563, |
| "eval_precision_macro": 0.8471174550638614, |
| "eval_precision_micro": 0.89126774115949, |
| "eval_recall_macro": 0.851015220437627, |
| "eval_recall_micro": 0.89126774115949, |
| "eval_runtime": 79.0105, |
| "eval_samples_per_second": 52.613, |
| "eval_steps_per_second": 3.291, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.1568722680913064, |
| "grad_norm": 0.0073256283067166805, |
| "learning_rate": 4.2180670228266156e-06, |
| "loss": 0.0116, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.1568722680913064, |
| "eval_accuracy": 0.8931922059177291, |
| "eval_f1_macro": 0.8414264169454924, |
| "eval_f1_micro": 0.8931922059177291, |
| "eval_loss": 0.5759193301200867, |
| "eval_precision_macro": 0.8427389427262956, |
| "eval_precision_micro": 0.8931922059177291, |
| "eval_recall_macro": 0.8551472022218021, |
| "eval_recall_micro": 0.8931922059177291, |
| "eval_runtime": 78.8344, |
| "eval_samples_per_second": 52.731, |
| "eval_steps_per_second": 3.298, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.3997085964060223, |
| "grad_norm": 0.0036666509695351124, |
| "learning_rate": 3.0038853812530354e-06, |
| "loss": 0.0059, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.3997085964060223, |
| "eval_accuracy": 0.8727447678614385, |
| "eval_f1_macro": 0.8203565475839012, |
| "eval_f1_micro": 0.8727447678614385, |
| "eval_loss": 0.672887921333313, |
| "eval_precision_macro": 0.8299847158193392, |
| "eval_precision_micro": 0.8727447678614385, |
| "eval_recall_macro": 0.8322106966291821, |
| "eval_recall_micro": 0.8727447678614385, |
| "eval_runtime": 78.7849, |
| "eval_samples_per_second": 52.764, |
| "eval_steps_per_second": 3.3, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.642544924720738, |
| "grad_norm": 0.004680951591581106, |
| "learning_rate": 1.789703739679456e-06, |
| "loss": 0.0042, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.642544924720738, |
| "eval_accuracy": 0.8878999278325715, |
| "eval_f1_macro": 0.8433308117001564, |
| "eval_f1_micro": 0.8878999278325715, |
| "eval_loss": 0.5990936756134033, |
| "eval_precision_macro": 0.8472220448304009, |
| "eval_precision_micro": 0.8878999278325715, |
| "eval_recall_macro": 0.8545322401470954, |
| "eval_recall_micro": 0.8878999278325715, |
| "eval_runtime": 79.0879, |
| "eval_samples_per_second": 52.562, |
| "eval_steps_per_second": 3.287, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.885381253035454, |
| "grad_norm": 0.010031532496213913, |
| "learning_rate": 5.755220981058767e-07, |
| "loss": 0.0031, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.885381253035454, |
| "eval_accuracy": 0.8917488573490499, |
| "eval_f1_macro": 0.8426490769860735, |
| "eval_f1_micro": 0.8917488573490499, |
| "eval_loss": 0.5693513751029968, |
| "eval_precision_macro": 0.8463441328885969, |
| "eval_precision_micro": 0.8917488573490499, |
| "eval_recall_macro": 0.8543482468908278, |
| "eval_recall_micro": 0.8917488573490499, |
| "eval_runtime": 78.783, |
| "eval_samples_per_second": 52.765, |
| "eval_steps_per_second": 3.3, |
| "step": 8000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 8236, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|