{ "best_metric": 0.6313058518304987, "best_model_checkpoint": "logs/indian_ner/roberta-base/seed_1/checkpoint-1376", "epoch": 19.0, "eval_steps": 500, "global_step": 1634, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7997455985661011, "eval_f1": 0.14692321878650028, "eval_loss": 0.35509759187698364, "eval_precision": 0.08916579614982838, "eval_recall": 0.41710296684118675, "eval_runtime": 3.1945, "eval_samples_per_second": 336.207, "eval_steps_per_second": 2.817, "step": 86 }, { "epoch": 2.0, "eval_accuracy": 0.8326539658678076, "eval_f1": 0.2069869669160176, "eval_loss": 0.23829466104507446, "eval_precision": 0.1328449811918432, "eval_recall": 0.46841186736474694, "eval_runtime": 3.449, "eval_samples_per_second": 311.39, "eval_steps_per_second": 2.609, "step": 172 }, { "epoch": 3.0, "eval_accuracy": 0.8921588467134997, "eval_f1": 0.29748962245503063, "eval_loss": 0.21593649685382843, "eval_precision": 0.20750034468495795, "eval_recall": 0.525305410122164, "eval_runtime": 2.5646, "eval_samples_per_second": 418.778, "eval_steps_per_second": 3.509, "step": 258 }, { "epoch": 4.0, "eval_accuracy": 0.902517996010523, "eval_f1": 0.325260250690461, "eval_loss": 0.2013314664363861, "eval_precision": 0.23377614903038632, "eval_recall": 0.5343804537521815, "eval_runtime": 2.5624, "eval_samples_per_second": 419.138, "eval_steps_per_second": 3.512, "step": 344 }, { "epoch": 5.0, "eval_accuracy": 0.9131180557563142, "eval_f1": 0.3645869640989892, "eval_loss": 0.1926085650920868, "eval_precision": 0.27324973876698017, "eval_recall": 0.5476439790575917, "eval_runtime": 2.6037, "eval_samples_per_second": 412.492, "eval_steps_per_second": 3.457, "step": 430 }, { "epoch": 5.813953488372093, "grad_norm": 3.6987311840057373, "learning_rate": 2.1313953488372093e-05, "loss": 0.396, "step": 500 }, { "epoch": 6.0, "eval_accuracy": 0.9134071482948358, "eval_f1": 0.37776752767527666, "eval_loss": 0.20021429657936096, "eval_precision": 0.28207335973824693, "eval_recall": 0.5717277486910994, "eval_runtime": 6.6862, "eval_samples_per_second": 160.629, "eval_steps_per_second": 1.346, "step": 516 }, { "epoch": 7.0, "eval_accuracy": 0.9267054050668285, "eval_f1": 0.440271772699197, "eval_loss": 0.21028228104114532, "eval_precision": 0.3407265774378585, "eval_recall": 0.6219895287958115, "eval_runtime": 2.6496, "eval_samples_per_second": 405.343, "eval_steps_per_second": 3.397, "step": 602 }, { "epoch": 8.0, "eval_accuracy": 0.9255972170024959, "eval_f1": 0.4397892931520274, "eval_loss": 0.1943674236536026, "eval_precision": 0.33880709701774253, "eval_recall": 0.6265270506108203, "eval_runtime": 2.5685, "eval_samples_per_second": 418.146, "eval_steps_per_second": 3.504, "step": 688 }, { "epoch": 9.0, "eval_accuracy": 0.9290759638827055, "eval_f1": 0.4493577075098814, "eval_loss": 0.21183457970619202, "eval_precision": 0.3477346587650545, "eval_recall": 0.6349040139616056, "eval_runtime": 6.3809, "eval_samples_per_second": 168.314, "eval_steps_per_second": 1.41, "step": 774 }, { "epoch": 10.0, "eval_accuracy": 0.9396182051207925, "eval_f1": 0.5092445853143158, "eval_loss": 0.22744828462600708, "eval_precision": 0.4096027193541534, "eval_recall": 0.6729493891797557, "eval_runtime": 2.6036, "eval_samples_per_second": 412.506, "eval_steps_per_second": 3.457, "step": 860 }, { "epoch": 11.0, "eval_accuracy": 0.9450338720090968, "eval_f1": 0.5512627986348123, "eval_loss": 0.23177774250507355, "eval_precision": 0.45269058295964126, "eval_recall": 0.7047120418848167, "eval_runtime": 2.5671, "eval_samples_per_second": 418.363, "eval_steps_per_second": 3.506, "step": 946 }, { "epoch": 11.627906976744185, "grad_norm": 2.9459869861602783, "learning_rate": 1.2593023255813954e-05, "loss": 0.0715, "step": 1000 }, { "epoch": 12.0, "eval_accuracy": 0.9442533221550885, "eval_f1": 0.541422935655013, "eval_loss": 0.2438974231481552, "eval_precision": 0.44360231832367364, "eval_recall": 0.6945898778359512, "eval_runtime": 2.5676, "eval_samples_per_second": 418.287, "eval_steps_per_second": 3.505, "step": 1032 }, { "epoch": 13.0, "eval_accuracy": 0.9460360594759716, "eval_f1": 0.5802113352545629, "eval_loss": 0.2385331243276596, "eval_precision": 0.47806422433288104, "eval_recall": 0.737870855148342, "eval_runtime": 2.5597, "eval_samples_per_second": 419.581, "eval_steps_per_second": 3.516, "step": 1118 }, { "epoch": 14.0, "eval_accuracy": 0.9460071502221195, "eval_f1": 0.5560439560439561, "eval_loss": 0.24203675985336304, "eval_precision": 0.45843714609286523, "eval_recall": 0.706457242582897, "eval_runtime": 2.5735, "eval_samples_per_second": 417.328, "eval_steps_per_second": 3.497, "step": 1204 }, { "epoch": 15.0, "eval_accuracy": 0.9501604463588795, "eval_f1": 0.5943502824858757, "eval_loss": 0.245487779378891, "eval_precision": 0.4991696322657177, "eval_recall": 0.7343804537521815, "eval_runtime": 2.5639, "eval_samples_per_second": 418.898, "eval_steps_per_second": 3.51, "step": 1290 }, { "epoch": 16.0, "eval_accuracy": 0.9571950314629046, "eval_f1": 0.6313058518304987, "eval_loss": 0.25131794810295105, "eval_precision": 0.537687208445863, "eval_recall": 0.7643979057591623, "eval_runtime": 2.5566, "eval_samples_per_second": 420.083, "eval_steps_per_second": 3.52, "step": 1376 }, { "epoch": 17.0, "eval_accuracy": 0.9557784780241488, "eval_f1": 0.6291390728476821, "eval_loss": 0.26701298356056213, "eval_precision": 0.5354079882381769, "eval_recall": 0.7626527050610821, "eval_runtime": 2.5575, "eval_samples_per_second": 419.947, "eval_steps_per_second": 3.519, "step": 1462 }, { "epoch": 17.441860465116278, "grad_norm": 0.8599265813827515, "learning_rate": 3.8720930232558145e-06, "loss": 0.0344, "step": 1500 }, { "epoch": 18.0, "eval_accuracy": 0.9505266302410068, "eval_f1": 0.596600566572238, "eval_loss": 0.26870501041412354, "eval_precision": 0.5020262216924911, "eval_recall": 0.7350785340314137, "eval_runtime": 4.3379, "eval_samples_per_second": 247.585, "eval_steps_per_second": 2.075, "step": 1548 }, { "epoch": 19.0, "eval_accuracy": 0.9547473813034218, "eval_f1": 0.619456366237482, "eval_loss": 0.2665688395500183, "eval_precision": 0.5248484848484849, "eval_recall": 0.7556719022687609, "eval_runtime": 2.5569, "eval_samples_per_second": 420.037, "eval_steps_per_second": 3.52, "step": 1634 }, { "epoch": 19.0, "step": 1634, "total_flos": 5.459952374908416e+16, "train_loss": 0.15556327351158078, "train_runtime": 746.2434, "train_samples_per_second": 294.676, "train_steps_per_second": 2.305 } ], "logging_steps": 500, "max_steps": 1720, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.459952374908416e+16, "train_batch_size": 128, "trial_name": null, "trial_params": null }