{ "best_global_step": 2752, "best_metric": 0.0057959225960075855, "best_model_checkpoint": "projects/PetBERT_annonymisation/data/case_sensitive/model/checkpoint-2752", "epoch": 7.0, "eval_steps": 500, "global_step": 4816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_f1": 0.011880094095301812, "eval_loss": 2.4431910514831543, "eval_precision": 0.08875808758622007, "eval_recall": 0.05952071418656867, "eval_runtime": 23.7929, "eval_samples_per_second": 139.285, "eval_steps_per_second": 4.371, "step": 0 }, { "epoch": 0.7267441860465116, "grad_norm": 0.14246754348278046, "learning_rate": 4.996373546511628e-05, "loss": 0.0494, "step": 500 }, { "epoch": 1.0, "eval_f1": 0.8935989317265615, "eval_loss": 0.006424472201615572, "eval_precision": 0.8856397165335348, "eval_recall": 0.9052291580842897, "eval_runtime": 24.8439, "eval_samples_per_second": 133.393, "eval_steps_per_second": 4.186, "step": 688 }, { "epoch": 1.4534883720930232, "grad_norm": 0.013319989666342735, "learning_rate": 4.992739825581396e-05, "loss": 0.0044, "step": 1000 }, { "epoch": 2.0, "eval_f1": 0.921251638253682, "eval_loss": 0.006238908972591162, "eval_precision": 0.935008628082375, "eval_recall": 0.9144731612976222, "eval_runtime": 24.802, "eval_samples_per_second": 133.618, "eval_steps_per_second": 4.193, "step": 1376 }, { "epoch": 2.1802325581395348, "grad_norm": 0.04109887406229973, "learning_rate": 4.989106104651163e-05, "loss": 0.0032, "step": 1500 }, { "epoch": 2.9069767441860463, "grad_norm": 0.3754558861255646, "learning_rate": 4.985472383720931e-05, "loss": 0.0023, "step": 2000 }, { "epoch": 3.0, "eval_f1": 0.931311286064836, "eval_loss": 0.006664152257144451, "eval_precision": 0.9347111088313973, "eval_recall": 0.9310447645995317, "eval_runtime": 24.7968, "eval_samples_per_second": 133.646, "eval_steps_per_second": 4.194, "step": 2064 }, { "epoch": 3.633720930232558, "grad_norm": 0.002286644419655204, "learning_rate": 4.981838662790698e-05, "loss": 0.002, "step": 2500 }, { "epoch": 4.0, "eval_f1": 0.9188838249757393, "eval_loss": 0.0057959225960075855, "eval_precision": 0.9199713934746218, "eval_recall": 0.9194240810494807, "eval_runtime": 24.791, "eval_samples_per_second": 133.677, "eval_steps_per_second": 4.195, "step": 2752 }, { "epoch": 4.3604651162790695, "grad_norm": 0.0007801814354024827, "learning_rate": 4.978204941860465e-05, "loss": 0.0016, "step": 3000 }, { "epoch": 5.0, "eval_f1": 0.8780986671708252, "eval_loss": 0.008505718782544136, "eval_precision": 0.868721341594642, "eval_recall": 0.9097564277642886, "eval_runtime": 24.8121, "eval_samples_per_second": 133.564, "eval_steps_per_second": 4.192, "step": 3440 }, { "epoch": 5.087209302325581, "grad_norm": 0.09036080539226532, "learning_rate": 4.974571220930232e-05, "loss": 0.0011, "step": 3500 }, { "epoch": 5.813953488372093, "grad_norm": 0.08693202584981918, "learning_rate": 4.9709375e-05, "loss": 0.0012, "step": 4000 }, { "epoch": 6.0, "eval_f1": 0.9238025924793921, "eval_loss": 0.008721762336790562, "eval_precision": 0.9375153926584012, "eval_recall": 0.917450432310493, "eval_runtime": 24.7949, "eval_samples_per_second": 133.656, "eval_steps_per_second": 4.194, "step": 4128 }, { "epoch": 6.540697674418604, "grad_norm": 0.03196759149432182, "learning_rate": 4.967303779069767e-05, "loss": 0.0013, "step": 4500 }, { "epoch": 7.0, "eval_f1": 0.9309400750123076, "eval_loss": 0.009504728950560093, "eval_precision": 0.9228721238354496, "eval_recall": 0.9401722615087472, "eval_runtime": 24.8542, "eval_samples_per_second": 133.338, "eval_steps_per_second": 4.184, "step": 4816 } ], "logging_steps": 500, "max_steps": 688000, "num_input_tokens_seen": 0, "num_train_epochs": 1000, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.023931632713933e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }