{ "best_metric": 0.716598119152416, "best_model_checkpoint": "logs/eurlex/roberta-base/seed_1/checkpoint-6876", "epoch": 2.0, "eval_steps": 500, "global_step": 6876, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14543339150668994, "grad_norm": 10488.72265625, "learning_rate": 2.7818499127399653e-05, "loss": 0.1782, "step": 500 }, { "epoch": 0.29086678301337987, "grad_norm": 8601.787109375, "learning_rate": 2.56369982547993e-05, "loss": 0.109, "step": 1000 }, { "epoch": 0.4363001745200698, "grad_norm": 13134.970703125, "learning_rate": 2.3455497382198953e-05, "loss": 0.0926, "step": 1500 }, { "epoch": 0.5817335660267597, "grad_norm": 16072.458984375, "learning_rate": 2.1273996509598605e-05, "loss": 0.0851, "step": 2000 }, { "epoch": 0.7271669575334497, "grad_norm": 12878.12109375, "learning_rate": 1.9092495636998257e-05, "loss": 0.078, "step": 2500 }, { "epoch": 0.8726003490401396, "grad_norm": 12653.638671875, "learning_rate": 1.6910994764397905e-05, "loss": 0.0758, "step": 3000 }, { "epoch": 1.0, "eval_loss": 0.08141529560089111, "eval_macro-f1": 0.296432741440559, "eval_micro-f1": 0.6862200624783061, "eval_runtime": 20.7429, "eval_samples_per_second": 241.047, "eval_steps_per_second": 15.09, "step": 3438 }, { "epoch": 1.0180337405468296, "grad_norm": 14271.0712890625, "learning_rate": 1.4729493891797557e-05, "loss": 0.0715, "step": 3500 }, { "epoch": 1.1634671320535195, "grad_norm": 15767.826171875, "learning_rate": 1.2547993019197209e-05, "loss": 0.0679, "step": 4000 }, { "epoch": 1.3089005235602094, "grad_norm": 13860.2705078125, "learning_rate": 1.0366492146596857e-05, "loss": 0.0652, "step": 4500 }, { "epoch": 1.4543339150668992, "grad_norm": 11340.1640625, "learning_rate": 8.18499127399651e-06, "loss": 0.0651, "step": 5000 }, { "epoch": 1.5997673065735893, "grad_norm": 14212.9599609375, "learning_rate": 6.003490401396161e-06, "loss": 0.0633, "step": 5500 }, { "epoch": 1.7452006980802792, "grad_norm": 16454.734375, "learning_rate": 3.821989528795812e-06, "loss": 0.0624, "step": 6000 }, { "epoch": 1.8906340895869693, "grad_norm": 11580.310546875, "learning_rate": 1.6404886561954625e-06, "loss": 0.0618, "step": 6500 }, { "epoch": 2.0, "eval_loss": 0.07441535592079163, "eval_macro-f1": 0.35506427791546535, "eval_micro-f1": 0.716598119152416, "eval_runtime": 19.4037, "eval_samples_per_second": 257.682, "eval_steps_per_second": 16.131, "step": 6876 }, { "epoch": 2.0, "step": 6876, "total_flos": 2.896768241664e+16, "train_loss": 0.08159883026469232, "train_runtime": 766.6169, "train_samples_per_second": 143.488, "train_steps_per_second": 8.969 } ], "logging_steps": 500, "max_steps": 6876, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.896768241664e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }