{ "best_global_step": 872, "best_metric": 0.8792872839502528, "best_model_checkpoint": "finbert-news\\checkpoint-872", "epoch": 8.0, "eval_steps": 500, "global_step": 872, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22935779816513763, "grad_norm": 14.148714065551758, "learning_rate": 5.4545454545454545e-06, "loss": 1.4992, "step": 25 }, { "epoch": 0.45871559633027525, "grad_norm": 3.876201629638672, "learning_rate": 1.1136363636363637e-05, "loss": 1.1135, "step": 50 }, { "epoch": 0.6880733944954128, "grad_norm": 5.878354072570801, "learning_rate": 1.681818181818182e-05, "loss": 1.0131, "step": 75 }, { "epoch": 0.9174311926605505, "grad_norm": 8.439997673034668, "learning_rate": 1.999028701591291e-05, "loss": 0.9438, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.6847545219638242, "eval_best_tau": 0.3, "eval_f1": 0.676566431827065, "eval_loss": 0.7978772521018982, "eval_runtime": 25.6096, "eval_samples_per_second": 15.111, "eval_steps_per_second": 0.508, "step": 109 }, { "epoch": 1.146788990825688, "grad_norm": 11.146471977233887, "learning_rate": 1.9896130185675263e-05, "loss": 0.8281, "step": 125 }, { "epoch": 1.3761467889908257, "grad_norm": 13.593254089355469, "learning_rate": 1.9702741799106508e-05, "loss": 0.7709, "step": 150 }, { "epoch": 1.6055045871559632, "grad_norm": 18.543376922607422, "learning_rate": 1.9412061021336404e-05, "loss": 0.6926, "step": 175 }, { "epoch": 1.834862385321101, "grad_norm": 8.12672233581543, "learning_rate": 1.9027002598375012e-05, "loss": 0.7276, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.7829457364341085, "eval_best_tau": 0.3, "eval_f1": 0.7652186280745745, "eval_loss": 0.5902541875839233, "eval_runtime": 26.2346, "eval_samples_per_second": 14.752, "eval_steps_per_second": 0.496, "step": 218 }, { "epoch": 2.0642201834862384, "grad_norm": 19.604272842407227, "learning_rate": 1.8551427630053464e-05, "loss": 0.6274, "step": 225 }, { "epoch": 2.293577981651376, "grad_norm": 14.275187492370605, "learning_rate": 1.7990104853582494e-05, "loss": 0.5545, "step": 250 }, { "epoch": 2.522935779816514, "grad_norm": 5.522476673126221, "learning_rate": 1.7348662825950356e-05, "loss": 0.6335, "step": 275 }, { "epoch": 2.7522935779816513, "grad_norm": 13.290278434753418, "learning_rate": 1.6633533484642104e-05, "loss": 0.5528, "step": 300 }, { "epoch": 2.981651376146789, "grad_norm": 11.074273109436035, "learning_rate": 1.5851887652614238e-05, "loss": 0.5402, "step": 325 }, { "epoch": 3.0, "eval_accuracy": 0.8268733850129198, "eval_best_tau": 0.3, "eval_f1": 0.815201593241898, "eval_loss": 0.49993327260017395, "eval_runtime": 26.1993, "eval_samples_per_second": 14.771, "eval_steps_per_second": 0.496, "step": 327 }, { "epoch": 3.2110091743119265, "grad_norm": 11.70258617401123, "learning_rate": 1.5011563134236408e-05, "loss": 0.4388, "step": 350 }, { "epoch": 3.4403669724770642, "grad_norm": 15.00348949432373, "learning_rate": 1.4120986123204257e-05, "loss": 0.3964, "step": 375 }, { "epoch": 3.669724770642202, "grad_norm": 13.193717002868652, "learning_rate": 1.3189086710490649e-05, "loss": 0.424, "step": 400 }, { "epoch": 3.8990825688073394, "grad_norm": 17.48609161376953, "learning_rate": 1.2225209339563144e-05, "loss": 0.4301, "step": 425 }, { "epoch": 4.0, "eval_accuracy": 0.8604651162790697, "eval_best_tau": 0.3, "eval_f1": 0.8555597375519169, "eval_loss": 0.4141731262207031, "eval_runtime": 25.6623, "eval_samples_per_second": 15.08, "eval_steps_per_second": 0.507, "step": 436 }, { "epoch": 4.128440366972477, "grad_norm": 29.30590057373047, "learning_rate": 1.1239019106760909e-05, "loss": 0.3741, "step": 450 }, { "epoch": 4.3577981651376145, "grad_norm": 12.956811904907227, "learning_rate": 1.024040484638617e-05, "loss": 0.3516, "step": 475 }, { "epoch": 4.587155963302752, "grad_norm": 13.52210807800293, "learning_rate": 9.239379972305992e-06, "loss": 0.3391, "step": 500 }, { "epoch": 4.81651376146789, "grad_norm": 15.72844123840332, "learning_rate": 8.245982070356186e-06, "loss": 0.312, "step": 525 }, { "epoch": 5.0, "eval_accuracy": 0.8811369509043928, "eval_best_tau": 0.3, "eval_f1": 0.8760670434788734, "eval_loss": 0.3646778464317322, "eval_runtime": 25.7825, "eval_samples_per_second": 15.01, "eval_steps_per_second": 0.504, "step": 545 }, { "epoch": 5.045871559633028, "grad_norm": 13.13558578491211, "learning_rate": 7.270172248365468e-06, "loss": 0.3204, "step": 550 }, { "epoch": 5.275229357798165, "grad_norm": 14.52351188659668, "learning_rate": 6.321735253048214e-06, "loss": 0.2749, "step": 575 }, { "epoch": 5.504587155963303, "grad_norm": 11.246268272399902, "learning_rate": 5.410181355324622e-06, "loss": 0.2962, "step": 600 }, { "epoch": 5.73394495412844, "grad_norm": 21.78761100769043, "learning_rate": 4.544650987894515e-06, "loss": 0.2512, "step": 625 }, { "epoch": 5.963302752293578, "grad_norm": 14.13925552368164, "learning_rate": 3.733823091293274e-06, "loss": 0.2983, "step": 650 }, { "epoch": 6.0, "eval_accuracy": 0.8837209302325582, "eval_best_tau": 0.3, "eval_f1": 0.8773838929062463, "eval_loss": 0.37674975395202637, "eval_runtime": 26.3938, "eval_samples_per_second": 14.663, "eval_steps_per_second": 0.493, "step": 654 }, { "epoch": 6.192660550458716, "grad_norm": 6.343225002288818, "learning_rate": 2.9858280874723833e-06, "loss": 0.266, "step": 675 }, { "epoch": 6.422018348623853, "grad_norm": 16.15406036376953, "learning_rate": 2.3081663535453736e-06, "loss": 0.2446, "step": 700 }, { "epoch": 6.651376146788991, "grad_norm": 12.924860000610352, "learning_rate": 1.7076330131880525e-06, "loss": 0.2401, "step": 725 }, { "epoch": 6.8807339449541285, "grad_norm": 13.268035888671875, "learning_rate": 1.1902497998330065e-06, "loss": 0.2426, "step": 750 }, { "epoch": 7.0, "eval_accuracy": 0.8811369509043928, "eval_best_tau": 0.3, "eval_f1": 0.8763676520557254, "eval_loss": 0.3656945824623108, "eval_runtime": 25.7215, "eval_samples_per_second": 15.046, "eval_steps_per_second": 0.505, "step": 763 }, { "epoch": 7.110091743119266, "grad_norm": 4.727287769317627, "learning_rate": 7.612046748871327e-07, "loss": 0.2214, "step": 775 }, { "epoch": 7.339449541284404, "grad_norm": 10.04178524017334, "learning_rate": 4.247998064389458e-07, "loss": 0.1936, "step": 800 }, { "epoch": 7.568807339449541, "grad_norm": 24.02179527282715, "learning_rate": 1.844084300893456e-07, "loss": 0.2567, "step": 825 }, { "epoch": 7.798165137614679, "grad_norm": 7.141385555267334, "learning_rate": 4.244102447555909e-08, "loss": 0.1959, "step": 850 }, { "epoch": 8.0, "eval_accuracy": 0.8837209302325582, "eval_best_tau": 0.3, "eval_f1": 0.8792872839502528, "eval_loss": 0.3580004572868347, "eval_runtime": 26.2238, "eval_samples_per_second": 14.758, "eval_steps_per_second": 0.496, "step": 872 } ], "logging_steps": 25, "max_steps": 872, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1832848067893248.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }