| { | |
| "best_global_step": 872, | |
| "best_metric": 0.8792872839502528, | |
| "best_model_checkpoint": "finbert-news\\checkpoint-872", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 872, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.22935779816513763, | |
| "grad_norm": 14.148714065551758, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 1.4992, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.45871559633027525, | |
| "grad_norm": 3.876201629638672, | |
| "learning_rate": 1.1136363636363637e-05, | |
| "loss": 1.1135, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6880733944954128, | |
| "grad_norm": 5.878354072570801, | |
| "learning_rate": 1.681818181818182e-05, | |
| "loss": 1.0131, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.9174311926605505, | |
| "grad_norm": 8.439997673034668, | |
| "learning_rate": 1.999028701591291e-05, | |
| "loss": 0.9438, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6847545219638242, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.676566431827065, | |
| "eval_loss": 0.7978772521018982, | |
| "eval_runtime": 25.6096, | |
| "eval_samples_per_second": 15.111, | |
| "eval_steps_per_second": 0.508, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.146788990825688, | |
| "grad_norm": 11.146471977233887, | |
| "learning_rate": 1.9896130185675263e-05, | |
| "loss": 0.8281, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.3761467889908257, | |
| "grad_norm": 13.593254089355469, | |
| "learning_rate": 1.9702741799106508e-05, | |
| "loss": 0.7709, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.6055045871559632, | |
| "grad_norm": 18.543376922607422, | |
| "learning_rate": 1.9412061021336404e-05, | |
| "loss": 0.6926, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.834862385321101, | |
| "grad_norm": 8.12672233581543, | |
| "learning_rate": 1.9027002598375012e-05, | |
| "loss": 0.7276, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7829457364341085, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.7652186280745745, | |
| "eval_loss": 0.5902541875839233, | |
| "eval_runtime": 26.2346, | |
| "eval_samples_per_second": 14.752, | |
| "eval_steps_per_second": 0.496, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 2.0642201834862384, | |
| "grad_norm": 19.604272842407227, | |
| "learning_rate": 1.8551427630053464e-05, | |
| "loss": 0.6274, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.293577981651376, | |
| "grad_norm": 14.275187492370605, | |
| "learning_rate": 1.7990104853582494e-05, | |
| "loss": 0.5545, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.522935779816514, | |
| "grad_norm": 5.522476673126221, | |
| "learning_rate": 1.7348662825950356e-05, | |
| "loss": 0.6335, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.7522935779816513, | |
| "grad_norm": 13.290278434753418, | |
| "learning_rate": 1.6633533484642104e-05, | |
| "loss": 0.5528, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.981651376146789, | |
| "grad_norm": 11.074273109436035, | |
| "learning_rate": 1.5851887652614238e-05, | |
| "loss": 0.5402, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8268733850129198, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.815201593241898, | |
| "eval_loss": 0.49993327260017395, | |
| "eval_runtime": 26.1993, | |
| "eval_samples_per_second": 14.771, | |
| "eval_steps_per_second": 0.496, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 3.2110091743119265, | |
| "grad_norm": 11.70258617401123, | |
| "learning_rate": 1.5011563134236408e-05, | |
| "loss": 0.4388, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.4403669724770642, | |
| "grad_norm": 15.00348949432373, | |
| "learning_rate": 1.4120986123204257e-05, | |
| "loss": 0.3964, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 3.669724770642202, | |
| "grad_norm": 13.193717002868652, | |
| "learning_rate": 1.3189086710490649e-05, | |
| "loss": 0.424, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.8990825688073394, | |
| "grad_norm": 17.48609161376953, | |
| "learning_rate": 1.2225209339563144e-05, | |
| "loss": 0.4301, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.8555597375519169, | |
| "eval_loss": 0.4141731262207031, | |
| "eval_runtime": 25.6623, | |
| "eval_samples_per_second": 15.08, | |
| "eval_steps_per_second": 0.507, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 4.128440366972477, | |
| "grad_norm": 29.30590057373047, | |
| "learning_rate": 1.1239019106760909e-05, | |
| "loss": 0.3741, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.3577981651376145, | |
| "grad_norm": 12.956811904907227, | |
| "learning_rate": 1.024040484638617e-05, | |
| "loss": 0.3516, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 4.587155963302752, | |
| "grad_norm": 13.52210807800293, | |
| "learning_rate": 9.239379972305992e-06, | |
| "loss": 0.3391, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.81651376146789, | |
| "grad_norm": 15.72844123840332, | |
| "learning_rate": 8.245982070356186e-06, | |
| "loss": 0.312, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8811369509043928, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.8760670434788734, | |
| "eval_loss": 0.3646778464317322, | |
| "eval_runtime": 25.7825, | |
| "eval_samples_per_second": 15.01, | |
| "eval_steps_per_second": 0.504, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 5.045871559633028, | |
| "grad_norm": 13.13558578491211, | |
| "learning_rate": 7.270172248365468e-06, | |
| "loss": 0.3204, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.275229357798165, | |
| "grad_norm": 14.52351188659668, | |
| "learning_rate": 6.321735253048214e-06, | |
| "loss": 0.2749, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 5.504587155963303, | |
| "grad_norm": 11.246268272399902, | |
| "learning_rate": 5.410181355324622e-06, | |
| "loss": 0.2962, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.73394495412844, | |
| "grad_norm": 21.78761100769043, | |
| "learning_rate": 4.544650987894515e-06, | |
| "loss": 0.2512, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 5.963302752293578, | |
| "grad_norm": 14.13925552368164, | |
| "learning_rate": 3.733823091293274e-06, | |
| "loss": 0.2983, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.8773838929062463, | |
| "eval_loss": 0.37674975395202637, | |
| "eval_runtime": 26.3938, | |
| "eval_samples_per_second": 14.663, | |
| "eval_steps_per_second": 0.493, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 6.192660550458716, | |
| "grad_norm": 6.343225002288818, | |
| "learning_rate": 2.9858280874723833e-06, | |
| "loss": 0.266, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 6.422018348623853, | |
| "grad_norm": 16.15406036376953, | |
| "learning_rate": 2.3081663535453736e-06, | |
| "loss": 0.2446, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.651376146788991, | |
| "grad_norm": 12.924860000610352, | |
| "learning_rate": 1.7076330131880525e-06, | |
| "loss": 0.2401, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 6.8807339449541285, | |
| "grad_norm": 13.268035888671875, | |
| "learning_rate": 1.1902497998330065e-06, | |
| "loss": 0.2426, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8811369509043928, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.8763676520557254, | |
| "eval_loss": 0.3656945824623108, | |
| "eval_runtime": 25.7215, | |
| "eval_samples_per_second": 15.046, | |
| "eval_steps_per_second": 0.505, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 7.110091743119266, | |
| "grad_norm": 4.727287769317627, | |
| "learning_rate": 7.612046748871327e-07, | |
| "loss": 0.2214, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 7.339449541284404, | |
| "grad_norm": 10.04178524017334, | |
| "learning_rate": 4.247998064389458e-07, | |
| "loss": 0.1936, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.568807339449541, | |
| "grad_norm": 24.02179527282715, | |
| "learning_rate": 1.844084300893456e-07, | |
| "loss": 0.2567, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 7.798165137614679, | |
| "grad_norm": 7.141385555267334, | |
| "learning_rate": 4.244102447555909e-08, | |
| "loss": 0.1959, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_best_tau": 0.3, | |
| "eval_f1": 0.8792872839502528, | |
| "eval_loss": 0.3580004572868347, | |
| "eval_runtime": 26.2238, | |
| "eval_samples_per_second": 14.758, | |
| "eval_steps_per_second": 0.496, | |
| "step": 872 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 872, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 2, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1832848067893248.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |