| { | |
| "best_metric": 0.7644568347735216, | |
| "best_model_checkpoint": "nucleotide-transformer-finetuned/checkpoint-10500", | |
| "epoch": 2.7230290456431536, | |
| "eval_steps": 500, | |
| "global_step": 10500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12966804979253113, | |
| "grad_norm": 2.4697630405426025, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.9082, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12966804979253113, | |
| "eval_accuracy": 0.8507642200272479, | |
| "eval_f1_score": 0.6717309860785999, | |
| "eval_loss": 0.44925403594970703, | |
| "eval_runtime": 27.2642, | |
| "eval_samples_per_second": 3450.677, | |
| "eval_steps_per_second": 13.498, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25933609958506226, | |
| "grad_norm": 1.604984164237976, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 0.41, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25933609958506226, | |
| "eval_accuracy": 0.8567779291553134, | |
| "eval_f1_score": 0.6951178092124136, | |
| "eval_loss": 0.42545419931411743, | |
| "eval_runtime": 27.28, | |
| "eval_samples_per_second": 3448.682, | |
| "eval_steps_per_second": 13.49, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.38900414937759337, | |
| "grad_norm": 1.0481213331222534, | |
| "learning_rate": 8e-05, | |
| "loss": 0.354, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.38900414937759337, | |
| "eval_accuracy": 0.8549897820163488, | |
| "eval_f1_score": 0.7070964269415976, | |
| "eval_loss": 0.42259615659713745, | |
| "eval_runtime": 27.2867, | |
| "eval_samples_per_second": 3447.839, | |
| "eval_steps_per_second": 13.486, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5186721991701245, | |
| "grad_norm": 1.2174954414367676, | |
| "learning_rate": 7.993926065733265e-05, | |
| "loss": 0.3341, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5186721991701245, | |
| "eval_accuracy": 0.8659954019073569, | |
| "eval_f1_score": 0.7420034809783201, | |
| "eval_loss": 0.39591285586357117, | |
| "eval_runtime": 27.2541, | |
| "eval_samples_per_second": 3451.961, | |
| "eval_steps_per_second": 13.503, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6483402489626556, | |
| "grad_norm": 0.8332775831222534, | |
| "learning_rate": 7.975722709271799e-05, | |
| "loss": 0.3016, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6483402489626556, | |
| "eval_accuracy": 0.8649097411444142, | |
| "eval_f1_score": 0.7200074103536797, | |
| "eval_loss": 0.39967137575149536, | |
| "eval_runtime": 27.2593, | |
| "eval_samples_per_second": 3451.294, | |
| "eval_steps_per_second": 13.5, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7780082987551867, | |
| "grad_norm": 1.4946839809417725, | |
| "learning_rate": 7.94544521361089e-05, | |
| "loss": 0.2963, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7780082987551867, | |
| "eval_accuracy": 0.8722113419618529, | |
| "eval_f1_score": 0.762706973392787, | |
| "eval_loss": 0.3748593032360077, | |
| "eval_runtime": 27.2556, | |
| "eval_samples_per_second": 3451.767, | |
| "eval_steps_per_second": 13.502, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9076763485477178, | |
| "grad_norm": 0.8928861618041992, | |
| "learning_rate": 7.903185530509743e-05, | |
| "loss": 0.2824, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9076763485477178, | |
| "eval_accuracy": 0.8655164339237057, | |
| "eval_f1_score": 0.7593168456181596, | |
| "eval_loss": 0.3944181501865387, | |
| "eval_runtime": 27.2652, | |
| "eval_samples_per_second": 3450.546, | |
| "eval_steps_per_second": 13.497, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.037344398340249, | |
| "grad_norm": 0.7256332635879517, | |
| "learning_rate": 7.849072001237001e-05, | |
| "loss": 0.2672, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.037344398340249, | |
| "eval_accuracy": 0.8731692779291553, | |
| "eval_f1_score": 0.760049724716224, | |
| "eval_loss": 0.3828999400138855, | |
| "eval_runtime": 27.2571, | |
| "eval_samples_per_second": 3451.579, | |
| "eval_steps_per_second": 13.501, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.1670124481327802, | |
| "grad_norm": 0.8104374408721924, | |
| "learning_rate": 7.783268966802539e-05, | |
| "loss": 0.2383, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.1670124481327802, | |
| "eval_accuracy": 0.8741165701634878, | |
| "eval_f1_score": 0.750188219325254, | |
| "eval_loss": 0.3871263861656189, | |
| "eval_runtime": 27.2708, | |
| "eval_samples_per_second": 3449.845, | |
| "eval_steps_per_second": 13.494, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.2966804979253113, | |
| "grad_norm": 1.0798064470291138, | |
| "learning_rate": 7.705976268859207e-05, | |
| "loss": 0.2371, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.2966804979253113, | |
| "eval_accuracy": 0.872264560626703, | |
| "eval_f1_score": 0.7557877951536082, | |
| "eval_loss": 0.38182297348976135, | |
| "eval_runtime": 27.2826, | |
| "eval_samples_per_second": 3448.353, | |
| "eval_steps_per_second": 13.488, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.4263485477178424, | |
| "grad_norm": 0.905769407749176, | |
| "learning_rate": 7.61742864279031e-05, | |
| "loss": 0.2362, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.4263485477178424, | |
| "eval_accuracy": 0.8758514986376021, | |
| "eval_f1_score": 0.7580900780338423, | |
| "eval_loss": 0.38698655366897583, | |
| "eval_runtime": 27.2696, | |
| "eval_samples_per_second": 3449.99, | |
| "eval_steps_per_second": 13.495, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.5560165975103735, | |
| "grad_norm": 1.2751230001449585, | |
| "learning_rate": 7.517895004825956e-05, | |
| "loss": 0.2368, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.5560165975103735, | |
| "eval_accuracy": 0.8758834298365122, | |
| "eval_f1_score": 0.7618952550392576, | |
| "eval_loss": 0.38447433710098267, | |
| "eval_runtime": 27.2481, | |
| "eval_samples_per_second": 3452.724, | |
| "eval_steps_per_second": 13.506, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.6856846473029046, | |
| "grad_norm": 0.914685070514679, | |
| "learning_rate": 7.407677635353308e-05, | |
| "loss": 0.2363, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.6856846473029046, | |
| "eval_accuracy": 0.8790126873297003, | |
| "eval_f1_score": 0.7640527203630835, | |
| "eval_loss": 0.373322993516922, | |
| "eval_runtime": 27.2729, | |
| "eval_samples_per_second": 3449.582, | |
| "eval_steps_per_second": 13.493, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.8153526970954357, | |
| "grad_norm": 1.1023831367492676, | |
| "learning_rate": 7.28711126090098e-05, | |
| "loss": 0.2315, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.8153526970954357, | |
| "eval_accuracy": 0.8774054836512262, | |
| "eval_f1_score": 0.7444152176417628, | |
| "eval_loss": 0.38009563088417053, | |
| "eval_runtime": 27.2782, | |
| "eval_samples_per_second": 3448.913, | |
| "eval_steps_per_second": 13.491, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.9450207468879668, | |
| "grad_norm": 0.9494823813438416, | |
| "learning_rate": 7.156562037585576e-05, | |
| "loss": 0.2313, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.9450207468879668, | |
| "eval_accuracy": 0.8732544277929155, | |
| "eval_f1_score": 0.7243437098127102, | |
| "eval_loss": 0.39090830087661743, | |
| "eval_runtime": 27.2888, | |
| "eval_samples_per_second": 3447.563, | |
| "eval_steps_per_second": 13.485, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.074688796680498, | |
| "grad_norm": 0.9810757637023926, | |
| "learning_rate": 7.016426439107586e-05, | |
| "loss": 0.1833, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.074688796680498, | |
| "eval_accuracy": 0.8750957935967303, | |
| "eval_f1_score": 0.7539908472094665, | |
| "eval_loss": 0.4634763300418854, | |
| "eval_runtime": 27.2517, | |
| "eval_samples_per_second": 3452.266, | |
| "eval_steps_per_second": 13.504, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.204356846473029, | |
| "grad_norm": 2.2647929191589355, | |
| "learning_rate": 6.867130052673806e-05, | |
| "loss": 0.1435, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.204356846473029, | |
| "eval_accuracy": 0.8713066246594006, | |
| "eval_f1_score": 0.75353223953074, | |
| "eval_loss": 0.4573169946670532, | |
| "eval_runtime": 27.2786, | |
| "eval_samples_per_second": 3448.858, | |
| "eval_steps_per_second": 13.49, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.3340248962655603, | |
| "grad_norm": 1.596372365951538, | |
| "learning_rate": 6.709126286502965e-05, | |
| "loss": 0.1444, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.3340248962655603, | |
| "eval_accuracy": 0.876926515667575, | |
| "eval_f1_score": 0.7590948191097212, | |
| "eval_loss": 0.4492976665496826, | |
| "eval_runtime": 27.2687, | |
| "eval_samples_per_second": 3450.111, | |
| "eval_steps_per_second": 13.495, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.4636929460580914, | |
| "grad_norm": 1.2077136039733887, | |
| "learning_rate": 6.542894992839873e-05, | |
| "loss": 0.1432, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.4636929460580914, | |
| "eval_accuracy": 0.8714556369209809, | |
| "eval_f1_score": 0.7461849778780044, | |
| "eval_loss": 0.4478127658367157, | |
| "eval_runtime": 27.271, | |
| "eval_samples_per_second": 3449.821, | |
| "eval_steps_per_second": 13.494, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.5933609958506225, | |
| "grad_norm": 0.8201944828033447, | |
| "learning_rate": 6.368941010659921e-05, | |
| "loss": 0.1475, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.5933609958506225, | |
| "eval_accuracy": 0.8705083446866485, | |
| "eval_f1_score": 0.7433809202400957, | |
| "eval_loss": 0.44828951358795166, | |
| "eval_runtime": 27.2671, | |
| "eval_samples_per_second": 3450.317, | |
| "eval_steps_per_second": 13.496, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.7230290456431536, | |
| "grad_norm": 0.8232116103172302, | |
| "learning_rate": 6.18779263248971e-05, | |
| "loss": 0.1432, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.7230290456431536, | |
| "eval_accuracy": 0.8751809434604905, | |
| "eval_f1_score": 0.7644568347735216, | |
| "eval_loss": 0.4298805892467499, | |
| "eval_runtime": 27.2878, | |
| "eval_samples_per_second": 3447.699, | |
| "eval_steps_per_second": 13.486, | |
| "step": 10500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 30000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.654584853376e+16, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |