{ "best_metric": 0.7644568347735216, "best_model_checkpoint": "nucleotide-transformer-finetuned/checkpoint-10500", "epoch": 2.7230290456431536, "eval_steps": 500, "global_step": 10500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12966804979253113, "grad_norm": 2.4697630405426025, "learning_rate": 2.6666666666666667e-05, "loss": 0.9082, "step": 500 }, { "epoch": 0.12966804979253113, "eval_accuracy": 0.8507642200272479, "eval_f1_score": 0.6717309860785999, "eval_loss": 0.44925403594970703, "eval_runtime": 27.2642, "eval_samples_per_second": 3450.677, "eval_steps_per_second": 13.498, "step": 500 }, { "epoch": 0.25933609958506226, "grad_norm": 1.604984164237976, "learning_rate": 5.333333333333333e-05, "loss": 0.41, "step": 1000 }, { "epoch": 0.25933609958506226, "eval_accuracy": 0.8567779291553134, "eval_f1_score": 0.6951178092124136, "eval_loss": 0.42545419931411743, "eval_runtime": 27.28, "eval_samples_per_second": 3448.682, "eval_steps_per_second": 13.49, "step": 1000 }, { "epoch": 0.38900414937759337, "grad_norm": 1.0481213331222534, "learning_rate": 8e-05, "loss": 0.354, "step": 1500 }, { "epoch": 0.38900414937759337, "eval_accuracy": 0.8549897820163488, "eval_f1_score": 0.7070964269415976, "eval_loss": 0.42259615659713745, "eval_runtime": 27.2867, "eval_samples_per_second": 3447.839, "eval_steps_per_second": 13.486, "step": 1500 }, { "epoch": 0.5186721991701245, "grad_norm": 1.2174954414367676, "learning_rate": 7.993926065733265e-05, "loss": 0.3341, "step": 2000 }, { "epoch": 0.5186721991701245, "eval_accuracy": 0.8659954019073569, "eval_f1_score": 0.7420034809783201, "eval_loss": 0.39591285586357117, "eval_runtime": 27.2541, "eval_samples_per_second": 3451.961, "eval_steps_per_second": 13.503, "step": 2000 }, { "epoch": 0.6483402489626556, "grad_norm": 0.8332775831222534, "learning_rate": 7.975722709271799e-05, "loss": 0.3016, "step": 2500 }, { "epoch": 0.6483402489626556, "eval_accuracy": 0.8649097411444142, "eval_f1_score": 0.7200074103536797, "eval_loss": 0.39967137575149536, "eval_runtime": 27.2593, "eval_samples_per_second": 3451.294, "eval_steps_per_second": 13.5, "step": 2500 }, { "epoch": 0.7780082987551867, "grad_norm": 1.4946839809417725, "learning_rate": 7.94544521361089e-05, "loss": 0.2963, "step": 3000 }, { "epoch": 0.7780082987551867, "eval_accuracy": 0.8722113419618529, "eval_f1_score": 0.762706973392787, "eval_loss": 0.3748593032360077, "eval_runtime": 27.2556, "eval_samples_per_second": 3451.767, "eval_steps_per_second": 13.502, "step": 3000 }, { "epoch": 0.9076763485477178, "grad_norm": 0.8928861618041992, "learning_rate": 7.903185530509743e-05, "loss": 0.2824, "step": 3500 }, { "epoch": 0.9076763485477178, "eval_accuracy": 0.8655164339237057, "eval_f1_score": 0.7593168456181596, "eval_loss": 0.3944181501865387, "eval_runtime": 27.2652, "eval_samples_per_second": 3450.546, "eval_steps_per_second": 13.497, "step": 3500 }, { "epoch": 1.037344398340249, "grad_norm": 0.7256332635879517, "learning_rate": 7.849072001237001e-05, "loss": 0.2672, "step": 4000 }, { "epoch": 1.037344398340249, "eval_accuracy": 0.8731692779291553, "eval_f1_score": 0.760049724716224, "eval_loss": 0.3828999400138855, "eval_runtime": 27.2571, "eval_samples_per_second": 3451.579, "eval_steps_per_second": 13.501, "step": 4000 }, { "epoch": 1.1670124481327802, "grad_norm": 0.8104374408721924, "learning_rate": 7.783268966802539e-05, "loss": 0.2383, "step": 4500 }, { "epoch": 1.1670124481327802, "eval_accuracy": 0.8741165701634878, "eval_f1_score": 0.750188219325254, "eval_loss": 0.3871263861656189, "eval_runtime": 27.2708, "eval_samples_per_second": 3449.845, "eval_steps_per_second": 13.494, "step": 4500 }, { "epoch": 1.2966804979253113, "grad_norm": 1.0798064470291138, "learning_rate": 7.705976268859207e-05, "loss": 0.2371, "step": 5000 }, { "epoch": 1.2966804979253113, "eval_accuracy": 0.872264560626703, "eval_f1_score": 0.7557877951536082, "eval_loss": 0.38182297348976135, "eval_runtime": 27.2826, "eval_samples_per_second": 3448.353, "eval_steps_per_second": 13.488, "step": 5000 }, { "epoch": 1.4263485477178424, "grad_norm": 0.905769407749176, "learning_rate": 7.61742864279031e-05, "loss": 0.2362, "step": 5500 }, { "epoch": 1.4263485477178424, "eval_accuracy": 0.8758514986376021, "eval_f1_score": 0.7580900780338423, "eval_loss": 0.38698655366897583, "eval_runtime": 27.2696, "eval_samples_per_second": 3449.99, "eval_steps_per_second": 13.495, "step": 5500 }, { "epoch": 1.5560165975103735, "grad_norm": 1.2751230001449585, "learning_rate": 7.517895004825956e-05, "loss": 0.2368, "step": 6000 }, { "epoch": 1.5560165975103735, "eval_accuracy": 0.8758834298365122, "eval_f1_score": 0.7618952550392576, "eval_loss": 0.38447433710098267, "eval_runtime": 27.2481, "eval_samples_per_second": 3452.724, "eval_steps_per_second": 13.506, "step": 6000 }, { "epoch": 1.6856846473029046, "grad_norm": 0.914685070514679, "learning_rate": 7.407677635353308e-05, "loss": 0.2363, "step": 6500 }, { "epoch": 1.6856846473029046, "eval_accuracy": 0.8790126873297003, "eval_f1_score": 0.7640527203630835, "eval_loss": 0.373322993516922, "eval_runtime": 27.2729, "eval_samples_per_second": 3449.582, "eval_steps_per_second": 13.493, "step": 6500 }, { "epoch": 1.8153526970954357, "grad_norm": 1.1023831367492676, "learning_rate": 7.28711126090098e-05, "loss": 0.2315, "step": 7000 }, { "epoch": 1.8153526970954357, "eval_accuracy": 0.8774054836512262, "eval_f1_score": 0.7444152176417628, "eval_loss": 0.38009563088417053, "eval_runtime": 27.2782, "eval_samples_per_second": 3448.913, "eval_steps_per_second": 13.491, "step": 7000 }, { "epoch": 1.9450207468879668, "grad_norm": 0.9494823813438416, "learning_rate": 7.156562037585576e-05, "loss": 0.2313, "step": 7500 }, { "epoch": 1.9450207468879668, "eval_accuracy": 0.8732544277929155, "eval_f1_score": 0.7243437098127102, "eval_loss": 0.39090830087661743, "eval_runtime": 27.2888, "eval_samples_per_second": 3447.563, "eval_steps_per_second": 13.485, "step": 7500 }, { "epoch": 2.074688796680498, "grad_norm": 0.9810757637023926, "learning_rate": 7.016426439107586e-05, "loss": 0.1833, "step": 8000 }, { "epoch": 2.074688796680498, "eval_accuracy": 0.8750957935967303, "eval_f1_score": 0.7539908472094665, "eval_loss": 0.4634763300418854, "eval_runtime": 27.2517, "eval_samples_per_second": 3452.266, "eval_steps_per_second": 13.504, "step": 8000 }, { "epoch": 2.204356846473029, "grad_norm": 2.2647929191589355, "learning_rate": 6.867130052673806e-05, "loss": 0.1435, "step": 8500 }, { "epoch": 2.204356846473029, "eval_accuracy": 0.8713066246594006, "eval_f1_score": 0.75353223953074, "eval_loss": 0.4573169946670532, "eval_runtime": 27.2786, "eval_samples_per_second": 3448.858, "eval_steps_per_second": 13.49, "step": 8500 }, { "epoch": 2.3340248962655603, "grad_norm": 1.596372365951538, "learning_rate": 6.709126286502965e-05, "loss": 0.1444, "step": 9000 }, { "epoch": 2.3340248962655603, "eval_accuracy": 0.876926515667575, "eval_f1_score": 0.7590948191097212, "eval_loss": 0.4492976665496826, "eval_runtime": 27.2687, "eval_samples_per_second": 3450.111, "eval_steps_per_second": 13.495, "step": 9000 }, { "epoch": 2.4636929460580914, "grad_norm": 1.2077136039733887, "learning_rate": 6.542894992839873e-05, "loss": 0.1432, "step": 9500 }, { "epoch": 2.4636929460580914, "eval_accuracy": 0.8714556369209809, "eval_f1_score": 0.7461849778780044, "eval_loss": 0.4478127658367157, "eval_runtime": 27.271, "eval_samples_per_second": 3449.821, "eval_steps_per_second": 13.494, "step": 9500 }, { "epoch": 2.5933609958506225, "grad_norm": 0.8201944828033447, "learning_rate": 6.368941010659921e-05, "loss": 0.1475, "step": 10000 }, { "epoch": 2.5933609958506225, "eval_accuracy": 0.8705083446866485, "eval_f1_score": 0.7433809202400957, "eval_loss": 0.44828951358795166, "eval_runtime": 27.2671, "eval_samples_per_second": 3450.317, "eval_steps_per_second": 13.496, "step": 10000 }, { "epoch": 2.7230290456431536, "grad_norm": 0.8232116103172302, "learning_rate": 6.18779263248971e-05, "loss": 0.1432, "step": 10500 }, { "epoch": 2.7230290456431536, "eval_accuracy": 0.8751809434604905, "eval_f1_score": 0.7644568347735216, "eval_loss": 0.4298805892467499, "eval_runtime": 27.2878, "eval_samples_per_second": 3447.699, "eval_steps_per_second": 13.486, "step": 10500 } ], "logging_steps": 500, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.654584853376e+16, "train_batch_size": 128, "trial_name": null, "trial_params": null }