| { | |
| "best_metric": 1.972524881362915, | |
| "best_model_checkpoint": "./TAPT_data-V2_Bioformer-16L_LR-2e-05/checkpoint-2436", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 6090, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 25.086040496826172, | |
| "learning_rate": 1.9150943396226415e-05, | |
| "loss": 2.2144, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.0326976776123047, | |
| "eval_runtime": 2.0957, | |
| "eval_samples_per_second": 928.082, | |
| "eval_steps_per_second": 58.214, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 25.068096160888672, | |
| "learning_rate": 1.7023060796645702e-05, | |
| "loss": 1.9612, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.9747841358184814, | |
| "eval_runtime": 2.0904, | |
| "eval_samples_per_second": 930.443, | |
| "eval_steps_per_second": 58.362, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 24.59003448486328, | |
| "learning_rate": 1.4895178197064991e-05, | |
| "loss": 1.839, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.05035138130188, | |
| "eval_runtime": 2.2132, | |
| "eval_samples_per_second": 878.802, | |
| "eval_steps_per_second": 55.123, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 24.1608829498291, | |
| "learning_rate": 1.2767295597484276e-05, | |
| "loss": 1.7475, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.972524881362915, | |
| "eval_runtime": 2.1181, | |
| "eval_samples_per_second": 918.286, | |
| "eval_steps_per_second": 57.599, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 32.5782356262207, | |
| "learning_rate": 1.0639412997903564e-05, | |
| "loss": 1.6781, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 2.0161244869232178, | |
| "eval_runtime": 2.1106, | |
| "eval_samples_per_second": 921.539, | |
| "eval_steps_per_second": 57.804, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 18.229835510253906, | |
| "learning_rate": 8.511530398322851e-06, | |
| "loss": 1.6427, | |
| "step": 3654 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 1.9868873357772827, | |
| "eval_runtime": 2.1141, | |
| "eval_samples_per_second": 920.02, | |
| "eval_steps_per_second": 57.708, | |
| "step": 3654 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 19.829086303710938, | |
| "learning_rate": 6.383647798742138e-06, | |
| "loss": 1.6113, | |
| "step": 4263 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.0333664417266846, | |
| "eval_runtime": 2.1163, | |
| "eval_samples_per_second": 919.049, | |
| "eval_steps_per_second": 57.647, | |
| "step": 4263 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 24.086036682128906, | |
| "learning_rate": 4.2557651991614255e-06, | |
| "loss": 1.5701, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 1.9919387102127075, | |
| "eval_runtime": 2.1017, | |
| "eval_samples_per_second": 925.423, | |
| "eval_steps_per_second": 58.047, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 32.23151397705078, | |
| "learning_rate": 2.1278825995807127e-06, | |
| "loss": 1.5342, | |
| "step": 5481 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 1.9797741174697876, | |
| "eval_runtime": 2.1025, | |
| "eval_samples_per_second": 925.096, | |
| "eval_steps_per_second": 58.027, | |
| "step": 5481 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 15.350183486938477, | |
| "learning_rate": 0.0, | |
| "loss": 1.5188, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 2.00888991355896, | |
| "eval_runtime": 2.1152, | |
| "eval_samples_per_second": 919.522, | |
| "eval_steps_per_second": 57.677, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 6090, | |
| "total_flos": 1536221652235776.0, | |
| "train_loss": 1.731730433872768, | |
| "train_runtime": 362.8983, | |
| "train_samples_per_second": 268.202, | |
| "train_steps_per_second": 16.782 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 6090, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1536221652235776.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |