| { |
| "best_global_step": 2188, |
| "best_metric": 0.8126, |
| "best_model_checkpoint": "/ceph/home/student.aau.dk/uo02pm/exam_m4/assignment_2/results_a2/checkpoint-2188", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2188, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04570383912248629, |
| "grad_norm": 1.421935796737671, |
| "learning_rate": 9.04109589041096e-06, |
| "loss": 0.6737904357910156, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09140767824497258, |
| "grad_norm": 2.940828800201416, |
| "learning_rate": 1.8173515981735163e-05, |
| "loss": 0.5519969177246093, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13711151736745886, |
| "grad_norm": 3.252488851547241, |
| "learning_rate": 1.9187404773996955e-05, |
| "loss": 0.5031718063354492, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18281535648994515, |
| "grad_norm": 3.113980770111084, |
| "learning_rate": 1.8171660741493145e-05, |
| "loss": 0.4962876510620117, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.22851919561243145, |
| "grad_norm": 3.6596839427948, |
| "learning_rate": 1.7155916708989335e-05, |
| "loss": 0.4816793060302734, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2742230347349177, |
| "grad_norm": 2.7801551818847656, |
| "learning_rate": 1.6140172676485525e-05, |
| "loss": 0.4870823669433594, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.31992687385740404, |
| "grad_norm": 4.328639984130859, |
| "learning_rate": 1.5124428643981718e-05, |
| "loss": 0.45206031799316404, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3656307129798903, |
| "grad_norm": 3.445432186126709, |
| "learning_rate": 1.410868461147791e-05, |
| "loss": 0.45428401947021485, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4113345521023766, |
| "grad_norm": 2.499093770980835, |
| "learning_rate": 1.30929405789741e-05, |
| "loss": 0.4185186004638672, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4570383912248629, |
| "grad_norm": 2.7945611476898193, |
| "learning_rate": 1.2077196546470291e-05, |
| "loss": 0.4620367431640625, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5027422303473492, |
| "grad_norm": 5.78580904006958, |
| "learning_rate": 1.1061452513966481e-05, |
| "loss": 0.443508186340332, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5484460694698354, |
| "grad_norm": 2.114068031311035, |
| "learning_rate": 1.0045708481462673e-05, |
| "loss": 0.43310195922851563, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5941499085923218, |
| "grad_norm": 3.4981911182403564, |
| "learning_rate": 9.029964448958863e-06, |
| "loss": 0.4277406311035156, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6398537477148081, |
| "grad_norm": 3.414874792098999, |
| "learning_rate": 8.014220416455055e-06, |
| "loss": 0.4359885025024414, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6855575868372943, |
| "grad_norm": 3.164499282836914, |
| "learning_rate": 6.998476383951245e-06, |
| "loss": 0.4253990173339844, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7312614259597806, |
| "grad_norm": 4.53344202041626, |
| "learning_rate": 5.9827323514474355e-06, |
| "loss": 0.4402793884277344, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7769652650822669, |
| "grad_norm": 2.963273525238037, |
| "learning_rate": 4.966988318943627e-06, |
| "loss": 0.43116336822509765, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8226691042047533, |
| "grad_norm": 2.139761447906494, |
| "learning_rate": 3.951244286439818e-06, |
| "loss": 0.4201315307617188, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8683729433272395, |
| "grad_norm": 4.3587775230407715, |
| "learning_rate": 2.9355002539360083e-06, |
| "loss": 0.44918079376220704, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.9140767824497258, |
| "grad_norm": 3.786616802215576, |
| "learning_rate": 1.9197562214321996e-06, |
| "loss": 0.4404931640625, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9597806215722121, |
| "grad_norm": 4.491762161254883, |
| "learning_rate": 9.040121889283901e-07, |
| "loss": 0.4099996566772461, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8126, |
| "eval_loss": 0.4067275822162628, |
| "eval_runtime": 37.3001, |
| "eval_samples_per_second": 134.048, |
| "eval_steps_per_second": 4.209, |
| "step": 2188 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2188, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4604443468800000.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|