| { |
| "best_global_step": 282, |
| "best_metric": 0.0, |
| "best_model_checkpoint": "./star_trek_guard_finetuned/checkpoint-282", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 282, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0071111111111111115, |
| "grad_norm": 1424.3302001953125, |
| "learning_rate": 0.0, |
| "loss": 34.4477, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07111111111111111, |
| "grad_norm": 359.5785827636719, |
| "learning_rate": 4.186046511627907e-05, |
| "loss": 25.5053, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.14222222222222222, |
| "grad_norm": 55.2808952331543, |
| "learning_rate": 8.837209302325582e-05, |
| "loss": 4.533, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 0.00021057801495771855, |
| "learning_rate": 0.00013488372093023256, |
| "loss": 0.1875, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28444444444444444, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001813953488372093, |
| "loss": 0.6068, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.00012314319610595703, |
| "learning_rate": 0.00019987699691483048, |
| "loss": 1.8401, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 0.0010618583764880896, |
| "learning_rate": 0.00019912640693269752, |
| "loss": 0.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.49777777777777776, |
| "grad_norm": 0.018764492124319077, |
| "learning_rate": 0.00019769868307835994, |
| "loss": 0.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5688888888888889, |
| "grad_norm": 7.790327072143555e-05, |
| "learning_rate": 0.00019560357815343577, |
| "loss": 0.003, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.00014328956604003906, |
| "learning_rate": 0.00019285540384897073, |
| "loss": 0.04, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.08614543080329895, |
| "learning_rate": 0.00018947293298207635, |
| "loss": 0.1796, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7822222222222223, |
| "grad_norm": 0.0, |
| "learning_rate": 0.0001854792712585539, |
| "loss": 0.0002, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 0.0016153233591467142, |
| "learning_rate": 0.00018090169943749476, |
| "loss": 0.0269, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9244444444444444, |
| "grad_norm": 0.000989419175311923, |
| "learning_rate": 0.0001757714869760335, |
| "loss": 0.4039, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9955555555555555, |
| "grad_norm": 0.8112388849258423, |
| "learning_rate": 0.00017012367842724887, |
| "loss": 0.7485, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.00433349609375, |
| "eval_runtime": 37.2872, |
| "eval_samples_per_second": 13.409, |
| "eval_steps_per_second": 3.352, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.00020039081573486328, |
| "learning_rate": 0.00016399685405033167, |
| "loss": 0.3372, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1351111111111112, |
| "grad_norm": 0.006611417979001999, |
| "learning_rate": 0.00015743286626829437, |
| "loss": 0.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.2062222222222223, |
| "grad_norm": 2.5033950805664062e-05, |
| "learning_rate": 0.0001504765537734844, |
| "loss": 0.0735, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2773333333333334, |
| "grad_norm": 3.987550735473633e-05, |
| "learning_rate": 0.00014317543523384928, |
| "loss": 0.2116, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3484444444444446, |
| "grad_norm": 8.791685104370117e-05, |
| "learning_rate": 0.00013557938469225167, |
| "loss": 0.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.4195555555555557, |
| "grad_norm": 0.00015270709991455078, |
| "learning_rate": 0.00012774029087618446, |
| "loss": 0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4906666666666666, |
| "grad_norm": 152.50794982910156, |
| "learning_rate": 0.00011971170274514802, |
| "loss": 0.2612, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.561777777777778, |
| "grad_norm": 0.00700717605650425, |
| "learning_rate": 0.00011154846369695863, |
| "loss": 0.0043, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6328888888888888, |
| "grad_norm": 0.025026371702551842, |
| "learning_rate": 0.00010330633693173082, |
| "loss": 0.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 0.0, |
| "learning_rate": 9.504162453267777e-05, |
| "loss": 0.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.775111111111111, |
| "grad_norm": 0.00010925531387329102, |
| "learning_rate": 8.681078286579311e-05, |
| "loss": 0.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8462222222222222, |
| "grad_norm": 1.3470649719238281e-05, |
| "learning_rate": 7.867003692562534e-05, |
| "loss": 0.2697, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.9173333333333333, |
| "grad_norm": 5.3822994232177734e-05, |
| "learning_rate": 7.067499626155354e-05, |
| "loss": 0.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9884444444444445, |
| "grad_norm": 1.329183578491211e-05, |
| "learning_rate": 6.28802751081779e-05, |
| "loss": 0.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.0, |
| "eval_runtime": 37.3645, |
| "eval_samples_per_second": 13.382, |
| "eval_steps_per_second": 3.345, |
| "step": 282 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 423, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.93252304093184e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|