| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.97165991902834, |
| "eval_steps": 500, |
| "global_step": 82, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.024291497975708502, |
| "grad_norm": 0.09191887920247531, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.4142, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.1214574898785425, |
| "grad_norm": 0.09020457134242624, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.4433, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.242914979757085, |
| "grad_norm": 0.0766066808817807, |
| "learning_rate": 9.995370575511151e-06, |
| "loss": 0.4128, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3643724696356275, |
| "grad_norm": 0.06441683314911666, |
| "learning_rate": 9.834239068026388e-06, |
| "loss": 0.4322, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.48582995951417, |
| "grad_norm": 0.055907229466585676, |
| "learning_rate": 9.450137882173385e-06, |
| "loss": 0.4369, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6072874493927125, |
| "grad_norm": 0.04914388925866741, |
| "learning_rate": 8.860782922495821e-06, |
| "loss": 0.3921, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.728744939271255, |
| "grad_norm": 0.0453987667724486, |
| "learning_rate": 8.093357016312518e-06, |
| "loss": 0.3694, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8502024291497976, |
| "grad_norm": 0.053149652003049455, |
| "learning_rate": 7.183256159780321e-06, |
| "loss": 0.3811, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.97165991902834, |
| "grad_norm": 0.04557625788490129, |
| "learning_rate": 6.1724569478520495e-06, |
| "loss": 0.391, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.40577438473701477, |
| "eval_runtime": 89.9891, |
| "eval_samples_per_second": 6.101, |
| "eval_steps_per_second": 1.534, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.0728744939271255, |
| "grad_norm": 0.04223782838268721, |
| "learning_rate": 5.107580487181112e-06, |
| "loss": 0.3726, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.194331983805668, |
| "grad_norm": 0.04745008341707443, |
| "learning_rate": 4.037742090145851e-06, |
| "loss": 0.3255, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.3157894736842106, |
| "grad_norm": 0.044998456767885005, |
| "learning_rate": 3.0122859285872214e-06, |
| "loss": 0.2935, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.4372469635627532, |
| "grad_norm": 0.047569447760125, |
| "learning_rate": 2.0785091318581577e-06, |
| "loss": 0.3633, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.5587044534412957, |
| "grad_norm": 0.042433025378520894, |
| "learning_rate": 1.2794803006431984e-06, |
| "loss": 0.3413, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.680161943319838, |
| "grad_norm": 0.04190184177466641, |
| "learning_rate": 6.52053053266945e-07, |
| "loss": 0.3395, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.8016194331983806, |
| "grad_norm": 0.043317378643889536, |
| "learning_rate": 2.2516622572372416e-07, |
| "loss": 0.3358, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.04222433691390119, |
| "learning_rate": 1.850912532696092e-08, |
| "loss": 0.3539, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.97165991902834, |
| "eval_loss": 0.41050779819488525, |
| "eval_runtime": 89.729, |
| "eval_samples_per_second": 6.118, |
| "eval_steps_per_second": 1.538, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.97165991902834, |
| "step": 82, |
| "total_flos": 2.0517622997476966e+17, |
| "train_loss": 0.37475141192354805, |
| "train_runtime": 2614.8185, |
| "train_samples_per_second": 1.511, |
| "train_steps_per_second": 0.031 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 82, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.0517622997476966e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|