| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1000, |
| "global_step": 287, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03484320557491289, |
| "grad_norm": 3.216470119539481, |
| "learning_rate": 6e-06, |
| "loss": 1.4213, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06968641114982578, |
| "grad_norm": 0.9358414619228234, |
| "learning_rate": 9.994664874011864e-06, |
| "loss": 0.8739, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10452961672473868, |
| "grad_norm": 0.762921970961328, |
| "learning_rate": 9.93477538444123e-06, |
| "loss": 0.7614, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13937282229965156, |
| "grad_norm": 0.5825132744489881, |
| "learning_rate": 9.809128215864096e-06, |
| "loss": 0.6971, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17421602787456447, |
| "grad_norm": 0.5779846846514067, |
| "learning_rate": 9.619397662556434e-06, |
| "loss": 0.6933, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.20905923344947736, |
| "grad_norm": 0.6432877395688604, |
| "learning_rate": 9.368111953231849e-06, |
| "loss": 0.6534, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24390243902439024, |
| "grad_norm": 0.5161784198014567, |
| "learning_rate": 9.058619561473308e-06, |
| "loss": 0.6454, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2787456445993031, |
| "grad_norm": 0.5515024392970438, |
| "learning_rate": 8.695044586103297e-06, |
| "loss": 0.6467, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.313588850174216, |
| "grad_norm": 0.5638319890229825, |
| "learning_rate": 8.282231796065215e-06, |
| "loss": 0.655, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.34843205574912894, |
| "grad_norm": 0.5348182598237928, |
| "learning_rate": 7.82568207211296e-06, |
| "loss": 0.6496, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3832752613240418, |
| "grad_norm": 0.5584260715846722, |
| "learning_rate": 7.33147910557174e-06, |
| "loss": 0.6559, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4181184668989547, |
| "grad_norm": 0.522415331465173, |
| "learning_rate": 6.806208330935766e-06, |
| "loss": 0.6036, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4529616724738676, |
| "grad_norm": 0.6016439526998959, |
| "learning_rate": 6.2568691725555144e-06, |
| "loss": 0.6176, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 0.564282507025487, |
| "learning_rate": 5.690781774759412e-06, |
| "loss": 0.6249, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5226480836236934, |
| "grad_norm": 0.6212522251481895, |
| "learning_rate": 5.115489458265006e-06, |
| "loss": 0.6282, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5574912891986062, |
| "grad_norm": 0.5367960373375557, |
| "learning_rate": 4.53865820268349e-06, |
| "loss": 0.6033, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5923344947735192, |
| "grad_norm": 0.5093764149323716, |
| "learning_rate": 3.967974494549803e-06, |
| "loss": 0.5936, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.627177700348432, |
| "grad_norm": 0.552515840679091, |
| "learning_rate": 3.4110429020904924e-06, |
| "loss": 0.625, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.662020905923345, |
| "grad_norm": 0.535450593688453, |
| "learning_rate": 2.8752847415828923e-06, |
| "loss": 0.6178, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6968641114982579, |
| "grad_norm": 0.5983891293222291, |
| "learning_rate": 2.3678391856132203e-06, |
| "loss": 0.5937, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7317073170731707, |
| "grad_norm": 0.5614553572127612, |
| "learning_rate": 1.8954681310021434e-06, |
| "loss": 0.609, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7665505226480837, |
| "grad_norm": 0.4869927011463621, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.6036, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8013937282229965, |
| "grad_norm": 0.5359904941043641, |
| "learning_rate": 1.0805763339010329e-06, |
| "loss": 0.6067, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8362369337979094, |
| "grad_norm": 0.5351035133523918, |
| "learning_rate": 7.489143213519301e-07, |
| "loss": 0.6201, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8710801393728222, |
| "grad_norm": 0.546788838937359, |
| "learning_rate": 4.738995735125895e-07, |
| "loss": 0.6082, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9059233449477352, |
| "grad_norm": 0.5760609679486035, |
| "learning_rate": 2.5919676204517073e-07, |
| "loss": 0.6264, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9407665505226481, |
| "grad_norm": 0.47281891685039446, |
| "learning_rate": 1.0766688009695548e-07, |
| "loss": 0.5798, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 0.5289223385943652, |
| "learning_rate": 2.1329118524827662e-08, |
| "loss": 0.5909, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 287, |
| "total_flos": 1387071100944384.0, |
| "train_loss": 0.6654956722924102, |
| "train_runtime": 27782.0837, |
| "train_samples_per_second": 1.322, |
| "train_steps_per_second": 0.01 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 287, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1387071100944384.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|