{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0417287630402385,
  "eval_steps": 50,
  "global_step": 350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029806259314456036,
      "grad_norm": 2.8659629606409e-05,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.0016,
      "step": 10
    },
    {
      "epoch": 0.05961251862891207,
      "grad_norm": 1.3777846106677316e-05,
      "learning_rate": 2.814814814814815e-05,
      "loss": 0.59,
      "step": 20
    },
    {
      "epoch": 0.08941877794336811,
      "grad_norm": 7.97840766608715e-06,
      "learning_rate": 4.296296296296296e-05,
      "loss": 0.6772,
      "step": 30
    },
    {
      "epoch": 0.11922503725782414,
      "grad_norm": 6.855416359030642e-06,
      "learning_rate": 5.7777777777777776e-05,
      "loss": 0.6207,
      "step": 40
    },
    {
      "epoch": 0.14903129657228018,
      "grad_norm": 8.57202394399792e-06,
      "learning_rate": 7.25925925925926e-05,
      "loss": 0.6395,
      "step": 50
    },
    {
      "epoch": 0.14903129657228018,
      "eval_loss": 0.5846871733665466,
      "eval_runtime": 87.5403,
      "eval_samples_per_second": 3.233,
      "eval_steps_per_second": 1.622,
      "step": 50
    },
    {
      "epoch": 0.17883755588673622,
      "grad_norm": 3.3820235785242403e-06,
      "learning_rate": 8.740740740740741e-05,
      "loss": 0.4937,
      "step": 60
    },
    {
      "epoch": 0.20864381520119224,
      "grad_norm": 5.906346359552117e-06,
      "learning_rate": 0.00010222222222222222,
      "loss": 0.467,
      "step": 70
    },
    {
      "epoch": 0.23845007451564829,
      "grad_norm": 5.968211553408764e-06,
      "learning_rate": 0.00011703703703703704,
      "loss": 0.6052,
      "step": 80
    },
    {
      "epoch": 0.26825633383010433,
      "grad_norm": 6.654247954429593e-06,
      "learning_rate": 0.00013185185185185186,
      "loss": 0.4269,
      "step": 90
    },
    {
      "epoch": 0.29806259314456035,
      "grad_norm": 1.4780930541746784e-05,
      "learning_rate": 0.00014666666666666666,
      "loss": 0.4415,
      "step": 100
    },
    {
      "epoch": 0.29806259314456035,
      "eval_loss": 0.49561959505081177,
      "eval_runtime": 86.7616,
      "eval_samples_per_second": 3.262,
      "eval_steps_per_second": 1.637,
      "step": 100
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 5.26007761436631e-06,
      "learning_rate": 0.0001614814814814815,
      "loss": 0.4201,
      "step": 110
    },
    {
      "epoch": 0.35767511177347244,
      "grad_norm": 1.0745498912001494e-05,
      "learning_rate": 0.0001762962962962963,
      "loss": 0.5687,
      "step": 120
    },
    {
      "epoch": 0.38748137108792846,
      "grad_norm": 9.13943767955061e-06,
      "learning_rate": 0.00019111111111111114,
      "loss": 0.4544,
      "step": 130
    },
    {
      "epoch": 0.4172876304023845,
      "grad_norm": 9.74733575276332e-06,
      "learning_rate": 0.00019999459826567048,
      "loss": 0.3984,
      "step": 140
    },
    {
      "epoch": 0.44709388971684055,
      "grad_norm": 7.19069566912367e-06,
      "learning_rate": 0.00019993383545625465,
      "loss": 0.3551,
      "step": 150
    },
    {
      "epoch": 0.44709388971684055,
      "eval_loss": 0.44105133414268494,
      "eval_runtime": 86.7076,
      "eval_samples_per_second": 3.264,
      "eval_steps_per_second": 1.638,
      "step": 150
    },
    {
      "epoch": 0.47690014903129657,
      "grad_norm": 5.66791504752473e-06,
      "learning_rate": 0.00019980559883241722,
      "loss": 0.3437,
      "step": 160
    },
    {
      "epoch": 0.5067064083457526,
      "grad_norm": 9.916246199281886e-06,
      "learning_rate": 0.0001996099749775874,
      "loss": 0.533,
      "step": 170
    },
    {
      "epoch": 0.5365126676602087,
      "grad_norm": 6.7572823354566935e-06,
      "learning_rate": 0.00019934709597403352,
      "loss": 0.4875,
      "step": 180
    },
    {
      "epoch": 0.5663189269746647,
      "grad_norm": 2.7519972718437202e-05,
      "learning_rate": 0.00019901713931368332,
      "loss": 0.4088,
      "step": 190
    },
    {
      "epoch": 0.5961251862891207,
      "grad_norm": 6.2564413383370265e-06,
      "learning_rate": 0.00019862032777828405,
      "loss": 0.3734,
      "step": 200
    },
    {
      "epoch": 0.5961251862891207,
      "eval_loss": 0.416751503944397,
      "eval_runtime": 86.855,
      "eval_samples_per_second": 3.258,
      "eval_steps_per_second": 1.635,
      "step": 200
    },
    {
      "epoch": 0.6259314456035767,
      "grad_norm": 6.829235189798055e-06,
      "learning_rate": 0.00019815692928898347,
      "loss": 0.4013,
      "step": 210
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 2.6349375730205793e-06,
      "learning_rate": 0.00019762725672543371,
      "loss": 0.439,
      "step": 220
    },
    {
      "epoch": 0.6855439642324889,
      "grad_norm": 6.470134849223541e-06,
      "learning_rate": 0.00019703166771453952,
      "loss": 0.3611,
      "step": 230
    },
    {
      "epoch": 0.7153502235469449,
      "grad_norm": 4.913066732115112e-06,
      "learning_rate": 0.0001963705643889941,
      "loss": 0.3843,
      "step": 240
    },
    {
      "epoch": 0.7451564828614009,
      "grad_norm": 8.839782822178677e-06,
      "learning_rate": 0.00019564439311576512,
      "loss": 0.4593,
      "step": 250
    },
    {
      "epoch": 0.7451564828614009,
      "eval_loss": 0.3998318612575531,
      "eval_runtime": 86.6219,
      "eval_samples_per_second": 3.267,
      "eval_steps_per_second": 1.639,
      "step": 250
    },
    {
      "epoch": 0.7749627421758569,
      "grad_norm": 6.905674126755912e-06,
      "learning_rate": 0.00019485364419471454,
      "loss": 0.3549,
      "step": 260
    },
    {
      "epoch": 0.8047690014903129,
      "grad_norm": 1.1015033123840112e-05,
      "learning_rate": 0.00019399885152755558,
      "loss": 0.33,
      "step": 270
    },
    {
      "epoch": 0.834575260804769,
      "grad_norm": 7.4981344369007275e-06,
      "learning_rate": 0.00019308059225737014,
      "loss": 0.464,
      "step": 280
    },
    {
      "epoch": 0.8643815201192251,
      "grad_norm": 1.2525980309874285e-05,
      "learning_rate": 0.00019209948637893088,
      "loss": 0.4893,
      "step": 290
    },
    {
      "epoch": 0.8941877794336811,
      "grad_norm": 8.533593245374504e-06,
      "learning_rate": 0.00019105619632008982,
      "loss": 0.3002,
      "step": 300
    },
    {
      "epoch": 0.8941877794336811,
      "eval_loss": 0.38659366965293884,
      "eval_runtime": 86.718,
      "eval_samples_per_second": 3.263,
      "eval_steps_per_second": 1.637,
      "step": 300
    },
    {
      "epoch": 0.9239940387481371,
      "grad_norm": 6.511543233500561e-06,
      "learning_rate": 0.0001899514264945173,
      "loss": 0.3519,
      "step": 310
    },
    {
      "epoch": 0.9538002980625931,
      "grad_norm": 9.529394446872175e-06,
      "learning_rate": 0.00018878592282609228,
      "loss": 0.2376,
      "step": 320
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 5.924814558966318e-06,
      "learning_rate": 0.00018756047224526606,
      "loss": 0.3868,
      "step": 330
    },
    {
      "epoch": 1.0119225037257824,
      "grad_norm": 7.456989806087222e-06,
      "learning_rate": 0.0001862759021577385,
      "loss": 0.4924,
      "step": 340
    },
    {
      "epoch": 1.0417287630402385,
      "grad_norm": 8.004604751477018e-06,
      "learning_rate": 0.00018493307988580652,
      "loss": 0.3768,
      "step": 350
    },
    {
      "epoch": 1.0417287630402385,
      "eval_loss": 0.3797300159931183,
      "eval_runtime": 86.5928,
      "eval_samples_per_second": 3.268,
      "eval_steps_per_second": 1.64,
      "step": 350
    }
  ],
  "logging_steps": 10,
  "max_steps": 1344,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.971853201788314e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}