| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9927360774818403, | |
| "eval_steps": 500, | |
| "global_step": 309, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09685230024213075, | |
| "grad_norm": 1.6106698543432953, | |
| "learning_rate": 5e-06, | |
| "loss": 0.649, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1937046004842615, | |
| "grad_norm": 0.9911008535692477, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5864, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.29055690072639223, | |
| "grad_norm": 0.6386745349148736, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5582, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.387409200968523, | |
| "grad_norm": 1.0205474281151061, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5464, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.48426150121065376, | |
| "grad_norm": 0.741493469056709, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5311, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.5811138014527845, | |
| "grad_norm": 0.7786501028026512, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5181, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.6711455522499474, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5158, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.774818401937046, | |
| "grad_norm": 0.6218873174772125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5126, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.8716707021791767, | |
| "grad_norm": 0.6652769231147759, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5105, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.9685230024213075, | |
| "grad_norm": 1.4354098566929865, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5082, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.9975786924939467, | |
| "eval_loss": 0.49066221714019775, | |
| "eval_runtime": 69.4631, | |
| "eval_samples_per_second": 40.021, | |
| "eval_steps_per_second": 0.633, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.0653753026634383, | |
| "grad_norm": 0.8075871605198771, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5159, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.162227602905569, | |
| "grad_norm": 0.7654895903052866, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4583, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.2590799031476998, | |
| "grad_norm": 0.47351004510337863, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4586, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.3559322033898304, | |
| "grad_norm": 0.5062829494154636, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4572, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.4527845036319613, | |
| "grad_norm": 0.6119092771725125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4544, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.549636803874092, | |
| "grad_norm": 0.6212058614890003, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4561, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.6464891041162226, | |
| "grad_norm": 0.5105359500584984, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4518, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.7433414043583535, | |
| "grad_norm": 0.5867880979483323, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4551, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.8401937046004844, | |
| "grad_norm": 0.4498960324504211, | |
| "learning_rate": 5e-06, | |
| "loss": 0.454, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.937046004842615, | |
| "grad_norm": 0.5182866069406472, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4499, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.9951573849878934, | |
| "eval_loss": 0.47824251651763916, | |
| "eval_runtime": 71.5938, | |
| "eval_samples_per_second": 38.83, | |
| "eval_steps_per_second": 0.615, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 2.0338983050847457, | |
| "grad_norm": 0.9414090883543634, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4671, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.1307506053268765, | |
| "grad_norm": 0.5171048417889069, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4066, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.2276029055690074, | |
| "grad_norm": 0.5123629438372025, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4113, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.324455205811138, | |
| "grad_norm": 0.5363285052863767, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4081, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.4213075060532687, | |
| "grad_norm": 0.4907788960865576, | |
| "learning_rate": 5e-06, | |
| "loss": 0.407, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.5181598062953996, | |
| "grad_norm": 0.507228977380475, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4051, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.61501210653753, | |
| "grad_norm": 0.4923140802099653, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4109, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.711864406779661, | |
| "grad_norm": 0.5763086112386324, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3986, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.8087167070217918, | |
| "grad_norm": 0.4788239568139877, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4115, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.9055690072639226, | |
| "grad_norm": 0.5281993404834231, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4124, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.9927360774818403, | |
| "eval_loss": 0.47956007719039917, | |
| "eval_runtime": 68.3404, | |
| "eval_samples_per_second": 40.679, | |
| "eval_steps_per_second": 0.644, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.9927360774818403, | |
| "step": 309, | |
| "total_flos": 517377129185280.0, | |
| "train_loss": 0.4712127952513957, | |
| "train_runtime": 10324.3717, | |
| "train_samples_per_second": 15.347, | |
| "train_steps_per_second": 0.03 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 309, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 517377129185280.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |