| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9834710743801653, |
| "eval_steps": 500, |
| "global_step": 30, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06611570247933884, |
| "grad_norm": 2.8536525686285126, |
| "learning_rate": 9.972609476841368e-06, |
| "loss": 0.6559, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.1322314049586777, |
| "grad_norm": 1.970499624359255, |
| "learning_rate": 9.890738003669029e-06, |
| "loss": 0.6169, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.19834710743801653, |
| "grad_norm": 1.5028468452447263, |
| "learning_rate": 9.755282581475769e-06, |
| "loss": 0.5649, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.2644628099173554, |
| "grad_norm": 1.3243129446505333, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.5808, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.3305785123966942, |
| "grad_norm": 1.4267365776204979, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.5175, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.39669421487603307, |
| "grad_norm": 1.17117628074097, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.5006, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.4628099173553719, |
| "grad_norm": 0.9693499445615545, |
| "learning_rate": 8.715724127386971e-06, |
| "loss": 0.4971, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.5289256198347108, |
| "grad_norm": 0.9115887246842794, |
| "learning_rate": 8.345653031794292e-06, |
| "loss": 0.4583, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.5950413223140496, |
| "grad_norm": 0.9935130160518126, |
| "learning_rate": 7.938926261462366e-06, |
| "loss": 0.4894, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.6611570247933884, |
| "grad_norm": 0.716940184449768, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.4594, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.6291261771580062, |
| "learning_rate": 7.033683215379002e-06, |
| "loss": 0.4632, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.7933884297520661, |
| "grad_norm": 0.607799984402943, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.4418, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.859504132231405, |
| "grad_norm": 0.5585873261520395, |
| "learning_rate": 6.039558454088796e-06, |
| "loss": 0.4408, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.9256198347107438, |
| "grad_norm": 0.5208050211378504, |
| "learning_rate": 5.522642316338268e-06, |
| "loss": 0.4488, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.9917355371900827, |
| "grad_norm": 0.4813980524846592, |
| "learning_rate": 5e-06, |
| "loss": 0.4231, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.0578512396694215, |
| "grad_norm": 0.9979175258168443, |
| "learning_rate": 4.477357683661734e-06, |
| "loss": 0.7701, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.1239669421487604, |
| "grad_norm": 0.40747510690543914, |
| "learning_rate": 3.960441545911205e-06, |
| "loss": 0.4195, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.1900826446280992, |
| "grad_norm": 0.41881515491543164, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.4118, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.256198347107438, |
| "grad_norm": 0.4616342375325426, |
| "learning_rate": 2.966316784621e-06, |
| "loss": 0.4312, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.322314049586777, |
| "grad_norm": 0.37891854032063194, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.4391, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.3884297520661157, |
| "grad_norm": 0.35467137628778883, |
| "learning_rate": 2.061073738537635e-06, |
| "loss": 0.4185, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.37549455615977917, |
| "learning_rate": 1.6543469682057105e-06, |
| "loss": 0.4221, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.5206611570247934, |
| "grad_norm": 0.3236181228683793, |
| "learning_rate": 1.2842758726130283e-06, |
| "loss": 0.4163, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.5867768595041323, |
| "grad_norm": 0.3253282977160117, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.4022, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.6528925619834711, |
| "grad_norm": 0.32266576896837756, |
| "learning_rate": 6.698729810778065e-07, |
| "loss": 0.429, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.71900826446281, |
| "grad_norm": 0.326714939228544, |
| "learning_rate": 4.322727117869951e-07, |
| "loss": 0.3895, |
| "step": 26 |
| }, |
| { |
| "epoch": 1.7851239669421488, |
| "grad_norm": 0.3028739766903029, |
| "learning_rate": 2.447174185242324e-07, |
| "loss": 0.4057, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.8512396694214877, |
| "grad_norm": 0.3100776604423323, |
| "learning_rate": 1.0926199633097156e-07, |
| "loss": 0.405, |
| "step": 28 |
| }, |
| { |
| "epoch": 1.9173553719008265, |
| "grad_norm": 0.3090876877654164, |
| "learning_rate": 2.7390523158633552e-08, |
| "loss": 0.4142, |
| "step": 29 |
| }, |
| { |
| "epoch": 1.9834710743801653, |
| "grad_norm": 0.312042538014464, |
| "learning_rate": 0.0, |
| "loss": 0.41, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.9834710743801653, |
| "step": 30, |
| "total_flos": 13617126703104.0, |
| "train_loss": 0.4714245935281118, |
| "train_runtime": 431.1995, |
| "train_samples_per_second": 2.236, |
| "train_steps_per_second": 0.07 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 30, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 70000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 13617126703104.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|