| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9950617283950618, |
| "eval_steps": 100, |
| "global_step": 202, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09876543209876543, |
| "grad_norm": 0.3113946318626404, |
| "learning_rate": 0.00019988322268323268, |
| "loss": 1.1897, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.19753086419753085, |
| "grad_norm": 0.25871261954307556, |
| "learning_rate": 0.00019781476007338058, |
| "loss": 0.6655, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.13192948698997498, |
| "learning_rate": 0.0001932129465573568, |
| "loss": 0.363, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3950617283950617, |
| "grad_norm": 0.1178111806511879, |
| "learning_rate": 0.00018619696668800492, |
| "loss": 0.2902, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "grad_norm": 0.06365109980106354, |
| "learning_rate": 0.00017694853065861662, |
| "loss": 0.2627, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.06805580109357834, |
| "learning_rate": 0.0001657071681043731, |
| "loss": 0.2479, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.691358024691358, |
| "grad_norm": 0.05795082449913025, |
| "learning_rate": 0.0001527640244106133, |
| "loss": 0.248, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7901234567901234, |
| "grad_norm": 0.054490040987730026, |
| "learning_rate": 0.0001384543202002851, |
| "loss": 0.2374, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.050945691764354706, |
| "learning_rate": 0.00012314866929589432, |
| "loss": 0.2364, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "grad_norm": 0.05336359888315201, |
| "learning_rate": 0.00010724348001617625, |
| "loss": 0.2314, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "eval_loss": 0.23581312596797943, |
| "eval_runtime": 270.7343, |
| "eval_samples_per_second": 0.665, |
| "eval_steps_per_second": 0.665, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.0864197530864197, |
| "grad_norm": 0.052394308149814606, |
| "learning_rate": 9.115068840886417e-05, |
| "loss": 0.2339, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1851851851851851, |
| "grad_norm": 0.06545981019735336, |
| "learning_rate": 7.528708932343304e-05, |
| "loss": 0.2301, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.2839506172839505, |
| "grad_norm": 0.06937788426876068, |
| "learning_rate": 6.006354164343046e-05, |
| "loss": 0.2322, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.382716049382716, |
| "grad_norm": 0.07582119852304459, |
| "learning_rate": 4.587432725720687e-05, |
| "loss": 0.2234, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "grad_norm": 0.11781789362430573, |
| "learning_rate": 3.308693936411421e-05, |
| "loss": 0.2164, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.5802469135802468, |
| "grad_norm": 0.08647400885820389, |
| "learning_rate": 2.2032564593677774e-05, |
| "loss": 0.2197, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.6790123456790123, |
| "grad_norm": 0.09350328147411346, |
| "learning_rate": 1.2997505445856084e-05, |
| "loss": 0.2194, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.14273136854171753, |
| "learning_rate": 6.215765206679569e-06, |
| "loss": 0.2219, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.876543209876543, |
| "grad_norm": 0.10584201663732529, |
| "learning_rate": 1.8629873860586566e-06, |
| "loss": 0.2298, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.9753086419753085, |
| "grad_norm": 0.08690895885229111, |
| "learning_rate": 5.190664313851068e-08, |
| "loss": 0.2228, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.9753086419753085, |
| "eval_loss": 0.2289411574602127, |
| "eval_runtime": 273.2063, |
| "eval_samples_per_second": 0.659, |
| "eval_steps_per_second": 0.659, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.9950617283950618, |
| "step": 202, |
| "total_flos": 1.2609051312227942e+17, |
| "train_loss": 0.3101574260704588, |
| "train_runtime": 14633.2305, |
| "train_samples_per_second": 0.221, |
| "train_steps_per_second": 0.014 |
| }, |
| { |
| "epoch": 1.9950617283950618, |
| "eval_loss": 0.22894252836704254, |
| "eval_runtime": 273.8519, |
| "eval_samples_per_second": 0.657, |
| "eval_steps_per_second": 0.657, |
| "step": 202 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 202, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2609051312227942e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|