| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.016, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0008, | |
| "grad_norm": 0.2573789358139038, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.8789, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0016, | |
| "grad_norm": 0.2554769217967987, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.9406, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0024, | |
| "grad_norm": 0.1636732518672943, | |
| "learning_rate": 5.8e-05, | |
| "loss": 0.9042, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0032, | |
| "grad_norm": 0.1901659220457077, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 0.8011, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 0.19001711905002594, | |
| "learning_rate": 9.8e-05, | |
| "loss": 0.868, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0048, | |
| "grad_norm": 0.21186105906963348, | |
| "learning_rate": 0.000118, | |
| "loss": 0.8855, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0056, | |
| "grad_norm": 0.18638649582862854, | |
| "learning_rate": 0.000138, | |
| "loss": 0.8751, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0064, | |
| "grad_norm": 0.20771396160125732, | |
| "learning_rate": 0.00015800000000000002, | |
| "loss": 0.8288, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0072, | |
| "grad_norm": 0.18098486959934235, | |
| "learning_rate": 0.00017800000000000002, | |
| "loss": 0.8364, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 0.19161753356456757, | |
| "learning_rate": 0.00019800000000000002, | |
| "loss": 0.7082, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0088, | |
| "grad_norm": 0.21872420608997345, | |
| "learning_rate": 0.00019985483870967743, | |
| "loss": 0.8027, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0096, | |
| "grad_norm": 0.19647376239299774, | |
| "learning_rate": 0.00019969354838709677, | |
| "loss": 0.7546, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0104, | |
| "grad_norm": 0.174669086933136, | |
| "learning_rate": 0.00019953225806451614, | |
| "loss": 0.7488, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0112, | |
| "grad_norm": 0.25444668531417847, | |
| "learning_rate": 0.00019937096774193548, | |
| "loss": 0.6572, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 0.2063271701335907, | |
| "learning_rate": 0.00019920967741935485, | |
| "loss": 0.7714, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0128, | |
| "grad_norm": 0.21145090460777283, | |
| "learning_rate": 0.0001990483870967742, | |
| "loss": 0.7931, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0136, | |
| "grad_norm": 0.21990713477134705, | |
| "learning_rate": 0.00019888709677419355, | |
| "loss": 0.7836, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0144, | |
| "grad_norm": 0.1941397488117218, | |
| "learning_rate": 0.00019872580645161292, | |
| "loss": 0.8162, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0152, | |
| "grad_norm": 0.18625402450561523, | |
| "learning_rate": 0.00019856451612903226, | |
| "loss": 0.7964, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.1731642633676529, | |
| "learning_rate": 0.00019840322580645163, | |
| "loss": 0.7157, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 12500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8444666897014784e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |