{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.016, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008, "grad_norm": 0.2573789358139038, "learning_rate": 1.8e-05, "loss": 0.8789, "step": 10 }, { "epoch": 0.0016, "grad_norm": 0.2554769217967987, "learning_rate": 3.8e-05, "loss": 0.9406, "step": 20 }, { "epoch": 0.0024, "grad_norm": 0.1636732518672943, "learning_rate": 5.8e-05, "loss": 0.9042, "step": 30 }, { "epoch": 0.0032, "grad_norm": 0.1901659220457077, "learning_rate": 7.800000000000001e-05, "loss": 0.8011, "step": 40 }, { "epoch": 0.004, "grad_norm": 0.19001711905002594, "learning_rate": 9.8e-05, "loss": 0.868, "step": 50 }, { "epoch": 0.0048, "grad_norm": 0.21186105906963348, "learning_rate": 0.000118, "loss": 0.8855, "step": 60 }, { "epoch": 0.0056, "grad_norm": 0.18638649582862854, "learning_rate": 0.000138, "loss": 0.8751, "step": 70 }, { "epoch": 0.0064, "grad_norm": 0.20771396160125732, "learning_rate": 0.00015800000000000002, "loss": 0.8288, "step": 80 }, { "epoch": 0.0072, "grad_norm": 0.18098486959934235, "learning_rate": 0.00017800000000000002, "loss": 0.8364, "step": 90 }, { "epoch": 0.008, "grad_norm": 0.19161753356456757, "learning_rate": 0.00019800000000000002, "loss": 0.7082, "step": 100 }, { "epoch": 0.0088, "grad_norm": 0.21872420608997345, "learning_rate": 0.00019985483870967743, "loss": 0.8027, "step": 110 }, { "epoch": 0.0096, "grad_norm": 0.19647376239299774, "learning_rate": 0.00019969354838709677, "loss": 0.7546, "step": 120 }, { "epoch": 0.0104, "grad_norm": 0.174669086933136, "learning_rate": 0.00019953225806451614, "loss": 0.7488, "step": 130 }, { "epoch": 0.0112, "grad_norm": 0.25444668531417847, "learning_rate": 0.00019937096774193548, "loss": 0.6572, "step": 140 }, { "epoch": 0.012, "grad_norm": 0.2063271701335907, "learning_rate": 0.00019920967741935485, "loss": 0.7714, "step": 150 }, { "epoch": 0.0128, "grad_norm": 0.21145090460777283, "learning_rate": 0.0001990483870967742, "loss": 0.7931, "step": 160 }, { "epoch": 0.0136, "grad_norm": 0.21990713477134705, "learning_rate": 0.00019888709677419355, "loss": 0.7836, "step": 170 }, { "epoch": 0.0144, "grad_norm": 0.1941397488117218, "learning_rate": 0.00019872580645161292, "loss": 0.8162, "step": 180 }, { "epoch": 0.0152, "grad_norm": 0.18625402450561523, "learning_rate": 0.00019856451612903226, "loss": 0.7964, "step": 190 }, { "epoch": 0.016, "grad_norm": 0.1731642633676529, "learning_rate": 0.00019840322580645163, "loss": 0.7157, "step": 200 } ], "logging_steps": 10, "max_steps": 12500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8444666897014784e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }