{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 234,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12841091492776885,
      "grad_norm": 0.35282963514328003,
      "learning_rate": 7.500000000000001e-05,
      "loss": 4.8145,
      "step": 10
    },
    {
      "epoch": 0.2568218298555377,
      "grad_norm": 0.49915072321891785,
      "learning_rate": 0.00015833333333333332,
      "loss": 4.9759,
      "step": 20
    },
    {
      "epoch": 0.3852327447833066,
      "grad_norm": 0.9177737832069397,
      "learning_rate": 0.00019972037971811802,
      "loss": 4.7644,
      "step": 30
    },
    {
      "epoch": 0.5136436597110754,
      "grad_norm": 0.9219063520431519,
      "learning_rate": 0.00019749279121818235,
      "loss": 4.584,
      "step": 40
    },
    {
      "epoch": 0.6420545746388443,
      "grad_norm": 1.11421537399292,
      "learning_rate": 0.00019308737486442045,
      "loss": 4.3618,
      "step": 50
    },
    {
      "epoch": 0.7704654895666132,
      "grad_norm": 1.0577869415283203,
      "learning_rate": 0.00018660254037844388,
      "loss": 4.0276,
      "step": 60
    },
    {
      "epoch": 0.898876404494382,
      "grad_norm": 1.1793265342712402,
      "learning_rate": 0.000178183148246803,
      "loss": 4.0805,
      "step": 70
    },
    {
      "epoch": 1.0256821829855538,
      "grad_norm": 1.2243174314498901,
      "learning_rate": 0.00016801727377709194,
      "loss": 3.9028,
      "step": 80
    },
    {
      "epoch": 1.1540930979133226,
      "grad_norm": 0.8244380354881287,
      "learning_rate": 0.0001563320058063622,
      "loss": 3.7643,
      "step": 90
    },
    {
      "epoch": 1.2825040128410916,
      "grad_norm": 0.7699925303459167,
      "learning_rate": 0.00014338837391175582,
      "loss": 3.7726,
      "step": 100
    },
    {
      "epoch": 1.4109149277688604,
      "grad_norm": 0.9207503795623779,
      "learning_rate": 0.00012947551744109043,
      "loss": 3.6279,
      "step": 110
    },
    {
      "epoch": 1.5393258426966292,
      "grad_norm": 0.8555833697319031,
      "learning_rate": 0.00011490422661761744,
      "loss": 3.7526,
      "step": 120
    },
    {
      "epoch": 1.667736757624398,
      "grad_norm": 0.9841089844703674,
      "learning_rate": 0.0001,
      "loss": 3.6943,
      "step": 130
    },
    {
      "epoch": 1.7961476725521668,
      "grad_norm": 0.9110783338546753,
      "learning_rate": 8.509577338238255e-05,
      "loss": 3.6543,
      "step": 140
    },
    {
      "epoch": 1.9245585874799358,
      "grad_norm": 0.7824547290802002,
      "learning_rate": 7.052448255890957e-05,
      "loss": 3.7434,
      "step": 150
    },
    {
      "epoch": 2.0513643659711076,
      "grad_norm": 0.8478178977966309,
      "learning_rate": 5.6611626088244194e-05,
      "loss": 3.6021,
      "step": 160
    },
    {
      "epoch": 2.1797752808988764,
      "grad_norm": 0.8073328137397766,
      "learning_rate": 4.3667994193637796e-05,
      "loss": 3.6057,
      "step": 170
    },
    {
      "epoch": 2.308186195826645,
      "grad_norm": 0.8824509978294373,
      "learning_rate": 3.1982726222908056e-05,
      "loss": 3.6472,
      "step": 180
    },
    {
      "epoch": 2.436597110754414,
      "grad_norm": 0.8347837328910828,
      "learning_rate": 2.1816851753197032e-05,
      "loss": 3.561,
      "step": 190
    },
    {
      "epoch": 2.5650080256821832,
      "grad_norm": 1.0345772504806519,
      "learning_rate": 1.339745962155613e-05,
      "loss": 3.6356,
      "step": 200
    },
    {
      "epoch": 2.693418940609952,
      "grad_norm": 0.960090160369873,
      "learning_rate": 6.9126251355795864e-06,
      "loss": 3.5557,
      "step": 210
    },
    {
      "epoch": 2.821829855537721,
      "grad_norm": 0.7885724902153015,
      "learning_rate": 2.5072087818176382e-06,
      "loss": 3.638,
      "step": 220
    },
    {
      "epoch": 2.9502407704654896,
      "grad_norm": 0.8719391226768494,
      "learning_rate": 2.7962028188198706e-07,
      "loss": 3.5913,
      "step": 230
    }
  ],
  "logging_steps": 10,
  "max_steps": 234,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 248838271008768.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}