| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9938823529411764, | |
| "eval_steps": 500, | |
| "global_step": 132, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07529411764705882, | |
| "grad_norm": 3.4375, | |
| "learning_rate": 9.945882549823906e-06, | |
| "loss": 0.9271, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.15058823529411763, | |
| "grad_norm": 2.921875, | |
| "learning_rate": 9.619397662556434e-06, | |
| "loss": 0.9117, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.22588235294117648, | |
| "grad_norm": 3.03125, | |
| "learning_rate": 9.016037657403225e-06, | |
| "loss": 0.8815, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.30117647058823527, | |
| "grad_norm": 3.265625, | |
| "learning_rate": 8.171966420818227e-06, | |
| "loss": 0.8864, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3764705882352941, | |
| "grad_norm": 2.765625, | |
| "learning_rate": 7.137775467151411e-06, | |
| "loss": 0.8392, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.45176470588235296, | |
| "grad_norm": 2.875, | |
| "learning_rate": 5.975451610080643e-06, | |
| "loss": 0.897, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5270588235294118, | |
| "grad_norm": 2.828125, | |
| "learning_rate": 4.75466162836291e-06, | |
| "loss": 0.9156, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6023529411764705, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 3.5485766137276894e-06, | |
| "loss": 0.8995, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6776470588235294, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 2.429486279033892e-06, | |
| "loss": 0.8987, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.7529411764705882, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 0.9125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8282352941176471, | |
| "grad_norm": 2.25, | |
| "learning_rate": 7.113569499986401e-07, | |
| "loss": 0.9048, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.9035294117647059, | |
| "grad_norm": 3.546875, | |
| "learning_rate": 2.152983213389559e-07, | |
| "loss": 0.8947, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.9788235294117648, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 6.022718974137976e-09, | |
| "loss": 0.8629, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9938823529411764, | |
| "step": 132, | |
| "total_flos": 1.4402327792320512e+17, | |
| "train_loss": 0.8944851132956418, | |
| "train_runtime": 1000.6829, | |
| "train_samples_per_second": 4.247, | |
| "train_steps_per_second": 0.132 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 132, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.4402327792320512e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |