| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 100, | |
| "global_step": 258, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11627906976744186, | |
| "grad_norm": 10.613214492797852, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 0.6914, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 13.879897117614746, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.6682, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3488372093023256, | |
| "grad_norm": 12.199045181274414, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.6228, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 10.077858924865723, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 0.5581, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5813953488372093, | |
| "grad_norm": 9.077909469604492, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.4533, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6976744186046512, | |
| "grad_norm": 12.368229866027832, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.3712, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.813953488372093, | |
| "grad_norm": 8.568652153015137, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 0.2595, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 4.570730209350586, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 0.1839, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 7.3009562492370605, | |
| "learning_rate": 3.6000000000000003e-06, | |
| "loss": 0.1171, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 17.301179885864258, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.1089, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "eval_accuracy": 0.9883720930232558, | |
| "eval_accuracy_label_clickbait": 0.9827586206896551, | |
| "eval_accuracy_label_factual": 0.9941176470588236, | |
| "eval_f1": 0.9883724860836951, | |
| "eval_loss": 0.06170095503330231, | |
| "eval_precision": 0.9884396971335857, | |
| "eval_recall": 0.9883720930232558, | |
| "eval_runtime": 0.2786, | |
| "eval_samples_per_second": 1234.948, | |
| "eval_steps_per_second": 78.979, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.2790697674418605, | |
| "grad_norm": 9.898296356201172, | |
| "learning_rate": 4.4e-06, | |
| "loss": 0.0528, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 0.6471260190010071, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.0235, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 0.5717675089836121, | |
| "learning_rate": 5.2e-06, | |
| "loss": 0.0456, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 0.4758104085922241, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 0.0329, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.744186046511628, | |
| "grad_norm": 0.17176453769207, | |
| "learning_rate": 6e-06, | |
| "loss": 0.0422, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 0.1672956943511963, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 0.0325, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 8.929553031921387, | |
| "learning_rate": 6.800000000000001e-06, | |
| "loss": 0.0577, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 6.927422523498535, | |
| "learning_rate": 7.2000000000000005e-06, | |
| "loss": 0.0329, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.2093023255813953, | |
| "grad_norm": 101.96356964111328, | |
| "learning_rate": 7.600000000000001e-06, | |
| "loss": 0.0076, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 0.08046901226043701, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.0118, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "eval_accuracy": 0.997093023255814, | |
| "eval_accuracy_label_clickbait": 0.9942528735632183, | |
| "eval_accuracy_label_factual": 1.0, | |
| "eval_f1": 0.9970930969577605, | |
| "eval_loss": 0.009262952022254467, | |
| "eval_precision": 0.9971100231198151, | |
| "eval_recall": 0.997093023255814, | |
| "eval_runtime": 0.2762, | |
| "eval_samples_per_second": 1245.451, | |
| "eval_steps_per_second": 79.651, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.441860465116279, | |
| "grad_norm": 8.304059028625488, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 0.0752, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 0.26231056451797485, | |
| "learning_rate": 8.8e-06, | |
| "loss": 0.0019, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.6744186046511627, | |
| "grad_norm": 0.04355992376804352, | |
| "learning_rate": 9.200000000000002e-06, | |
| "loss": 0.0076, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 27.33185386657715, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.029, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.9069767441860463, | |
| "grad_norm": 0.038574591279029846, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0015, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 258, | |
| "total_flos": 11163167404740.0, | |
| "train_loss": 0.1740486863350799, | |
| "train_runtime": 20.4599, | |
| "train_samples_per_second": 403.08, | |
| "train_steps_per_second": 12.61 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_clickbait": 1.0, | |
| "eval_accuracy_label_factual": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.001034915097989142, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.2775, | |
| "eval_samples_per_second": 1239.448, | |
| "eval_steps_per_second": 79.267, | |
| "step": 258 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 258, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 11163167404740.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |