{
  "best_metric": 0.7792592592592592,
  "best_model_checkpoint": "sentiment-analysis-pp/checkpoint-4728",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 4728,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.42,
      "grad_norm": 163.66278076171875,
      "learning_rate": 2.7461928934010155e-05,
      "loss": 0.9585,
      "step": 500
    },
    {
      "epoch": 0.85,
      "grad_norm": 7.153451919555664,
      "learning_rate": 2.4923857868020305e-05,
      "loss": 0.8643,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6766666666666666,
      "eval_loss": 0.7678444981575012,
      "eval_runtime": 42.3243,
      "eval_samples_per_second": 63.793,
      "eval_steps_per_second": 3.993,
      "step": 1182
    },
    {
      "epoch": 1.27,
      "grad_norm": 6.358371257781982,
      "learning_rate": 2.238578680203046e-05,
      "loss": 0.7115,
      "step": 1500
    },
    {
      "epoch": 1.69,
      "grad_norm": 6.753957748413086,
      "learning_rate": 1.9847715736040607e-05,
      "loss": 0.6343,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7188888888888889,
      "eval_loss": 0.6581271290779114,
      "eval_runtime": 42.3763,
      "eval_samples_per_second": 63.715,
      "eval_steps_per_second": 3.988,
      "step": 2364
    },
    {
      "epoch": 2.12,
      "grad_norm": 12.652050971984863,
      "learning_rate": 1.730964467005076e-05,
      "loss": 0.5684,
      "step": 2500
    },
    {
      "epoch": 2.54,
      "grad_norm": 22.428882598876953,
      "learning_rate": 1.4771573604060913e-05,
      "loss": 0.4881,
      "step": 3000
    },
    {
      "epoch": 2.96,
      "grad_norm": 7.206263542175293,
      "learning_rate": 1.2233502538071067e-05,
      "loss": 0.4631,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7648148148148148,
      "eval_loss": 0.6259744763374329,
      "eval_runtime": 42.4889,
      "eval_samples_per_second": 63.546,
      "eval_steps_per_second": 3.978,
      "step": 3546
    },
    {
      "epoch": 3.38,
      "grad_norm": 7.703592300415039,
      "learning_rate": 9.695431472081218e-06,
      "loss": 0.3736,
      "step": 4000
    },
    {
      "epoch": 3.81,
      "grad_norm": 11.456498146057129,
      "learning_rate": 7.15736040609137e-06,
      "loss": 0.3553,
      "step": 4500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7792592592592592,
      "eval_loss": 0.623358964920044,
      "eval_runtime": 42.3946,
      "eval_samples_per_second": 63.687,
      "eval_steps_per_second": 3.986,
      "step": 4728
    }
  ],
  "logging_steps": 500,
  "max_steps": 5910,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 1.60063403215968e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}