| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 8860, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.1828822508584721, |
| "eval_loss": 5.422142028808594, |
| "eval_runtime": 4.713, |
| "eval_samples_per_second": 44.982, |
| "eval_steps_per_second": 1.485, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.1286681715575622, |
| "grad_norm": 11591.58203125, |
| "learning_rate": 0.0005993999999999999, |
| "loss": 1.3556, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.2204150204925599, |
| "eval_loss": 4.926900386810303, |
| "eval_runtime": 4.1827, |
| "eval_samples_per_second": 50.684, |
| "eval_steps_per_second": 1.674, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.2573363431151243, |
| "grad_norm": 11872.123046875, |
| "learning_rate": 0.0005237404580152672, |
| "loss": 1.177, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.23829524055680684, |
| "eval_loss": 4.684645652770996, |
| "eval_runtime": 4.1755, |
| "eval_samples_per_second": 50.772, |
| "eval_steps_per_second": 1.676, |
| "step": 2658 |
| }, |
| { |
| "epoch": 3.386004514672686, |
| "grad_norm": 10701.9892578125, |
| "learning_rate": 0.0004474045801526717, |
| "loss": 1.0942, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.2529723442750065, |
| "eval_loss": 4.538271903991699, |
| "eval_runtime": 4.2082, |
| "eval_samples_per_second": 50.378, |
| "eval_steps_per_second": 1.663, |
| "step": 3544 |
| }, |
| { |
| "epoch": 4.514672686230249, |
| "grad_norm": 9611.1318359375, |
| "learning_rate": 0.0003710687022900763, |
| "loss": 1.0491, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.26249861536757374, |
| "eval_loss": 4.419987201690674, |
| "eval_runtime": 4.1901, |
| "eval_samples_per_second": 50.596, |
| "eval_steps_per_second": 1.671, |
| "step": 4430 |
| }, |
| { |
| "epoch": 5.643340857787811, |
| "grad_norm": 9415.6533203125, |
| "learning_rate": 0.0002947328244274809, |
| "loss": 1.0142, |
| "step": 5000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.2718771923346749, |
| "eval_loss": 4.335880279541016, |
| "eval_runtime": 4.1941, |
| "eval_samples_per_second": 50.547, |
| "eval_steps_per_second": 1.669, |
| "step": 5316 |
| }, |
| { |
| "epoch": 6.772009029345372, |
| "grad_norm": 9572.5986328125, |
| "learning_rate": 0.0002183969465648855, |
| "loss": 0.9847, |
| "step": 6000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.2839327253258502, |
| "eval_loss": 4.236783504486084, |
| "eval_runtime": 4.1911, |
| "eval_samples_per_second": 50.584, |
| "eval_steps_per_second": 1.67, |
| "step": 6202 |
| }, |
| { |
| "epoch": 7.900677200902934, |
| "grad_norm": 9886.6513671875, |
| "learning_rate": 0.00014206106870229005, |
| "loss": 0.9601, |
| "step": 7000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.2898220285788133, |
| "eval_loss": 4.1695966720581055, |
| "eval_runtime": 4.1868, |
| "eval_samples_per_second": 50.635, |
| "eval_steps_per_second": 1.672, |
| "step": 7088 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.2955820994719935, |
| "eval_loss": 4.1200079917907715, |
| "eval_runtime": 4.1803, |
| "eval_samples_per_second": 50.714, |
| "eval_steps_per_second": 1.675, |
| "step": 7974 |
| }, |
| { |
| "epoch": 9.029345372460497, |
| "grad_norm": 9840.4814453125, |
| "learning_rate": 6.572519083969464e-05, |
| "loss": 0.9374, |
| "step": 8000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.2983236716759591, |
| "eval_loss": 4.0958251953125, |
| "eval_runtime": 4.192, |
| "eval_samples_per_second": 50.572, |
| "eval_steps_per_second": 1.67, |
| "step": 8860 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 8860, |
| "total_flos": 7.405800062976e+16, |
| "train_loss": 1.0569607977942472, |
| "train_runtime": 8629.2144, |
| "train_samples_per_second": 32.845, |
| "train_steps_per_second": 1.027 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 8860, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.405800062976e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|