| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9215358931552586, |
| "global_step": 14000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.652198107957708e-05, |
| "loss": 6.6098, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_loss": 5.553156852722168, |
| "eval_runtime": 208.004, |
| "eval_samples_per_second": 737.207, |
| "eval_steps_per_second": 46.076, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.304396215915415e-05, |
| "loss": 5.3085, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 4.866845607757568, |
| "eval_runtime": 182.4795, |
| "eval_samples_per_second": 840.325, |
| "eval_steps_per_second": 52.521, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.956594323873122e-05, |
| "loss": 4.8353, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_loss": 4.504930019378662, |
| "eval_runtime": 182.5398, |
| "eval_samples_per_second": 840.047, |
| "eval_steps_per_second": 52.504, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.608792431830829e-05, |
| "loss": 4.5338, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 4.277353763580322, |
| "eval_runtime": 182.0621, |
| "eval_samples_per_second": 842.251, |
| "eval_steps_per_second": 52.641, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.2609905397885366e-05, |
| "loss": 4.341, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_loss": 4.1110429763793945, |
| "eval_runtime": 182.2031, |
| "eval_samples_per_second": 841.599, |
| "eval_steps_per_second": 52.601, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.913188647746244e-05, |
| "loss": 4.1883, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 3.976555109024048, |
| "eval_runtime": 182.3315, |
| "eval_samples_per_second": 841.007, |
| "eval_steps_per_second": 52.564, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.5653867557039513e-05, |
| "loss": 4.0868, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_loss": 3.8894081115722656, |
| "eval_runtime": 182.5114, |
| "eval_samples_per_second": 840.178, |
| "eval_steps_per_second": 52.512, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2175848636616583e-05, |
| "loss": 3.9926, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 3.8076019287109375, |
| "eval_runtime": 181.8702, |
| "eval_samples_per_second": 843.14, |
| "eval_steps_per_second": 52.697, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8697829716193657e-05, |
| "loss": 3.927, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_loss": 3.7512552738189697, |
| "eval_runtime": 181.1858, |
| "eval_samples_per_second": 846.325, |
| "eval_steps_per_second": 52.896, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.521981079577073e-05, |
| "loss": 3.883, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_loss": 3.726844549179077, |
| "eval_runtime": 181.8374, |
| "eval_samples_per_second": 843.292, |
| "eval_steps_per_second": 52.706, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1741791875347802e-05, |
| "loss": 3.8267, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_loss": 3.6861767768859863, |
| "eval_runtime": 181.6704, |
| "eval_samples_per_second": 844.067, |
| "eval_steps_per_second": 52.755, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.263772954924876e-06, |
| "loss": 3.7992, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 3.6546273231506348, |
| "eval_runtime": 181.6602, |
| "eval_samples_per_second": 844.114, |
| "eval_steps_per_second": 52.758, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.785754034501947e-06, |
| "loss": 3.7776, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_loss": 3.6455674171447754, |
| "eval_runtime": 181.243, |
| "eval_samples_per_second": 846.057, |
| "eval_steps_per_second": 52.879, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.3077351140790207e-06, |
| "loss": 3.7765, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_loss": 3.6216413974761963, |
| "eval_runtime": 181.2343, |
| "eval_samples_per_second": 846.098, |
| "eval_steps_per_second": 52.882, |
| "step": 14000 |
| } |
| ], |
| "max_steps": 14376, |
| "num_train_epochs": 3, |
| "total_flos": 2556138160300032.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|