{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 18750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13,
      "learning_rate": 1.961904761904762e-05,
      "loss": 2.4725,
      "step": 500
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.923809523809524e-05,
      "loss": 2.2746,
      "step": 1000
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.885714285714286e-05,
      "loss": 2.2761,
      "step": 1500
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8476190476190478e-05,
      "loss": 2.1547,
      "step": 2000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.8095238095238097e-05,
      "loss": 2.2002,
      "step": 2500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.7714285714285717e-05,
      "loss": 2.1066,
      "step": 3000
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 2.0066,
      "step": 3500
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.0953896045684814,
      "eval_runtime": 5.7038,
      "eval_samples_per_second": 207.582,
      "step": 3750
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.6952380952380955e-05,
      "loss": 1.9183,
      "step": 4000
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.6571428571428574e-05,
      "loss": 2.0477,
      "step": 4500
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.6190476190476193e-05,
      "loss": 1.8964,
      "step": 5000
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.580952380952381e-05,
      "loss": 1.9112,
      "step": 5500
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.542857142857143e-05,
      "loss": 1.9038,
      "step": 6000
    },
    {
      "epoch": 1.73,
      "learning_rate": 1.5047619047619049e-05,
      "loss": 1.9071,
      "step": 6500
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.4666666666666666e-05,
      "loss": 1.8629,
      "step": 7000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 1.8226,
      "step": 7500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.9552286863327026,
      "eval_runtime": 5.8934,
      "eval_samples_per_second": 200.902,
      "step": 7500
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.3904761904761905e-05,
      "loss": 1.851,
      "step": 8000
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.3523809523809525e-05,
      "loss": 1.6683,
      "step": 8500
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.3142857142857145e-05,
      "loss": 1.8138,
      "step": 9000
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.2761904761904762e-05,
      "loss": 1.7491,
      "step": 9500
    },
    {
      "epoch": 2.67,
      "learning_rate": 1.2380952380952383e-05,
      "loss": 1.7464,
      "step": 10000
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.2e-05,
      "loss": 1.7883,
      "step": 10500
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.1619047619047621e-05,
      "loss": 1.7698,
      "step": 11000
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.8933346271514893,
      "eval_runtime": 5.5236,
      "eval_samples_per_second": 214.353,
      "step": 11250
    },
    {
      "epoch": 3.07,
      "learning_rate": 1.1238095238095239e-05,
      "loss": 1.7162,
      "step": 11500
    },
    {
      "epoch": 3.2,
      "learning_rate": 1.0857142857142858e-05,
      "loss": 1.6866,
      "step": 12000
    },
    {
      "epoch": 3.33,
      "learning_rate": 1.0476190476190477e-05,
      "loss": 1.731,
      "step": 12500
    },
    {
      "epoch": 3.47,
      "learning_rate": 1.0095238095238096e-05,
      "loss": 1.6949,
      "step": 13000
    },
    {
      "epoch": 3.6,
      "learning_rate": 9.714285714285715e-06,
      "loss": 1.5885,
      "step": 13500
    },
    {
      "epoch": 3.73,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.647,
      "step": 14000
    },
    {
      "epoch": 3.87,
      "learning_rate": 8.952380952380953e-06,
      "loss": 1.6989,
      "step": 14500
    },
    {
      "epoch": 4.0,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.5514,
      "step": 15000
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.866198182106018,
      "eval_runtime": 5.5702,
      "eval_samples_per_second": 212.558,
      "step": 15000
    },
    {
      "epoch": 4.13,
      "learning_rate": 8.190476190476192e-06,
      "loss": 1.5816,
      "step": 15500
    },
    {
      "epoch": 4.27,
      "learning_rate": 7.809523809523811e-06,
      "loss": 1.5777,
      "step": 16000
    },
    {
      "epoch": 4.4,
      "learning_rate": 7.428571428571429e-06,
      "loss": 1.4894,
      "step": 16500
    },
    {
      "epoch": 4.53,
      "learning_rate": 7.047619047619048e-06,
      "loss": 1.5155,
      "step": 17000
    },
    {
      "epoch": 4.67,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.5695,
      "step": 17500
    },
    {
      "epoch": 4.8,
      "learning_rate": 6.285714285714286e-06,
      "loss": 1.5737,
      "step": 18000
    },
    {
      "epoch": 4.93,
      "learning_rate": 5.904761904761905e-06,
      "loss": 1.4525,
      "step": 18500
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.7822632789611816,
      "eval_runtime": 5.6756,
      "eval_samples_per_second": 208.612,
      "step": 18750
    }
  ],
  "max_steps": 26250,
  "num_train_epochs": 7,
  "total_flos": 881661582406872.0,
  "trial_name": null,
  "trial_params": null
}