bert_omar_small_mlm / checkpoint-14000 /trainer_state.json
omarelsayeed's picture
Upload folder using huggingface_hub
b738153
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9215358931552586,
"global_step": 14000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21,
"learning_rate": 4.652198107957708e-05,
"loss": 6.6098,
"step": 1000
},
{
"epoch": 0.21,
"eval_loss": 5.553156852722168,
"eval_runtime": 208.004,
"eval_samples_per_second": 737.207,
"eval_steps_per_second": 46.076,
"step": 1000
},
{
"epoch": 0.42,
"learning_rate": 4.304396215915415e-05,
"loss": 5.3085,
"step": 2000
},
{
"epoch": 0.42,
"eval_loss": 4.866845607757568,
"eval_runtime": 182.4795,
"eval_samples_per_second": 840.325,
"eval_steps_per_second": 52.521,
"step": 2000
},
{
"epoch": 0.63,
"learning_rate": 3.956594323873122e-05,
"loss": 4.8353,
"step": 3000
},
{
"epoch": 0.63,
"eval_loss": 4.504930019378662,
"eval_runtime": 182.5398,
"eval_samples_per_second": 840.047,
"eval_steps_per_second": 52.504,
"step": 3000
},
{
"epoch": 0.83,
"learning_rate": 3.608792431830829e-05,
"loss": 4.5338,
"step": 4000
},
{
"epoch": 0.83,
"eval_loss": 4.277353763580322,
"eval_runtime": 182.0621,
"eval_samples_per_second": 842.251,
"eval_steps_per_second": 52.641,
"step": 4000
},
{
"epoch": 1.04,
"learning_rate": 3.2609905397885366e-05,
"loss": 4.341,
"step": 5000
},
{
"epoch": 1.04,
"eval_loss": 4.1110429763793945,
"eval_runtime": 182.2031,
"eval_samples_per_second": 841.599,
"eval_steps_per_second": 52.601,
"step": 5000
},
{
"epoch": 1.25,
"learning_rate": 2.913188647746244e-05,
"loss": 4.1883,
"step": 6000
},
{
"epoch": 1.25,
"eval_loss": 3.976555109024048,
"eval_runtime": 182.3315,
"eval_samples_per_second": 841.007,
"eval_steps_per_second": 52.564,
"step": 6000
},
{
"epoch": 1.46,
"learning_rate": 2.5653867557039513e-05,
"loss": 4.0868,
"step": 7000
},
{
"epoch": 1.46,
"eval_loss": 3.8894081115722656,
"eval_runtime": 182.5114,
"eval_samples_per_second": 840.178,
"eval_steps_per_second": 52.512,
"step": 7000
},
{
"epoch": 1.67,
"learning_rate": 2.2175848636616583e-05,
"loss": 3.9926,
"step": 8000
},
{
"epoch": 1.67,
"eval_loss": 3.8076019287109375,
"eval_runtime": 181.8702,
"eval_samples_per_second": 843.14,
"eval_steps_per_second": 52.697,
"step": 8000
},
{
"epoch": 1.88,
"learning_rate": 1.8697829716193657e-05,
"loss": 3.927,
"step": 9000
},
{
"epoch": 1.88,
"eval_loss": 3.7512552738189697,
"eval_runtime": 181.1858,
"eval_samples_per_second": 846.325,
"eval_steps_per_second": 52.896,
"step": 9000
},
{
"epoch": 2.09,
"learning_rate": 1.521981079577073e-05,
"loss": 3.883,
"step": 10000
},
{
"epoch": 2.09,
"eval_loss": 3.726844549179077,
"eval_runtime": 181.8374,
"eval_samples_per_second": 843.292,
"eval_steps_per_second": 52.706,
"step": 10000
},
{
"epoch": 2.3,
"learning_rate": 1.1741791875347802e-05,
"loss": 3.8267,
"step": 11000
},
{
"epoch": 2.3,
"eval_loss": 3.6861767768859863,
"eval_runtime": 181.6704,
"eval_samples_per_second": 844.067,
"eval_steps_per_second": 52.755,
"step": 11000
},
{
"epoch": 2.5,
"learning_rate": 8.263772954924876e-06,
"loss": 3.7992,
"step": 12000
},
{
"epoch": 2.5,
"eval_loss": 3.6546273231506348,
"eval_runtime": 181.6602,
"eval_samples_per_second": 844.114,
"eval_steps_per_second": 52.758,
"step": 12000
},
{
"epoch": 2.71,
"learning_rate": 4.785754034501947e-06,
"loss": 3.7776,
"step": 13000
},
{
"epoch": 2.71,
"eval_loss": 3.6455674171447754,
"eval_runtime": 181.243,
"eval_samples_per_second": 846.057,
"eval_steps_per_second": 52.879,
"step": 13000
},
{
"epoch": 2.92,
"learning_rate": 1.3077351140790207e-06,
"loss": 3.7765,
"step": 14000
},
{
"epoch": 2.92,
"eval_loss": 3.6216413974761963,
"eval_runtime": 181.2343,
"eval_samples_per_second": 846.098,
"eval_steps_per_second": 52.882,
"step": 14000
}
],
"max_steps": 14376,
"num_train_epochs": 3,
"total_flos": 2556138160300032.0,
"trial_name": null,
"trial_params": null
}