Ali Assi
Upload folder using huggingface_hub
ec14de0 verified
{
"best_global_step": 2124,
"best_metric": 0.5742166755177908,
"best_model_checkpoint": "./bert-lora-newsgroups/checkpoint-2124",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14124293785310735,
"grad_norm": 4.619589805603027,
"learning_rate": 0.00019067796610169492,
"loss": 2.9563,
"step": 100
},
{
"epoch": 0.2824858757062147,
"grad_norm": 4.369770526885986,
"learning_rate": 0.0001812617702448211,
"loss": 2.4516,
"step": 200
},
{
"epoch": 0.423728813559322,
"grad_norm": 4.454040050506592,
"learning_rate": 0.00017184557438794729,
"loss": 2.1271,
"step": 300
},
{
"epoch": 0.5649717514124294,
"grad_norm": 5.155438423156738,
"learning_rate": 0.00016242937853107344,
"loss": 1.9125,
"step": 400
},
{
"epoch": 0.7062146892655368,
"grad_norm": 4.892629623413086,
"learning_rate": 0.00015301318267419963,
"loss": 1.7896,
"step": 500
},
{
"epoch": 0.847457627118644,
"grad_norm": 4.983877658843994,
"learning_rate": 0.0001435969868173258,
"loss": 1.6968,
"step": 600
},
{
"epoch": 0.9887005649717514,
"grad_norm": 8.334493637084961,
"learning_rate": 0.00013418079096045197,
"loss": 1.5862,
"step": 700
},
{
"epoch": 1.0,
"eval_accuracy": 0.47357939458311205,
"eval_loss": 1.6120948791503906,
"eval_runtime": 59.1685,
"eval_samples_per_second": 127.297,
"eval_steps_per_second": 7.96,
"step": 708
},
{
"epoch": 1.1299435028248588,
"grad_norm": 6.713998794555664,
"learning_rate": 0.00012476459510357815,
"loss": 1.5267,
"step": 800
},
{
"epoch": 1.271186440677966,
"grad_norm": 4.822694778442383,
"learning_rate": 0.00011534839924670434,
"loss": 1.4934,
"step": 900
},
{
"epoch": 1.4124293785310735,
"grad_norm": 4.339609146118164,
"learning_rate": 0.00010593220338983052,
"loss": 1.4728,
"step": 1000
},
{
"epoch": 1.5536723163841808,
"grad_norm": 3.8593039512634277,
"learning_rate": 9.651600753295669e-05,
"loss": 1.4145,
"step": 1100
},
{
"epoch": 1.694915254237288,
"grad_norm": 4.826875686645508,
"learning_rate": 8.709981167608286e-05,
"loss": 1.3815,
"step": 1200
},
{
"epoch": 1.8361581920903953,
"grad_norm": 4.669344902038574,
"learning_rate": 7.768361581920904e-05,
"loss": 1.4495,
"step": 1300
},
{
"epoch": 1.9774011299435028,
"grad_norm": 4.768439769744873,
"learning_rate": 6.826741996233523e-05,
"loss": 1.3959,
"step": 1400
},
{
"epoch": 2.0,
"eval_accuracy": 0.54182156133829,
"eval_loss": 1.4169427156448364,
"eval_runtime": 59.3986,
"eval_samples_per_second": 126.804,
"eval_steps_per_second": 7.929,
"step": 1416
},
{
"epoch": 2.1186440677966103,
"grad_norm": 3.956120491027832,
"learning_rate": 5.88512241054614e-05,
"loss": 1.3278,
"step": 1500
},
{
"epoch": 2.2598870056497176,
"grad_norm": 4.364845275878906,
"learning_rate": 4.9435028248587575e-05,
"loss": 1.3065,
"step": 1600
},
{
"epoch": 2.401129943502825,
"grad_norm": 7.486156463623047,
"learning_rate": 4.001883239171375e-05,
"loss": 1.305,
"step": 1700
},
{
"epoch": 2.542372881355932,
"grad_norm": 5.2779693603515625,
"learning_rate": 3.060263653483992e-05,
"loss": 1.2618,
"step": 1800
},
{
"epoch": 2.68361581920904,
"grad_norm": 6.177374839782715,
"learning_rate": 2.1186440677966103e-05,
"loss": 1.2691,
"step": 1900
},
{
"epoch": 2.824858757062147,
"grad_norm": 6.994251251220703,
"learning_rate": 1.1770244821092279e-05,
"loss": 1.2931,
"step": 2000
},
{
"epoch": 2.9661016949152543,
"grad_norm": 5.824560642242432,
"learning_rate": 2.3540489642184557e-06,
"loss": 1.25,
"step": 2100
},
{
"epoch": 3.0,
"eval_accuracy": 0.5742166755177908,
"eval_loss": 1.346989393234253,
"eval_runtime": 59.373,
"eval_samples_per_second": 126.859,
"eval_steps_per_second": 7.933,
"step": 2124
}
],
"logging_steps": 100,
"max_steps": 2124,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8520568027192704.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}