seed_1 / trainer_state.json
marmolpen3's picture
marmolpen3/lexglue-unfair-tos
9e0bcb7 verified
{
"best_metric": 0.9517181383210092,
"best_model_checkpoint": "logs/unfair_tos/bert-base-uncased/seed_1/checkpoint-2076",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 4152,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7225433526011561,
"grad_norm": 0.4137115776538849,
"learning_rate": 2.8916184971098265e-05,
"loss": 0.0965,
"step": 500
},
{
"epoch": 1.0,
"eval_loss": 0.03946217894554138,
"eval_macro-f1": 0.3257253069723768,
"eval_micro-f1": 0.9201138104618078,
"eval_runtime": 3.109,
"eval_samples_per_second": 731.749,
"eval_steps_per_second": 91.67,
"step": 692
},
{
"epoch": 1.4450867052023122,
"grad_norm": 0.5951205492019653,
"learning_rate": 2.7832369942196533e-05,
"loss": 0.0428,
"step": 1000
},
{
"epoch": 2.0,
"eval_loss": 0.030047794803977013,
"eval_macro-f1": 0.5948063101905734,
"eval_micro-f1": 0.926039387308534,
"eval_runtime": 3.1251,
"eval_samples_per_second": 727.972,
"eval_steps_per_second": 91.196,
"step": 1384
},
{
"epoch": 2.167630057803468,
"grad_norm": 0.040406469255685806,
"learning_rate": 2.6748554913294797e-05,
"loss": 0.0294,
"step": 1500
},
{
"epoch": 2.8901734104046244,
"grad_norm": 0.47717490792274475,
"learning_rate": 2.5664739884393065e-05,
"loss": 0.0202,
"step": 2000
},
{
"epoch": 3.0,
"eval_loss": 0.025113314390182495,
"eval_macro-f1": 0.7620096966057794,
"eval_micro-f1": 0.9517181383210092,
"eval_runtime": 3.0811,
"eval_samples_per_second": 738.38,
"eval_steps_per_second": 92.5,
"step": 2076
},
{
"epoch": 3.61271676300578,
"grad_norm": 0.00729083176702261,
"learning_rate": 2.458092485549133e-05,
"loss": 0.0136,
"step": 2500
},
{
"epoch": 4.0,
"eval_loss": 0.028495851904153824,
"eval_macro-f1": 0.7233574363382008,
"eval_micro-f1": 0.937227550130776,
"eval_runtime": 3.1146,
"eval_samples_per_second": 730.425,
"eval_steps_per_second": 91.504,
"step": 2768
},
{
"epoch": 4.335260115606936,
"grad_norm": 0.01607728749513626,
"learning_rate": 2.3497109826589597e-05,
"loss": 0.01,
"step": 3000
},
{
"epoch": 5.0,
"eval_loss": 0.030019745230674744,
"eval_macro-f1": 0.7251725230841095,
"eval_micro-f1": 0.9452084697664266,
"eval_runtime": 3.0696,
"eval_samples_per_second": 741.132,
"eval_steps_per_second": 92.845,
"step": 3460
},
{
"epoch": 5.057803468208093,
"grad_norm": 0.006081081461161375,
"learning_rate": 2.2413294797687862e-05,
"loss": 0.0087,
"step": 3500
},
{
"epoch": 5.780346820809249,
"grad_norm": 4.081294536590576,
"learning_rate": 2.132947976878613e-05,
"loss": 0.0068,
"step": 4000
},
{
"epoch": 6.0,
"eval_loss": 0.02861410565674305,
"eval_macro-f1": 0.7558502112595478,
"eval_micro-f1": 0.9500763025942882,
"eval_runtime": 3.0792,
"eval_samples_per_second": 738.833,
"eval_steps_per_second": 92.557,
"step": 4152
},
{
"epoch": 6.0,
"step": 4152,
"total_flos": 2183413154955264.0,
"train_loss": 0.027697743765421227,
"train_runtime": 253.8343,
"train_samples_per_second": 435.875,
"train_steps_per_second": 54.524
}
],
"logging_steps": 500,
"max_steps": 13840,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2183413154955264.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}