tempdef_expt2_50e / trainer_state.json
hsuvaskakoty's picture
Upload 15 files
98aa502
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 34.210215142059454,
"global_step": 19500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.917429577464789e-05,
"loss": 1.7834,
"step": 570
},
{
"epoch": 1.0,
"eval_loss": 0.6923606991767883,
"eval_runtime": 1521.6883,
"eval_samples_per_second": 921.873,
"eval_steps_per_second": 14.404,
"step": 570
},
{
"epoch": 2.0,
"learning_rate": 4.817077464788733e-05,
"loss": 0.6994,
"step": 1140
},
{
"epoch": 2.0,
"eval_loss": 0.6767147183418274,
"eval_runtime": 1526.5049,
"eval_samples_per_second": 918.964,
"eval_steps_per_second": 14.359,
"step": 1140
},
{
"epoch": 3.0,
"learning_rate": 4.7167253521126765e-05,
"loss": 0.654,
"step": 1710
},
{
"epoch": 3.0,
"eval_loss": 0.6709172129631042,
"eval_runtime": 1516.8574,
"eval_samples_per_second": 924.809,
"eval_steps_per_second": 14.45,
"step": 1710
},
{
"epoch": 4.0,
"learning_rate": 4.61637323943662e-05,
"loss": 0.6194,
"step": 2280
},
{
"epoch": 4.0,
"eval_loss": 0.6694589257240295,
"eval_runtime": 1517.553,
"eval_samples_per_second": 924.385,
"eval_steps_per_second": 14.444,
"step": 2280
},
{
"epoch": 5.0,
"learning_rate": 4.516021126760563e-05,
"loss": 0.5904,
"step": 2850
},
{
"epoch": 5.0,
"eval_loss": 0.6706073880195618,
"eval_runtime": 1514.7995,
"eval_samples_per_second": 926.065,
"eval_steps_per_second": 14.47,
"step": 2850
},
{
"epoch": 6.0,
"learning_rate": 4.4156690140845075e-05,
"loss": 0.5653,
"step": 3420
},
{
"epoch": 6.0,
"eval_loss": 0.6708899736404419,
"eval_runtime": 1517.3829,
"eval_samples_per_second": 924.488,
"eval_steps_per_second": 14.445,
"step": 3420
},
{
"epoch": 7.0,
"learning_rate": 4.315316901408451e-05,
"loss": 0.5431,
"step": 3990
},
{
"epoch": 7.0,
"eval_loss": 0.6717029809951782,
"eval_runtime": 1515.5706,
"eval_samples_per_second": 925.594,
"eval_steps_per_second": 14.463,
"step": 3990
},
{
"epoch": 8.0,
"learning_rate": 4.214964788732394e-05,
"loss": 0.5237,
"step": 4560
},
{
"epoch": 8.0,
"eval_loss": 0.6746511459350586,
"eval_runtime": 1515.027,
"eval_samples_per_second": 925.926,
"eval_steps_per_second": 14.468,
"step": 4560
},
{
"epoch": 9.0,
"learning_rate": 4.114612676056338e-05,
"loss": 0.5061,
"step": 5130
},
{
"epoch": 9.0,
"eval_loss": 0.6748108267784119,
"eval_runtime": 1514.6887,
"eval_samples_per_second": 926.133,
"eval_steps_per_second": 14.471,
"step": 5130
},
{
"epoch": 10.0,
"learning_rate": 4.014260563380282e-05,
"loss": 0.4901,
"step": 5700
},
{
"epoch": 10.0,
"eval_loss": 0.6780610680580139,
"eval_runtime": 1515.4454,
"eval_samples_per_second": 925.67,
"eval_steps_per_second": 14.464,
"step": 5700
},
{
"epoch": 11.0,
"learning_rate": 3.913908450704226e-05,
"loss": 0.4755,
"step": 6270
},
{
"epoch": 11.0,
"eval_loss": 0.6779971718788147,
"eval_runtime": 1517.3277,
"eval_samples_per_second": 924.522,
"eval_steps_per_second": 14.446,
"step": 6270
},
{
"epoch": 12.0,
"learning_rate": 3.813556338028169e-05,
"loss": 0.4624,
"step": 6840
},
{
"epoch": 12.0,
"eval_loss": 0.6860271096229553,
"eval_runtime": 1515.1169,
"eval_samples_per_second": 925.871,
"eval_steps_per_second": 14.467,
"step": 6840
},
{
"epoch": 13.0,
"learning_rate": 3.7132042253521126e-05,
"loss": 0.4501,
"step": 7410
},
{
"epoch": 13.0,
"eval_loss": 0.6835764646530151,
"eval_runtime": 1513.5665,
"eval_samples_per_second": 926.82,
"eval_steps_per_second": 14.482,
"step": 7410
},
{
"epoch": 14.0,
"learning_rate": 3.612852112676057e-05,
"loss": 0.4389,
"step": 7980
},
{
"epoch": 14.0,
"eval_loss": 0.6895220279693604,
"eval_runtime": 1514.0338,
"eval_samples_per_second": 926.533,
"eval_steps_per_second": 14.477,
"step": 7980
},
{
"epoch": 15.0,
"learning_rate": 3.5125e-05,
"loss": 0.4286,
"step": 8550
},
{
"epoch": 15.0,
"eval_loss": 0.6874631643295288,
"eval_runtime": 1516.075,
"eval_samples_per_second": 925.286,
"eval_steps_per_second": 14.458,
"step": 8550
},
{
"epoch": 16.0,
"learning_rate": 3.412147887323944e-05,
"loss": 0.4189,
"step": 9120
},
{
"epoch": 16.0,
"eval_loss": 0.6922647953033447,
"eval_runtime": 1516.6302,
"eval_samples_per_second": 924.947,
"eval_steps_per_second": 14.452,
"step": 9120
},
{
"epoch": 17.0,
"learning_rate": 3.3117957746478874e-05,
"loss": 0.41,
"step": 9690
},
{
"epoch": 17.0,
"eval_loss": 0.6977216005325317,
"eval_runtime": 1513.9466,
"eval_samples_per_second": 926.587,
"eval_steps_per_second": 14.478,
"step": 9690
},
{
"epoch": 18.0,
"learning_rate": 3.211443661971831e-05,
"loss": 0.4017,
"step": 10260
},
{
"epoch": 18.0,
"eval_loss": 0.705147922039032,
"eval_runtime": 1513.405,
"eval_samples_per_second": 926.918,
"eval_steps_per_second": 14.483,
"step": 10260
},
{
"epoch": 19.0,
"learning_rate": 3.111091549295775e-05,
"loss": 0.394,
"step": 10830
},
{
"epoch": 19.0,
"eval_loss": 0.6972260475158691,
"eval_runtime": 1514.848,
"eval_samples_per_second": 926.035,
"eval_steps_per_second": 14.469,
"step": 10830
},
{
"epoch": 20.0,
"learning_rate": 3.0109154929577467e-05,
"loss": 0.3868,
"step": 11400
},
{
"epoch": 20.0,
"eval_loss": 0.7031562328338623,
"eval_runtime": 1513.8461,
"eval_samples_per_second": 926.648,
"eval_steps_per_second": 14.479,
"step": 11400
},
{
"epoch": 21.0,
"learning_rate": 2.91056338028169e-05,
"loss": 0.38,
"step": 11970
},
{
"epoch": 21.0,
"eval_loss": 0.7028641104698181,
"eval_runtime": 1513.4864,
"eval_samples_per_second": 926.869,
"eval_steps_per_second": 14.482,
"step": 11970
},
{
"epoch": 22.0,
"learning_rate": 2.810211267605634e-05,
"loss": 0.3738,
"step": 12540
},
{
"epoch": 22.0,
"eval_loss": 0.7074136734008789,
"eval_runtime": 1513.6381,
"eval_samples_per_second": 926.776,
"eval_steps_per_second": 14.481,
"step": 12540
},
{
"epoch": 23.0,
"learning_rate": 2.7098591549295778e-05,
"loss": 0.3679,
"step": 13110
},
{
"epoch": 23.0,
"eval_loss": 0.7076618075370789,
"eval_runtime": 1513.3948,
"eval_samples_per_second": 926.925,
"eval_steps_per_second": 14.483,
"step": 13110
},
{
"epoch": 24.0,
"learning_rate": 2.609507042253521e-05,
"loss": 0.3623,
"step": 13680
},
{
"epoch": 24.0,
"eval_loss": 0.7148919701576233,
"eval_runtime": 1514.4065,
"eval_samples_per_second": 926.305,
"eval_steps_per_second": 14.474,
"step": 13680
},
{
"epoch": 25.0,
"learning_rate": 2.509507042253521e-05,
"loss": 0.3572,
"step": 14250
},
{
"epoch": 25.0,
"eval_loss": 0.7150377631187439,
"eval_runtime": 1514.2022,
"eval_samples_per_second": 926.43,
"eval_steps_per_second": 14.476,
"step": 14250
},
{
"epoch": 26.0,
"learning_rate": 2.409330985915493e-05,
"loss": 0.3523,
"step": 14820
},
{
"epoch": 26.0,
"eval_loss": 0.7058804035186768,
"eval_runtime": 1514.5913,
"eval_samples_per_second": 926.192,
"eval_steps_per_second": 14.472,
"step": 14820
},
{
"epoch": 27.0,
"learning_rate": 2.3089788732394367e-05,
"loss": 0.3478,
"step": 15390
},
{
"epoch": 27.0,
"eval_loss": 0.7150311470031738,
"eval_runtime": 1515.4959,
"eval_samples_per_second": 925.64,
"eval_steps_per_second": 14.463,
"step": 15390
},
{
"epoch": 28.0,
"learning_rate": 2.2086267605633804e-05,
"loss": 0.3436,
"step": 15960
},
{
"epoch": 28.0,
"eval_loss": 0.715233564376831,
"eval_runtime": 1515.1776,
"eval_samples_per_second": 925.834,
"eval_steps_per_second": 14.466,
"step": 15960
},
{
"epoch": 29.0,
"learning_rate": 2.108274647887324e-05,
"loss": 0.3396,
"step": 16530
},
{
"epoch": 29.0,
"eval_loss": 0.7200678586959839,
"eval_runtime": 1514.8194,
"eval_samples_per_second": 926.053,
"eval_steps_per_second": 14.47,
"step": 16530
},
{
"epoch": 30.0,
"learning_rate": 2.0079225352112678e-05,
"loss": 0.3358,
"step": 17100
},
{
"epoch": 30.0,
"eval_loss": 0.713365375995636,
"eval_runtime": 1513.5909,
"eval_samples_per_second": 926.805,
"eval_steps_per_second": 14.481,
"step": 17100
},
{
"epoch": 31.0,
"learning_rate": 1.9075704225352115e-05,
"loss": 0.3323,
"step": 17670
},
{
"epoch": 31.0,
"eval_loss": 0.7232212424278259,
"eval_runtime": 1515.152,
"eval_samples_per_second": 925.85,
"eval_steps_per_second": 14.467,
"step": 17670
},
{
"epoch": 32.0,
"learning_rate": 1.8072183098591548e-05,
"loss": 0.3291,
"step": 18240
},
{
"epoch": 32.0,
"eval_loss": 0.7199446558952332,
"eval_runtime": 1514.7283,
"eval_samples_per_second": 926.109,
"eval_steps_per_second": 14.471,
"step": 18240
},
{
"epoch": 33.0,
"learning_rate": 1.706866197183099e-05,
"loss": 0.3259,
"step": 18810
},
{
"epoch": 33.0,
"eval_loss": 0.7213279604911804,
"eval_runtime": 1513.6184,
"eval_samples_per_second": 926.788,
"eval_steps_per_second": 14.481,
"step": 18810
},
{
"epoch": 34.0,
"learning_rate": 1.6065140845070422e-05,
"loss": 0.3231,
"step": 19380
},
{
"epoch": 34.0,
"eval_loss": 0.7236403226852417,
"eval_runtime": 1514.5169,
"eval_samples_per_second": 926.238,
"eval_steps_per_second": 14.473,
"step": 19380
}
],
"max_steps": 28500,
"num_train_epochs": 50,
"total_flos": 3.0481579178459136e+18,
"trial_name": null,
"trial_params": null
}