{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.0,
"eval_steps": 500,
"global_step": 774,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11627906976744186,
"grad_norm": 422.98492431640625,
"learning_rate": 5.813953488372094e-07,
"loss": 13.5134,
"step": 10
},
{
"epoch": 0.23255813953488372,
"grad_norm": 320.77978515625,
"learning_rate": 1.1627906976744188e-06,
"loss": 13.8919,
"step": 20
},
{
"epoch": 0.3488372093023256,
"grad_norm": 332.9243469238281,
"learning_rate": 1.7441860465116282e-06,
"loss": 11.296,
"step": 30
},
{
"epoch": 0.46511627906976744,
"grad_norm": 228.7667999267578,
"learning_rate": 2.3255813953488376e-06,
"loss": 10.1082,
"step": 40
},
{
"epoch": 0.5813953488372093,
"grad_norm": 161.30587768554688,
"learning_rate": 2.9069767441860468e-06,
"loss": 8.3917,
"step": 50
},
{
"epoch": 0.6976744186046512,
"grad_norm": 127.99996185302734,
"learning_rate": 3.4883720930232564e-06,
"loss": 7.1075,
"step": 60
},
{
"epoch": 0.813953488372093,
"grad_norm": 140.52676391601562,
"learning_rate": 4.0697674418604655e-06,
"loss": 6.1315,
"step": 70
},
{
"epoch": 0.9302325581395349,
"grad_norm": 114.8707275390625,
"learning_rate": 4.651162790697675e-06,
"loss": 4.8287,
"step": 80
},
{
"epoch": 1.0465116279069768,
"grad_norm": 92.12864685058594,
"learning_rate": 5.232558139534885e-06,
"loss": 4.5353,
"step": 90
},
{
"epoch": 1.1627906976744187,
"grad_norm": 68.36174011230469,
"learning_rate": 5.8139534883720935e-06,
"loss": 3.5791,
"step": 100
},
{
"epoch": 1.2790697674418605,
"grad_norm": 63.35755920410156,
"learning_rate": 6.395348837209303e-06,
"loss": 3.5826,
"step": 110
},
{
"epoch": 1.3953488372093024,
"grad_norm": 55.400062561035156,
"learning_rate": 6.976744186046513e-06,
"loss": 3.3077,
"step": 120
},
{
"epoch": 1.5116279069767442,
"grad_norm": 48.623931884765625,
"learning_rate": 7.5581395348837215e-06,
"loss": 2.7043,
"step": 130
},
{
"epoch": 1.627906976744186,
"grad_norm": 38.653045654296875,
"learning_rate": 8.139534883720931e-06,
"loss": 2.5115,
"step": 140
},
{
"epoch": 1.744186046511628,
"grad_norm": 40.29408645629883,
"learning_rate": 8.72093023255814e-06,
"loss": 2.2846,
"step": 150
},
{
"epoch": 1.8604651162790697,
"grad_norm": 38.37876892089844,
"learning_rate": 9.30232558139535e-06,
"loss": 2.1131,
"step": 160
},
{
"epoch": 1.9767441860465116,
"grad_norm": 42.284358978271484,
"learning_rate": 9.883720930232558e-06,
"loss": 2.1187,
"step": 170
},
{
"epoch": 2.0930232558139537,
"grad_norm": 35.26789474487305,
"learning_rate": 9.948320413436692e-06,
"loss": 1.7909,
"step": 180
},
{
"epoch": 2.2093023255813953,
"grad_norm": 32.07147216796875,
"learning_rate": 9.883720930232558e-06,
"loss": 1.8275,
"step": 190
},
{
"epoch": 2.3255813953488373,
"grad_norm": 26.437585830688477,
"learning_rate": 9.819121447028424e-06,
"loss": 1.6012,
"step": 200
},
{
"epoch": 2.441860465116279,
"grad_norm": 23.764263153076172,
"learning_rate": 9.75452196382429e-06,
"loss": 1.4442,
"step": 210
},
{
"epoch": 2.558139534883721,
"grad_norm": 25.62763786315918,
"learning_rate": 9.689922480620156e-06,
"loss": 1.6601,
"step": 220
},
{
"epoch": 2.6744186046511627,
"grad_norm": 24.76538848876953,
"learning_rate": 9.625322997416021e-06,
"loss": 1.6132,
"step": 230
},
{
"epoch": 2.7906976744186047,
"grad_norm": 20.186899185180664,
"learning_rate": 9.560723514211887e-06,
"loss": 1.4848,
"step": 240
},
{
"epoch": 2.9069767441860463,
"grad_norm": 18.639366149902344,
"learning_rate": 9.496124031007753e-06,
"loss": 1.3203,
"step": 250
},
{
"epoch": 3.0232558139534884,
"grad_norm": 18.23370361328125,
"learning_rate": 9.431524547803619e-06,
"loss": 1.2951,
"step": 260
},
{
"epoch": 3.13953488372093,
"grad_norm": 20.976999282836914,
"learning_rate": 9.366925064599483e-06,
"loss": 1.2178,
"step": 270
},
{
"epoch": 3.255813953488372,
"grad_norm": 17.430908203125,
"learning_rate": 9.30232558139535e-06,
"loss": 1.2632,
"step": 280
},
{
"epoch": 3.3720930232558137,
"grad_norm": 19.134355545043945,
"learning_rate": 9.237726098191216e-06,
"loss": 1.1677,
"step": 290
},
{
"epoch": 3.488372093023256,
"grad_norm": 17.857439041137695,
"learning_rate": 9.173126614987082e-06,
"loss": 1.1876,
"step": 300
},
{
"epoch": 3.604651162790698,
"grad_norm": 19.010107040405273,
"learning_rate": 9.108527131782946e-06,
"loss": 1.2605,
"step": 310
},
{
"epoch": 3.7209302325581395,
"grad_norm": 20.19669532775879,
"learning_rate": 9.043927648578812e-06,
"loss": 1.0512,
"step": 320
},
{
"epoch": 3.8372093023255816,
"grad_norm": 17.26742935180664,
"learning_rate": 8.979328165374678e-06,
"loss": 1.0464,
"step": 330
},
{
"epoch": 3.953488372093023,
"grad_norm": 16.611114501953125,
"learning_rate": 8.914728682170543e-06,
"loss": 1.275,
"step": 340
},
{
"epoch": 4.069767441860465,
"grad_norm": 16.697961807250977,
"learning_rate": 8.850129198966409e-06,
"loss": 1.149,
"step": 350
},
{
"epoch": 4.186046511627907,
"grad_norm": 21.129913330078125,
"learning_rate": 8.785529715762275e-06,
"loss": 1.0761,
"step": 360
},
{
"epoch": 4.3023255813953485,
"grad_norm": 17.171480178833008,
"learning_rate": 8.72093023255814e-06,
"loss": 1.1369,
"step": 370
},
{
"epoch": 4.4186046511627906,
"grad_norm": 18.365201950073242,
"learning_rate": 8.656330749354006e-06,
"loss": 1.0853,
"step": 380
},
{
"epoch": 4.534883720930233,
"grad_norm": 17.23832130432129,
"learning_rate": 8.591731266149872e-06,
"loss": 1.2211,
"step": 390
},
{
"epoch": 4.651162790697675,
"grad_norm": 15.34086799621582,
"learning_rate": 8.527131782945736e-06,
"loss": 0.9821,
"step": 400
},
{
"epoch": 4.767441860465116,
"grad_norm": 16.993715286254883,
"learning_rate": 8.462532299741602e-06,
"loss": 0.9534,
"step": 410
},
{
"epoch": 4.883720930232558,
"grad_norm": 19.80428695678711,
"learning_rate": 8.397932816537468e-06,
"loss": 1.1195,
"step": 420
},
{
"epoch": 5.0,
"grad_norm": 16.171567916870117,
"learning_rate": 8.333333333333334e-06,
"loss": 1.0985,
"step": 430
},
{
"epoch": 5.116279069767442,
"grad_norm": 13.978007316589355,
"learning_rate": 8.2687338501292e-06,
"loss": 0.9202,
"step": 440
},
{
"epoch": 5.232558139534884,
"grad_norm": 14.796314239501953,
"learning_rate": 8.204134366925065e-06,
"loss": 0.9328,
"step": 450
},
{
"epoch": 5.348837209302325,
"grad_norm": 16.71290397644043,
"learning_rate": 8.139534883720931e-06,
"loss": 1.039,
"step": 460
},
{
"epoch": 5.465116279069767,
"grad_norm": 18.757164001464844,
"learning_rate": 8.074935400516797e-06,
"loss": 0.8807,
"step": 470
},
{
"epoch": 5.5813953488372094,
"grad_norm": 15.623830795288086,
"learning_rate": 8.010335917312663e-06,
"loss": 0.9312,
"step": 480
},
{
"epoch": 5.6976744186046515,
"grad_norm": 18.499126434326172,
"learning_rate": 7.945736434108527e-06,
"loss": 0.9762,
"step": 490
},
{
"epoch": 5.813953488372093,
"grad_norm": 16.5031795501709,
"learning_rate": 7.881136950904393e-06,
"loss": 0.8655,
"step": 500
},
{
"epoch": 5.930232558139535,
"grad_norm": 16.413860321044922,
"learning_rate": 7.81653746770026e-06,
"loss": 0.9282,
"step": 510
},
{
"epoch": 6.046511627906977,
"grad_norm": 16.479084014892578,
"learning_rate": 7.751937984496126e-06,
"loss": 0.9508,
"step": 520
},
{
"epoch": 6.162790697674419,
"grad_norm": 17.928300857543945,
"learning_rate": 7.68733850129199e-06,
"loss": 0.8915,
"step": 530
},
{
"epoch": 6.27906976744186,
"grad_norm": 15.385879516601562,
"learning_rate": 7.622739018087856e-06,
"loss": 0.9037,
"step": 540
},
{
"epoch": 6.395348837209302,
"grad_norm": 15.203673362731934,
"learning_rate": 7.5581395348837215e-06,
"loss": 0.8771,
"step": 550
},
{
"epoch": 6.511627906976744,
"grad_norm": 15.593746185302734,
"learning_rate": 7.493540051679587e-06,
"loss": 0.8693,
"step": 560
},
{
"epoch": 6.627906976744186,
"grad_norm": 14.65884017944336,
"learning_rate": 7.428940568475452e-06,
"loss": 0.7836,
"step": 570
},
{
"epoch": 6.7441860465116275,
"grad_norm": 14.073718070983887,
"learning_rate": 7.364341085271318e-06,
"loss": 0.8735,
"step": 580
},
{
"epoch": 6.8604651162790695,
"grad_norm": 14.441410064697266,
"learning_rate": 7.299741602067184e-06,
"loss": 0.8854,
"step": 590
},
{
"epoch": 6.976744186046512,
"grad_norm": 15.579970359802246,
"learning_rate": 7.23514211886305e-06,
"loss": 0.7916,
"step": 600
},
{
"epoch": 7.093023255813954,
"grad_norm": 17.380699157714844,
"learning_rate": 7.170542635658916e-06,
"loss": 0.8114,
"step": 610
},
{
"epoch": 7.209302325581396,
"grad_norm": 12.569280624389648,
"learning_rate": 7.10594315245478e-06,
"loss": 0.7023,
"step": 620
},
{
"epoch": 7.325581395348837,
"grad_norm": 13.200396537780762,
"learning_rate": 7.041343669250646e-06,
"loss": 0.816,
"step": 630
},
{
"epoch": 7.441860465116279,
"grad_norm": 14.037437438964844,
"learning_rate": 6.976744186046513e-06,
"loss": 0.7554,
"step": 640
},
{
"epoch": 7.558139534883721,
"grad_norm": 15.947734832763672,
"learning_rate": 6.9121447028423785e-06,
"loss": 0.8142,
"step": 650
},
{
"epoch": 7.674418604651163,
"grad_norm": 14.823920249938965,
"learning_rate": 6.8475452196382435e-06,
"loss": 0.8039,
"step": 660
},
{
"epoch": 7.790697674418604,
"grad_norm": 14.750404357910156,
"learning_rate": 6.782945736434109e-06,
"loss": 0.7782,
"step": 670
},
{
"epoch": 7.906976744186046,
"grad_norm": 14.9164400100708,
"learning_rate": 6.718346253229975e-06,
"loss": 0.7377,
"step": 680
},
{
"epoch": 8.023255813953488,
"grad_norm": 14.381583213806152,
"learning_rate": 6.653746770025841e-06,
"loss": 0.8922,
"step": 690
},
{
"epoch": 8.13953488372093,
"grad_norm": 18.800931930541992,
"learning_rate": 6.589147286821706e-06,
"loss": 0.7478,
"step": 700
},
{
"epoch": 8.255813953488373,
"grad_norm": 12.644633293151855,
"learning_rate": 6.5245478036175715e-06,
"loss": 0.7164,
"step": 710
},
{
"epoch": 8.372093023255815,
"grad_norm": 13.925402641296387,
"learning_rate": 6.459948320413437e-06,
"loss": 0.6666,
"step": 720
},
{
"epoch": 8.488372093023255,
"grad_norm": 17.934913635253906,
"learning_rate": 6.395348837209303e-06,
"loss": 0.7386,
"step": 730
},
{
"epoch": 8.604651162790697,
"grad_norm": 15.470941543579102,
"learning_rate": 6.330749354005169e-06,
"loss": 0.6754,
"step": 740
},
{
"epoch": 8.720930232558139,
"grad_norm": 14.682629585266113,
"learning_rate": 6.266149870801034e-06,
"loss": 0.7265,
"step": 750
},
{
"epoch": 8.837209302325581,
"grad_norm": 16.380544662475586,
"learning_rate": 6.2015503875969e-06,
"loss": 0.7417,
"step": 760
},
{
"epoch": 8.953488372093023,
"grad_norm": 15.928140640258789,
"learning_rate": 6.1369509043927654e-06,
"loss": 0.7413,
"step": 770
}
],
"logging_steps": 10,
"max_steps": 1720,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}