job-ner-deberta / checkpoint-5000 /trainer_state.json
Shrav20's picture
Upload folder using huggingface_hub
1211eca verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.962085308056872,
"eval_steps": 500,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02962085308056872,
"grad_norm": 0.0359644778072834,
"learning_rate": 4.951619273301738e-05,
"loss": 0.3566,
"step": 50
},
{
"epoch": 0.05924170616113744,
"grad_norm": 0.045561712235212326,
"learning_rate": 4.902251184834124e-05,
"loss": 0.0019,
"step": 100
},
{
"epoch": 0.08886255924170616,
"grad_norm": 0.011206220835447311,
"learning_rate": 4.852883096366509e-05,
"loss": 0.0017,
"step": 150
},
{
"epoch": 0.11848341232227488,
"grad_norm": 0.008807787671685219,
"learning_rate": 4.8035150078988947e-05,
"loss": 0.0006,
"step": 200
},
{
"epoch": 0.1481042654028436,
"grad_norm": 0.005796543322503567,
"learning_rate": 4.75414691943128e-05,
"loss": 0.0009,
"step": 250
},
{
"epoch": 0.17772511848341233,
"grad_norm": 0.005227432586252689,
"learning_rate": 4.7047788309636656e-05,
"loss": 0.0006,
"step": 300
},
{
"epoch": 0.20734597156398105,
"grad_norm": 0.003554289462044835,
"learning_rate": 4.655410742496051e-05,
"loss": 0.0005,
"step": 350
},
{
"epoch": 0.23696682464454977,
"grad_norm": 0.0033741986844688654,
"learning_rate": 4.6060426540284365e-05,
"loss": 0.0012,
"step": 400
},
{
"epoch": 0.2665876777251185,
"grad_norm": 0.002832002704963088,
"learning_rate": 4.556674565560822e-05,
"loss": 0.0002,
"step": 450
},
{
"epoch": 0.2962085308056872,
"grad_norm": 0.002275377744808793,
"learning_rate": 4.5073064770932074e-05,
"loss": 0.0007,
"step": 500
},
{
"epoch": 0.32582938388625593,
"grad_norm": 0.003911971114575863,
"learning_rate": 4.457938388625593e-05,
"loss": 0.0006,
"step": 550
},
{
"epoch": 0.35545023696682465,
"grad_norm": 0.006926015485078096,
"learning_rate": 4.408570300157978e-05,
"loss": 0.0014,
"step": 600
},
{
"epoch": 0.38507109004739337,
"grad_norm": 0.0027338722720742226,
"learning_rate": 4.359202211690364e-05,
"loss": 0.0008,
"step": 650
},
{
"epoch": 0.4146919431279621,
"grad_norm": 0.001827694708481431,
"learning_rate": 4.309834123222749e-05,
"loss": 0.0003,
"step": 700
},
{
"epoch": 0.4443127962085308,
"grad_norm": 0.0012653936864808202,
"learning_rate": 4.2604660347551346e-05,
"loss": 0.0001,
"step": 750
},
{
"epoch": 0.47393364928909953,
"grad_norm": 0.0022165332920849323,
"learning_rate": 4.21109794628752e-05,
"loss": 0.0004,
"step": 800
},
{
"epoch": 0.5035545023696683,
"grad_norm": 0.0063213687390089035,
"learning_rate": 4.1617298578199055e-05,
"loss": 0.0007,
"step": 850
},
{
"epoch": 0.533175355450237,
"grad_norm": 0.024515969678759575,
"learning_rate": 4.112361769352291e-05,
"loss": 0.001,
"step": 900
},
{
"epoch": 0.5627962085308057,
"grad_norm": 0.003376233857125044,
"learning_rate": 4.0629936808846765e-05,
"loss": 0.0005,
"step": 950
},
{
"epoch": 0.5924170616113744,
"grad_norm": 0.001513678696937859,
"learning_rate": 4.013625592417062e-05,
"loss": 0.0002,
"step": 1000
},
{
"epoch": 0.6220379146919431,
"grad_norm": 0.013340185396373272,
"learning_rate": 3.9642575039494474e-05,
"loss": 0.0004,
"step": 1050
},
{
"epoch": 0.6516587677725119,
"grad_norm": 0.0008107981411740184,
"learning_rate": 3.914889415481833e-05,
"loss": 0.0001,
"step": 1100
},
{
"epoch": 0.6812796208530806,
"grad_norm": 0.000826548261102289,
"learning_rate": 3.865521327014218e-05,
"loss": 0.0,
"step": 1150
},
{
"epoch": 0.7109004739336493,
"grad_norm": 0.0015754875494167209,
"learning_rate": 3.816153238546604e-05,
"loss": 0.0003,
"step": 1200
},
{
"epoch": 0.740521327014218,
"grad_norm": 0.0010875407606363297,
"learning_rate": 3.766785150078989e-05,
"loss": 0.0001,
"step": 1250
},
{
"epoch": 0.7701421800947867,
"grad_norm": 0.0013485795352607965,
"learning_rate": 3.7174170616113746e-05,
"loss": 0.0002,
"step": 1300
},
{
"epoch": 0.7997630331753555,
"grad_norm": 0.002195018110796809,
"learning_rate": 3.66804897314376e-05,
"loss": 0.0008,
"step": 1350
},
{
"epoch": 0.8293838862559242,
"grad_norm": 0.0013226654846221209,
"learning_rate": 3.6186808846761455e-05,
"loss": 0.0004,
"step": 1400
},
{
"epoch": 0.8590047393364929,
"grad_norm": 0.020256407558918,
"learning_rate": 3.569312796208531e-05,
"loss": 0.0011,
"step": 1450
},
{
"epoch": 0.8886255924170616,
"grad_norm": 0.0060112737119197845,
"learning_rate": 3.5199447077409164e-05,
"loss": 0.0003,
"step": 1500
},
{
"epoch": 0.9182464454976303,
"grad_norm": 0.0010298583656549454,
"learning_rate": 3.470576619273302e-05,
"loss": 0.0002,
"step": 1550
},
{
"epoch": 0.9478672985781991,
"grad_norm": 0.0008804717799648643,
"learning_rate": 3.4212085308056873e-05,
"loss": 0.0003,
"step": 1600
},
{
"epoch": 0.9774881516587678,
"grad_norm": 0.0007368926890194416,
"learning_rate": 3.371840442338073e-05,
"loss": 0.0002,
"step": 1650
},
{
"epoch": 1.0,
"eval_loss": 6.839788693469018e-05,
"eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 1410\n EDUCATION 1.00 1.00 1.00 2241\n LANGUAGE 1.00 1.00 1.00 3014\n SKILL 1.00 1.00 1.00 3069\n\n micro avg 1.00 1.00 1.00 9734\n macro avg 1.00 1.00 1.00 9734\n weighted avg 1.00 1.00 1.00 9734\n",
"eval_runtime": 7.1833,
"eval_samples_per_second": 208.818,
"eval_steps_per_second": 26.172,
"step": 1688
},
{
"epoch": 1.0071090047393365,
"grad_norm": 0.0006786159938201308,
"learning_rate": 3.322472353870458e-05,
"loss": 0.0002,
"step": 1700
},
{
"epoch": 1.0367298578199051,
"grad_norm": 0.005513947457075119,
"learning_rate": 3.273104265402844e-05,
"loss": 0.0001,
"step": 1750
},
{
"epoch": 1.066350710900474,
"grad_norm": 0.000574503734242171,
"learning_rate": 3.223736176935229e-05,
"loss": 0.0001,
"step": 1800
},
{
"epoch": 1.0959715639810426,
"grad_norm": 0.004586311522871256,
"learning_rate": 3.1743680884676146e-05,
"loss": 0.0006,
"step": 1850
},
{
"epoch": 1.1255924170616114,
"grad_norm": 0.000738324539270252,
"learning_rate": 3.125e-05,
"loss": 0.0002,
"step": 1900
},
{
"epoch": 1.15521327014218,
"grad_norm": 0.0006399003323167562,
"learning_rate": 3.0756319115323855e-05,
"loss": 0.0001,
"step": 1950
},
{
"epoch": 1.1848341232227488,
"grad_norm": 0.0004596344952005893,
"learning_rate": 3.026263823064771e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 1.2144549763033174,
"grad_norm": 0.0004429569817148149,
"learning_rate": 2.9768957345971564e-05,
"loss": 0.0001,
"step": 2050
},
{
"epoch": 1.2440758293838863,
"grad_norm": 0.0004196607042104006,
"learning_rate": 2.927527646129542e-05,
"loss": 0.0,
"step": 2100
},
{
"epoch": 1.2736966824644549,
"grad_norm": 0.0004485426179599017,
"learning_rate": 2.8781595576619273e-05,
"loss": 0.0,
"step": 2150
},
{
"epoch": 1.3033175355450237,
"grad_norm": 0.0005169134237803519,
"learning_rate": 2.8287914691943128e-05,
"loss": 0.0001,
"step": 2200
},
{
"epoch": 1.3329383886255926,
"grad_norm": 0.0005291880224831402,
"learning_rate": 2.7794233807266982e-05,
"loss": 0.0002,
"step": 2250
},
{
"epoch": 1.3625592417061612,
"grad_norm": 0.00048425907152704895,
"learning_rate": 2.7300552922590837e-05,
"loss": 0.0001,
"step": 2300
},
{
"epoch": 1.3921800947867298,
"grad_norm": 0.0004068867419846356,
"learning_rate": 2.6806872037914695e-05,
"loss": 0.0,
"step": 2350
},
{
"epoch": 1.4218009478672986,
"grad_norm": 0.0003516751166898757,
"learning_rate": 2.631319115323855e-05,
"loss": 0.0,
"step": 2400
},
{
"epoch": 1.4514218009478674,
"grad_norm": 0.0003432795056141913,
"learning_rate": 2.5819510268562404e-05,
"loss": 0.0,
"step": 2450
},
{
"epoch": 1.481042654028436,
"grad_norm": 0.00032765124342404306,
"learning_rate": 2.532582938388626e-05,
"loss": 0.0,
"step": 2500
},
{
"epoch": 1.5106635071090047,
"grad_norm": 0.00030620096367783844,
"learning_rate": 2.4832148499210113e-05,
"loss": 0.0,
"step": 2550
},
{
"epoch": 1.5402843601895735,
"grad_norm": 0.00032276054844260216,
"learning_rate": 2.4338467614533967e-05,
"loss": 0.0,
"step": 2600
},
{
"epoch": 1.5699052132701423,
"grad_norm": 0.0003284791891928762,
"learning_rate": 2.3844786729857822e-05,
"loss": 0.0,
"step": 2650
},
{
"epoch": 1.599526066350711,
"grad_norm": 0.0005517126410268247,
"learning_rate": 2.3351105845181677e-05,
"loss": 0.0001,
"step": 2700
},
{
"epoch": 1.6291469194312795,
"grad_norm": 0.0004335689009167254,
"learning_rate": 2.285742496050553e-05,
"loss": 0.0,
"step": 2750
},
{
"epoch": 1.6587677725118484,
"grad_norm": 0.00938709732145071,
"learning_rate": 2.2363744075829386e-05,
"loss": 0.0011,
"step": 2800
},
{
"epoch": 1.6883886255924172,
"grad_norm": 0.002625273773446679,
"learning_rate": 2.187006319115324e-05,
"loss": 0.0,
"step": 2850
},
{
"epoch": 1.7180094786729858,
"grad_norm": 0.00048340365174226463,
"learning_rate": 2.1376382306477095e-05,
"loss": 0.0,
"step": 2900
},
{
"epoch": 1.7476303317535544,
"grad_norm": 0.0035711589735001326,
"learning_rate": 2.088270142180095e-05,
"loss": 0.0005,
"step": 2950
},
{
"epoch": 1.7772511848341233,
"grad_norm": 0.0005380721995607018,
"learning_rate": 2.0389020537124804e-05,
"loss": 0.0001,
"step": 3000
},
{
"epoch": 1.806872037914692,
"grad_norm": 0.00039379362715408206,
"learning_rate": 1.9895339652448658e-05,
"loss": 0.0,
"step": 3050
},
{
"epoch": 1.8364928909952607,
"grad_norm": 0.00031137277255766094,
"learning_rate": 1.9401658767772513e-05,
"loss": 0.0001,
"step": 3100
},
{
"epoch": 1.8661137440758293,
"grad_norm": 0.00033859844552353024,
"learning_rate": 1.8907977883096367e-05,
"loss": 0.0003,
"step": 3150
},
{
"epoch": 1.8957345971563981,
"grad_norm": 0.0003177137696184218,
"learning_rate": 1.8414296998420222e-05,
"loss": 0.0,
"step": 3200
},
{
"epoch": 1.925355450236967,
"grad_norm": 0.5252463817596436,
"learning_rate": 1.7920616113744076e-05,
"loss": 0.0001,
"step": 3250
},
{
"epoch": 1.9549763033175356,
"grad_norm": 0.000578847888391465,
"learning_rate": 1.742693522906793e-05,
"loss": 0.0002,
"step": 3300
},
{
"epoch": 1.9845971563981042,
"grad_norm": 0.00031036767177283764,
"learning_rate": 1.6933254344391785e-05,
"loss": 0.0,
"step": 3350
},
{
"epoch": 2.0,
"eval_loss": 0.0002486561133991927,
"eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 1410\n EDUCATION 1.00 1.00 1.00 2241\n LANGUAGE 1.00 1.00 1.00 3014\n SKILL 1.00 1.00 1.00 3069\n\n micro avg 1.00 1.00 1.00 9734\n macro avg 1.00 1.00 1.00 9734\n weighted avg 1.00 1.00 1.00 9734\n",
"eval_runtime": 7.0179,
"eval_samples_per_second": 213.738,
"eval_steps_per_second": 26.788,
"step": 3376
},
{
"epoch": 2.014218009478673,
"grad_norm": 0.00038110482273623347,
"learning_rate": 1.643957345971564e-05,
"loss": 0.0002,
"step": 3400
},
{
"epoch": 2.043838862559242,
"grad_norm": 0.00030082205194048584,
"learning_rate": 1.5945892575039495e-05,
"loss": 0.0,
"step": 3450
},
{
"epoch": 2.0734597156398102,
"grad_norm": 0.00030870226328261197,
"learning_rate": 1.545221169036335e-05,
"loss": 0.0,
"step": 3500
},
{
"epoch": 2.103080568720379,
"grad_norm": 0.00023404941020999104,
"learning_rate": 1.4958530805687204e-05,
"loss": 0.0001,
"step": 3550
},
{
"epoch": 2.132701421800948,
"grad_norm": 0.00021994848793838173,
"learning_rate": 1.4464849921011058e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 2.1623222748815167,
"grad_norm": 0.0002600239240564406,
"learning_rate": 1.3971169036334913e-05,
"loss": 0.0,
"step": 3650
},
{
"epoch": 2.191943127962085,
"grad_norm": 0.00019250177138019353,
"learning_rate": 1.3477488151658769e-05,
"loss": 0.0,
"step": 3700
},
{
"epoch": 2.221563981042654,
"grad_norm": 0.00024143581686075777,
"learning_rate": 1.2983807266982623e-05,
"loss": 0.0,
"step": 3750
},
{
"epoch": 2.251184834123223,
"grad_norm": 0.00020565264276228845,
"learning_rate": 1.2490126382306478e-05,
"loss": 0.0,
"step": 3800
},
{
"epoch": 2.2808056872037916,
"grad_norm": 0.0002789797727018595,
"learning_rate": 1.1996445497630332e-05,
"loss": 0.0002,
"step": 3850
},
{
"epoch": 2.31042654028436,
"grad_norm": 0.0007835368160158396,
"learning_rate": 1.1502764612954187e-05,
"loss": 0.0007,
"step": 3900
},
{
"epoch": 2.340047393364929,
"grad_norm": 0.0004727982450276613,
"learning_rate": 1.1009083728278042e-05,
"loss": 0.0,
"step": 3950
},
{
"epoch": 2.3696682464454977,
"grad_norm": 0.0008073291974142194,
"learning_rate": 1.0515402843601896e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 2.3992890995260665,
"grad_norm": 0.15108434855937958,
"learning_rate": 1.002172195892575e-05,
"loss": 0.0,
"step": 4050
},
{
"epoch": 2.428909952606635,
"grad_norm": 0.00033067440381273627,
"learning_rate": 9.528041074249605e-06,
"loss": 0.0,
"step": 4100
},
{
"epoch": 2.4585308056872037,
"grad_norm": 0.019667765125632286,
"learning_rate": 9.03436018957346e-06,
"loss": 0.0,
"step": 4150
},
{
"epoch": 2.4881516587677726,
"grad_norm": 0.000259611289948225,
"learning_rate": 8.540679304897314e-06,
"loss": 0.0,
"step": 4200
},
{
"epoch": 2.5177725118483414,
"grad_norm": 0.0002708205720409751,
"learning_rate": 8.046998420221169e-06,
"loss": 0.0003,
"step": 4250
},
{
"epoch": 2.5473933649289098,
"grad_norm": 0.0002382330858381465,
"learning_rate": 7.553317535545023e-06,
"loss": 0.0,
"step": 4300
},
{
"epoch": 2.5770142180094786,
"grad_norm": 0.0002609147340990603,
"learning_rate": 7.059636650868879e-06,
"loss": 0.0,
"step": 4350
},
{
"epoch": 2.6066350710900474,
"grad_norm": 0.00025002885377034545,
"learning_rate": 6.565955766192733e-06,
"loss": 0.0,
"step": 4400
},
{
"epoch": 2.6362559241706163,
"grad_norm": 0.00023832859005779028,
"learning_rate": 6.0722748815165886e-06,
"loss": 0.0,
"step": 4450
},
{
"epoch": 2.665876777251185,
"grad_norm": 0.0002076889795716852,
"learning_rate": 5.578593996840443e-06,
"loss": 0.0,
"step": 4500
},
{
"epoch": 2.6954976303317535,
"grad_norm": 0.0002478805836290121,
"learning_rate": 5.084913112164298e-06,
"loss": 0.0,
"step": 4550
},
{
"epoch": 2.7251184834123223,
"grad_norm": 0.00021155517606530339,
"learning_rate": 4.591232227488152e-06,
"loss": 0.0,
"step": 4600
},
{
"epoch": 2.754739336492891,
"grad_norm": 0.00021754855697508901,
"learning_rate": 4.097551342812007e-06,
"loss": 0.0,
"step": 4650
},
{
"epoch": 2.7843601895734595,
"grad_norm": 0.00020183408923912793,
"learning_rate": 3.6038704581358612e-06,
"loss": 0.0,
"step": 4700
},
{
"epoch": 2.8139810426540284,
"grad_norm": 0.00022289449407253414,
"learning_rate": 3.1101895734597158e-06,
"loss": 0.0,
"step": 4750
},
{
"epoch": 2.843601895734597,
"grad_norm": 0.007142237853258848,
"learning_rate": 2.6165086887835703e-06,
"loss": 0.0,
"step": 4800
},
{
"epoch": 2.873222748815166,
"grad_norm": 0.00019705097656697035,
"learning_rate": 2.122827804107425e-06,
"loss": 0.0,
"step": 4850
},
{
"epoch": 2.902843601895735,
"grad_norm": 0.00023452220193576068,
"learning_rate": 1.6291469194312798e-06,
"loss": 0.0001,
"step": 4900
},
{
"epoch": 2.9324644549763033,
"grad_norm": 0.000205856587854214,
"learning_rate": 1.1354660347551343e-06,
"loss": 0.0,
"step": 4950
},
{
"epoch": 2.962085308056872,
"grad_norm": 0.00020924191630911082,
"learning_rate": 6.41785150078989e-07,
"loss": 0.0,
"step": 5000
}
],
"logging_steps": 50,
"max_steps": 5064,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1246789065733344.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}