{
"best_global_step": 5886,
"best_metric": 3.5591108798980713,
"best_model_checkpoint": "sindhibert_session4/checkpoint-5886",
"epoch": 3.0,
"eval_steps": 1962,
"global_step": 5886,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05098139179199592,
"grad_norm": 4.590001106262207,
"learning_rate": 5.609065155807366e-06,
"loss": 15.86372314453125,
"step": 100
},
{
"epoch": 0.10196278358399184,
"grad_norm": 5.000253677368164,
"learning_rate": 1.1274787535410765e-05,
"loss": 15.6683056640625,
"step": 200
},
{
"epoch": 0.15294417537598776,
"grad_norm": 5.164661407470703,
"learning_rate": 1.6940509915014164e-05,
"loss": 15.58547607421875,
"step": 300
},
{
"epoch": 0.20392556716798368,
"grad_norm": 4.895200729370117,
"learning_rate": 1.999658933249201e-05,
"loss": 15.5261376953125,
"step": 400
},
{
"epoch": 0.2549069589599796,
"grad_norm": 5.010247707366943,
"learning_rate": 1.9965659596003744e-05,
"loss": 15.493291015625,
"step": 500
},
{
"epoch": 0.3058883507519755,
"grad_norm": 4.85853910446167,
"learning_rate": 1.990261043359342e-05,
"loss": 15.43971435546875,
"step": 600
},
{
"epoch": 0.35686974254397147,
"grad_norm": 4.788653373718262,
"learning_rate": 1.9807645053376055e-05,
"loss": 15.409666748046876,
"step": 700
},
{
"epoch": 0.40785113433596737,
"grad_norm": 4.742185592651367,
"learning_rate": 1.968106952977309e-05,
"loss": 15.346304931640624,
"step": 800
},
{
"epoch": 0.45883252612796327,
"grad_norm": 4.758422374725342,
"learning_rate": 1.9523291817031276e-05,
"loss": 15.344024658203125,
"step": 900
},
{
"epoch": 0.5098139179199592,
"grad_norm": 4.854381084442139,
"learning_rate": 1.933482043438185e-05,
"loss": 15.307811279296875,
"step": 1000
},
{
"epoch": 0.5607953097119551,
"grad_norm": 4.7934041023254395,
"learning_rate": 1.9116262827077703e-05,
"loss": 15.254422607421875,
"step": 1100
},
{
"epoch": 0.611776701503951,
"grad_norm": 4.670731544494629,
"learning_rate": 1.88683234085909e-05,
"loss": 15.23345703125,
"step": 1200
},
{
"epoch": 0.6627580932959469,
"grad_norm": 4.993561267852783,
"learning_rate": 1.8591801290280664e-05,
"loss": 15.2450927734375,
"step": 1300
},
{
"epoch": 0.7137394850879429,
"grad_norm": 4.720964431762695,
"learning_rate": 1.8287587705849013e-05,
"loss": 15.1839599609375,
"step": 1400
},
{
"epoch": 0.7647208768799388,
"grad_norm": 5.050419330596924,
"learning_rate": 1.7956663138885173e-05,
"loss": 15.164833984375,
"step": 1500
},
{
"epoch": 0.8157022686719347,
"grad_norm": 4.826648712158203,
"learning_rate": 1.760009416275661e-05,
"loss": 15.130496826171875,
"step": 1600
},
{
"epoch": 0.8666836604639306,
"grad_norm": 4.858438014984131,
"learning_rate": 1.721903000303185e-05,
"loss": 15.125797119140625,
"step": 1700
},
{
"epoch": 0.9176650522559265,
"grad_norm": 4.9611430168151855,
"learning_rate": 1.6814698833514326e-05,
"loss": 15.13617431640625,
"step": 1800
},
{
"epoch": 0.9686464440479226,
"grad_norm": 4.663859844207764,
"learning_rate": 1.63884038178253e-05,
"loss": 15.072591552734375,
"step": 1900
},
{
"epoch": 1.0,
"eval_loss": 3.636704444885254,
"eval_runtime": 8.0138,
"eval_samples_per_second": 632.91,
"eval_steps_per_second": 9.983,
"step": 1962
},
{
"epoch": 1.0193729288809585,
"grad_norm": 4.863068103790283,
"learning_rate": 1.5941518909293737e-05,
"loss": 14.968798828125,
"step": 2000
},
{
"epoch": 1.0703543206729544,
"grad_norm": 5.036495685577393,
"learning_rate": 1.5475484422690282e-05,
"loss": 15.0290869140625,
"step": 2100
},
{
"epoch": 1.1213357124649503,
"grad_norm": 5.248174667358398,
"learning_rate": 1.4991802392077543e-05,
"loss": 15.004036865234376,
"step": 2200
},
{
"epoch": 1.1723171042569462,
"grad_norm": 4.950564384460449,
"learning_rate": 1.4492031729738489e-05,
"loss": 15.002611083984375,
"step": 2300
},
{
"epoch": 1.2232984960489421,
"grad_norm": 4.509192943572998,
"learning_rate": 1.3977783201785732e-05,
"loss": 14.96060302734375,
"step": 2400
},
{
"epoch": 1.274279887840938,
"grad_norm": 4.900182723999023,
"learning_rate": 1.3450714236645352e-05,
"loss": 14.971297607421874,
"step": 2500
},
{
"epoch": 1.325261279632934,
"grad_norm": 5.138764381408691,
"learning_rate": 1.2912523583147625e-05,
"loss": 14.928385009765625,
"step": 2600
},
{
"epoch": 1.3762426714249298,
"grad_norm": 4.894199848175049,
"learning_rate": 1.2364945835441636e-05,
"loss": 14.938167724609375,
"step": 2700
},
{
"epoch": 1.4272240632169257,
"grad_norm": 4.8737921714782715,
"learning_rate": 1.1809745842380042e-05,
"loss": 14.923902587890625,
"step": 2800
},
{
"epoch": 1.4782054550089216,
"grad_norm": 4.8258819580078125,
"learning_rate": 1.1248713019392635e-05,
"loss": 14.89677001953125,
"step": 2900
},
{
"epoch": 1.5291868468009175,
"grad_norm": 4.769787788391113,
"learning_rate": 1.0683655581181524e-05,
"loss": 14.87692626953125,
"step": 3000
},
{
"epoch": 1.5801682385929134,
"grad_norm": 4.92316198348999,
"learning_rate": 1.0116394713826117e-05,
"loss": 14.849693603515625,
"step": 3100
},
{
"epoch": 1.6311496303849093,
"grad_norm": 4.873258590698242,
"learning_rate": 9.548758705081177e-06,
"loss": 14.833634033203126,
"step": 3200
},
{
"epoch": 1.6821310221769055,
"grad_norm": 4.738825798034668,
"learning_rate": 8.98257705178612e-06,
"loss": 14.85665283203125,
"step": 3300
},
{
"epoch": 1.7331124139689014,
"grad_norm": 4.907736778259277,
"learning_rate": 8.419674563377416e-06,
"loss": 14.8664599609375,
"step": 3400
},
{
"epoch": 1.7840938057608973,
"grad_norm": 4.977413177490234,
"learning_rate": 7.861865480508541e-06,
"loss": 14.83008056640625,
"step": 3500
},
{
"epoch": 1.8350751975528932,
"grad_norm": 4.792273044586182,
"learning_rate": 7.310947627733231e-06,
"loss": 14.81404541015625,
"step": 3600
},
{
"epoch": 1.886056589344889,
"grad_norm": 4.84648323059082,
"learning_rate": 6.768696619097996e-06,
"loss": 14.831793212890625,
"step": 3700
},
{
"epoch": 1.9370379811368852,
"grad_norm": 4.854404449462891,
"learning_rate": 6.236860135319321e-06,
"loss": 14.826976318359375,
"step": 3800
},
{
"epoch": 1.988019372928881,
"grad_norm": 4.615888595581055,
"learning_rate": 5.717152290990302e-06,
"loss": 14.767562255859374,
"step": 3900
},
{
"epoch": 2.0,
"eval_loss": 3.56946063041687,
"eval_runtime": 8.0481,
"eval_samples_per_second": 630.208,
"eval_steps_per_second": 9.94,
"step": 3924
},
{
"epoch": 2.038745857761917,
"grad_norm": 5.015805721282959,
"learning_rate": 5.211248109971254e-06,
"loss": 14.695634765625,
"step": 4000
},
{
"epoch": 2.089727249553913,
"grad_norm": 4.800245761871338,
"learning_rate": 4.720778126770141e-06,
"loss": 14.764068603515625,
"step": 4100
},
{
"epoch": 2.140708641345909,
"grad_norm": 4.756154537200928,
"learning_rate": 4.247323131312676e-06,
"loss": 14.755054931640625,
"step": 4200
},
{
"epoch": 2.191690033137905,
"grad_norm": 4.989803314208984,
"learning_rate": 3.7924090740397178e-06,
"loss": 14.760721435546875,
"step": 4300
},
{
"epoch": 2.2426714249299007,
"grad_norm": 4.568801403045654,
"learning_rate": 3.3575021477529313e-06,
"loss": 14.72455810546875,
"step": 4400
},
{
"epoch": 2.2936528167218966,
"grad_norm": 4.871072769165039,
"learning_rate": 2.944004062059924e-06,
"loss": 14.743800048828126,
"step": 4500
},
{
"epoch": 2.3446342085138925,
"grad_norm": 4.790256500244141,
"learning_rate": 2.5532475256494073e-06,
"loss": 14.7241162109375,
"step": 4600
},
{
"epoch": 2.3956156003058884,
"grad_norm": 4.770144462585449,
"learning_rate": 2.186491950957048e-06,
"loss": 14.711162109375,
"step": 4700
},
{
"epoch": 2.4465969920978843,
"grad_norm": 4.44427490234375,
"learning_rate": 1.8449193950659018e-06,
"loss": 14.72890625,
"step": 4800
},
{
"epoch": 2.49757838388988,
"grad_norm": 4.664465427398682,
"learning_rate": 1.5296307499239903e-06,
"loss": 14.713804931640626,
"step": 4900
},
{
"epoch": 2.548559775681876,
"grad_norm": 4.861291408538818,
"learning_rate": 1.2416421941579448e-06,
"loss": 14.730694580078126,
"step": 5000
},
{
"epoch": 2.599541167473872,
"grad_norm": 4.662012577056885,
"learning_rate": 9.818819179185713e-07,
"loss": 14.70477294921875,
"step": 5100
},
{
"epoch": 2.650522559265868,
"grad_norm": 4.803001403808594,
"learning_rate": 7.511871313142238e-07,
"loss": 14.7314208984375,
"step": 5200
},
{
"epoch": 2.701503951057864,
"grad_norm": 4.746646404266357,
"learning_rate": 5.503013660737899e-07,
"loss": 14.70580810546875,
"step": 5300
},
{
"epoch": 2.7524853428498597,
"grad_norm": 4.867108345031738,
"learning_rate": 3.798720791360988e-07,
"loss": 14.710306396484375,
"step": 5400
},
{
"epoch": 2.8034667346418556,
"grad_norm": 4.6949992179870605,
"learning_rate": 2.404485658893807e-07,
"loss": 14.725491943359375,
"step": 5500
},
{
"epoch": 2.8544481264338515,
"grad_norm": 4.641607284545898,
"learning_rate": 1.3248018978643695e-07,
"loss": 14.7078369140625,
"step": 5600
},
{
"epoch": 2.905429518225848,
"grad_norm": 4.756202220916748,
"learning_rate": 5.6314934041501455e-08,
"loss": 14.697396240234376,
"step": 5700
},
{
"epoch": 2.9564109100178433,
"grad_norm": 4.691574573516846,
"learning_rate": 1.2198280076668455e-08,
"loss": 14.694278564453125,
"step": 5800
},
{
"epoch": 3.0,
"eval_loss": 3.5591108798980713,
"eval_runtime": 8.0338,
"eval_samples_per_second": 631.333,
"eval_steps_per_second": 9.958,
"step": 5886
}
],
"logging_steps": 100,
"max_steps": 5886,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1962,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.964983111028869e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}