afrolid_mega / trainer_state.json
14kwonss's picture
Upload folder using huggingface_hub
eeaf3a3 verified
{
"best_global_step": 5000,
"best_metric": 0.9754972535633911,
"best_model_checkpoint": "/home/skwon01/scratch/afroscope_may/fine_tuned_models/afrolid_mega/checkpoint-5000",
"epoch": 25.0,
"eval_steps": 5000,
"global_step": 7025,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.35587188612099646,
"grad_norm": 0.9373229742050171,
"learning_rate": 1.9718149466192173e-05,
"loss": 5.1026,
"step": 100
},
{
"epoch": 0.7117437722419929,
"grad_norm": 0.9601100087165833,
"learning_rate": 1.9433451957295375e-05,
"loss": 2.8606,
"step": 200
},
{
"epoch": 1.0676156583629894,
"grad_norm": 0.8142226338386536,
"learning_rate": 1.9148754448398578e-05,
"loss": 1.6358,
"step": 300
},
{
"epoch": 1.4234875444839858,
"grad_norm": 0.6562248468399048,
"learning_rate": 1.886405693950178e-05,
"loss": 0.9659,
"step": 400
},
{
"epoch": 1.7793594306049823,
"grad_norm": 0.807991087436676,
"learning_rate": 1.8579359430604986e-05,
"loss": 0.6199,
"step": 500
},
{
"epoch": 2.135231316725979,
"grad_norm": 0.6347224116325378,
"learning_rate": 1.8294661921708185e-05,
"loss": 0.4351,
"step": 600
},
{
"epoch": 2.491103202846975,
"grad_norm": 0.7022924423217773,
"learning_rate": 1.800996441281139e-05,
"loss": 0.3276,
"step": 700
},
{
"epoch": 2.8469750889679717,
"grad_norm": 0.7419522404670715,
"learning_rate": 1.7725266903914593e-05,
"loss": 0.2658,
"step": 800
},
{
"epoch": 3.202846975088968,
"grad_norm": 0.6704352498054504,
"learning_rate": 1.7440569395017795e-05,
"loss": 0.2184,
"step": 900
},
{
"epoch": 3.5587188612099645,
"grad_norm": 0.7129318714141846,
"learning_rate": 1.7155871886120997e-05,
"loss": 0.187,
"step": 1000
},
{
"epoch": 3.914590747330961,
"grad_norm": 0.7012542486190796,
"learning_rate": 1.68711743772242e-05,
"loss": 0.166,
"step": 1100
},
{
"epoch": 4.270462633451958,
"grad_norm": 0.5381819009780884,
"learning_rate": 1.6586476868327405e-05,
"loss": 0.1438,
"step": 1200
},
{
"epoch": 4.6263345195729535,
"grad_norm": 0.6322551965713501,
"learning_rate": 1.6301779359430608e-05,
"loss": 0.1307,
"step": 1300
},
{
"epoch": 4.98220640569395,
"grad_norm": 0.598932147026062,
"learning_rate": 1.601708185053381e-05,
"loss": 0.1201,
"step": 1400
},
{
"epoch": 5.338078291814947,
"grad_norm": 0.5444994568824768,
"learning_rate": 1.5732384341637012e-05,
"loss": 0.1057,
"step": 1500
},
{
"epoch": 5.693950177935943,
"grad_norm": 0.5283308029174805,
"learning_rate": 1.5447686832740214e-05,
"loss": 0.1001,
"step": 1600
},
{
"epoch": 6.049822064056939,
"grad_norm": 0.5698280930519104,
"learning_rate": 1.5162989323843418e-05,
"loss": 0.0933,
"step": 1700
},
{
"epoch": 6.405693950177936,
"grad_norm": 0.5436670780181885,
"learning_rate": 1.4878291814946619e-05,
"loss": 0.0826,
"step": 1800
},
{
"epoch": 6.761565836298932,
"grad_norm": 0.4918835461139679,
"learning_rate": 1.4593594306049823e-05,
"loss": 0.0808,
"step": 1900
},
{
"epoch": 7.117437722419929,
"grad_norm": 0.425703763961792,
"learning_rate": 1.4308896797153027e-05,
"loss": 0.0757,
"step": 2000
},
{
"epoch": 7.473309608540926,
"grad_norm": 0.5279386043548584,
"learning_rate": 1.4024199288256228e-05,
"loss": 0.0689,
"step": 2100
},
{
"epoch": 7.829181494661921,
"grad_norm": 0.5598633289337158,
"learning_rate": 1.3739501779359432e-05,
"loss": 0.0666,
"step": 2200
},
{
"epoch": 8.185053380782918,
"grad_norm": 0.4975087642669678,
"learning_rate": 1.3454804270462634e-05,
"loss": 0.0621,
"step": 2300
},
{
"epoch": 8.540925266903916,
"grad_norm": 0.4781990051269531,
"learning_rate": 1.3170106761565838e-05,
"loss": 0.0588,
"step": 2400
},
{
"epoch": 8.896797153024911,
"grad_norm": 0.5004540681838989,
"learning_rate": 1.288540925266904e-05,
"loss": 0.0569,
"step": 2500
},
{
"epoch": 9.252669039145907,
"grad_norm": 0.494505912065506,
"learning_rate": 1.2600711743772243e-05,
"loss": 0.0519,
"step": 2600
},
{
"epoch": 9.608540925266905,
"grad_norm": 0.47906264662742615,
"learning_rate": 1.2316014234875447e-05,
"loss": 0.0503,
"step": 2700
},
{
"epoch": 9.9644128113879,
"grad_norm": 0.507278323173523,
"learning_rate": 1.2031316725978647e-05,
"loss": 0.0497,
"step": 2800
},
{
"epoch": 10.320284697508896,
"grad_norm": 0.5136927366256714,
"learning_rate": 1.1746619217081851e-05,
"loss": 0.0456,
"step": 2900
},
{
"epoch": 10.676156583629894,
"grad_norm": 0.479863703250885,
"learning_rate": 1.1461921708185055e-05,
"loss": 0.0436,
"step": 3000
},
{
"epoch": 11.03202846975089,
"grad_norm": 0.4625159502029419,
"learning_rate": 1.1177224199288256e-05,
"loss": 0.0437,
"step": 3100
},
{
"epoch": 11.387900355871887,
"grad_norm": 0.43024319410324097,
"learning_rate": 1.089252669039146e-05,
"loss": 0.0395,
"step": 3200
},
{
"epoch": 11.743772241992882,
"grad_norm": 0.5366887450218201,
"learning_rate": 1.0607829181494662e-05,
"loss": 0.0388,
"step": 3300
},
{
"epoch": 12.099644128113878,
"grad_norm": 0.41900748014450073,
"learning_rate": 1.0323131672597866e-05,
"loss": 0.0379,
"step": 3400
},
{
"epoch": 12.455516014234876,
"grad_norm": 0.5012409090995789,
"learning_rate": 1.0038434163701067e-05,
"loss": 0.0357,
"step": 3500
},
{
"epoch": 12.811387900355871,
"grad_norm": 0.4979284405708313,
"learning_rate": 9.753736654804271e-06,
"loss": 0.0347,
"step": 3600
},
{
"epoch": 13.167259786476869,
"grad_norm": 0.39561697840690613,
"learning_rate": 9.469039145907473e-06,
"loss": 0.0332,
"step": 3700
},
{
"epoch": 13.523131672597865,
"grad_norm": 0.46909043192863464,
"learning_rate": 9.184341637010676e-06,
"loss": 0.032,
"step": 3800
},
{
"epoch": 13.87900355871886,
"grad_norm": 0.4095679521560669,
"learning_rate": 8.89964412811388e-06,
"loss": 0.0319,
"step": 3900
},
{
"epoch": 14.234875444839858,
"grad_norm": 0.45537084341049194,
"learning_rate": 8.614946619217082e-06,
"loss": 0.0301,
"step": 4000
},
{
"epoch": 14.590747330960854,
"grad_norm": 0.4314133822917938,
"learning_rate": 8.330249110320286e-06,
"loss": 0.0291,
"step": 4100
},
{
"epoch": 14.946619217081851,
"grad_norm": 0.388823002576828,
"learning_rate": 8.045551601423488e-06,
"loss": 0.0286,
"step": 4200
},
{
"epoch": 15.302491103202847,
"grad_norm": 0.45969030261039734,
"learning_rate": 7.76085409252669e-06,
"loss": 0.0268,
"step": 4300
},
{
"epoch": 15.658362989323843,
"grad_norm": 0.383735328912735,
"learning_rate": 7.476156583629895e-06,
"loss": 0.0266,
"step": 4400
},
{
"epoch": 16.01423487544484,
"grad_norm": 0.43427741527557373,
"learning_rate": 7.191459074733097e-06,
"loss": 0.0265,
"step": 4500
},
{
"epoch": 16.370106761565836,
"grad_norm": 0.3857556879520416,
"learning_rate": 6.906761565836299e-06,
"loss": 0.0243,
"step": 4600
},
{
"epoch": 16.725978647686834,
"grad_norm": 0.41817349195480347,
"learning_rate": 6.622064056939502e-06,
"loss": 0.0246,
"step": 4700
},
{
"epoch": 17.081850533807827,
"grad_norm": 0.44656312465667725,
"learning_rate": 6.337366548042705e-06,
"loss": 0.024,
"step": 4800
},
{
"epoch": 17.437722419928825,
"grad_norm": 0.38422083854675293,
"learning_rate": 6.052669039145908e-06,
"loss": 0.0227,
"step": 4900
},
{
"epoch": 17.793594306049823,
"grad_norm": 0.36347660422325134,
"learning_rate": 5.767971530249111e-06,
"loss": 0.0227,
"step": 5000
},
{
"epoch": 17.793594306049823,
"eval_f1": 0.9754972535633911,
"eval_loss": 0.088624507188797,
"eval_runtime": 53.192,
"eval_samples_per_second": 4372.145,
"eval_steps_per_second": 136.637,
"step": 5000
},
{
"epoch": 18.14946619217082,
"grad_norm": 0.3471202254295349,
"learning_rate": 5.483274021352314e-06,
"loss": 0.0219,
"step": 5100
},
{
"epoch": 18.505338078291814,
"grad_norm": 0.4116271436214447,
"learning_rate": 5.1985765124555165e-06,
"loss": 0.0215,
"step": 5200
},
{
"epoch": 18.86120996441281,
"grad_norm": 0.3756101727485657,
"learning_rate": 4.913879003558719e-06,
"loss": 0.0216,
"step": 5300
},
{
"epoch": 19.21708185053381,
"grad_norm": 0.3369296193122864,
"learning_rate": 4.629181494661922e-06,
"loss": 0.0206,
"step": 5400
},
{
"epoch": 19.572953736654803,
"grad_norm": 0.41810908913612366,
"learning_rate": 4.344483985765125e-06,
"loss": 0.0201,
"step": 5500
},
{
"epoch": 19.9288256227758,
"grad_norm": 0.45450907945632935,
"learning_rate": 4.0597864768683275e-06,
"loss": 0.0202,
"step": 5600
},
{
"epoch": 20.284697508896798,
"grad_norm": 0.3766241669654846,
"learning_rate": 3.7750889679715307e-06,
"loss": 0.0194,
"step": 5700
},
{
"epoch": 20.640569395017792,
"grad_norm": 0.3532281816005707,
"learning_rate": 3.4903914590747334e-06,
"loss": 0.0192,
"step": 5800
},
{
"epoch": 20.99644128113879,
"grad_norm": 0.34376078844070435,
"learning_rate": 3.205693950177936e-06,
"loss": 0.0189,
"step": 5900
},
{
"epoch": 21.352313167259787,
"grad_norm": 0.38847091794013977,
"learning_rate": 2.9209964412811393e-06,
"loss": 0.0184,
"step": 6000
},
{
"epoch": 21.708185053380785,
"grad_norm": 0.33618295192718506,
"learning_rate": 2.636298932384342e-06,
"loss": 0.0185,
"step": 6100
},
{
"epoch": 22.06405693950178,
"grad_norm": 0.34742456674575806,
"learning_rate": 2.351601423487545e-06,
"loss": 0.018,
"step": 6200
},
{
"epoch": 22.419928825622776,
"grad_norm": 0.28954410552978516,
"learning_rate": 2.0669039145907475e-06,
"loss": 0.0177,
"step": 6300
},
{
"epoch": 22.775800711743774,
"grad_norm": 0.3540429174900055,
"learning_rate": 1.7822064056939503e-06,
"loss": 0.0173,
"step": 6400
},
{
"epoch": 23.131672597864767,
"grad_norm": 0.2977263033390045,
"learning_rate": 1.4975088967971532e-06,
"loss": 0.0175,
"step": 6500
},
{
"epoch": 23.487544483985765,
"grad_norm": 0.3293995261192322,
"learning_rate": 1.212811387900356e-06,
"loss": 0.0168,
"step": 6600
},
{
"epoch": 23.843416370106763,
"grad_norm": 0.33407387137413025,
"learning_rate": 9.281138790035587e-07,
"loss": 0.0169,
"step": 6700
},
{
"epoch": 24.199288256227756,
"grad_norm": 0.27643245458602905,
"learning_rate": 6.434163701067617e-07,
"loss": 0.0166,
"step": 6800
},
{
"epoch": 24.555160142348754,
"grad_norm": 0.3567065894603729,
"learning_rate": 3.5871886120996446e-07,
"loss": 0.0168,
"step": 6900
},
{
"epoch": 24.91103202846975,
"grad_norm": 0.34183645248413086,
"learning_rate": 7.402135231316726e-08,
"loss": 0.0164,
"step": 7000
},
{
"epoch": 25.0,
"step": 7025,
"total_flos": 3.806537758910972e+18,
"train_loss": 0.21727693550527308,
"train_runtime": 4570.4626,
"train_samples_per_second": 12590.799,
"train_steps_per_second": 1.537
}
],
"logging_steps": 100,
"max_steps": 7025,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 5000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.806537758910972e+18,
"train_batch_size": 512,
"trial_name": null,
"trial_params": null
}