{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 31135,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08029548739360848,
"grad_norm": 1.0575518608093262,
"learning_rate": 4.919704512606392e-05,
"loss": 1.6978,
"step": 500
},
{
"epoch": 0.16059097478721696,
"grad_norm": 1.1808357238769531,
"learning_rate": 4.839409025212783e-05,
"loss": 1.5655,
"step": 1000
},
{
"epoch": 0.24088646218082543,
"grad_norm": 1.2457187175750732,
"learning_rate": 4.759113537819175e-05,
"loss": 1.5116,
"step": 1500
},
{
"epoch": 0.3211819495744339,
"grad_norm": 1.1846860647201538,
"learning_rate": 4.678818050425566e-05,
"loss": 1.4617,
"step": 2000
},
{
"epoch": 0.4014774369680424,
"grad_norm": 1.1566858291625977,
"learning_rate": 4.598522563031958e-05,
"loss": 1.4351,
"step": 2500
},
{
"epoch": 0.48177292436165087,
"grad_norm": 1.2010133266448975,
"learning_rate": 4.5182270756383495e-05,
"loss": 1.4215,
"step": 3000
},
{
"epoch": 0.5620684117552593,
"grad_norm": 1.0966484546661377,
"learning_rate": 4.4379315882447406e-05,
"loss": 1.3943,
"step": 3500
},
{
"epoch": 0.6423638991488678,
"grad_norm": 1.1054482460021973,
"learning_rate": 4.3576361008511324e-05,
"loss": 1.392,
"step": 4000
},
{
"epoch": 0.7226593865424763,
"grad_norm": 1.166495680809021,
"learning_rate": 4.277340613457524e-05,
"loss": 1.3607,
"step": 4500
},
{
"epoch": 0.8029548739360848,
"grad_norm": 1.6440229415893555,
"learning_rate": 4.197045126063915e-05,
"loss": 1.3453,
"step": 5000
},
{
"epoch": 0.8832503613296933,
"grad_norm": 1.1146718263626099,
"learning_rate": 4.116749638670307e-05,
"loss": 1.328,
"step": 5500
},
{
"epoch": 0.9635458487233017,
"grad_norm": 1.2365636825561523,
"learning_rate": 4.036454151276698e-05,
"loss": 1.3213,
"step": 6000
},
{
"epoch": 1.0438413361169103,
"grad_norm": 1.0937212705612183,
"learning_rate": 3.95615866388309e-05,
"loss": 1.2654,
"step": 6500
},
{
"epoch": 1.1241368235105187,
"grad_norm": 0.8828343749046326,
"learning_rate": 3.875863176489482e-05,
"loss": 1.2134,
"step": 7000
},
{
"epoch": 1.2044323109041273,
"grad_norm": 1.099165916442871,
"learning_rate": 3.795567689095873e-05,
"loss": 1.2104,
"step": 7500
},
{
"epoch": 1.2847277982977356,
"grad_norm": 1.2219111919403076,
"learning_rate": 3.7152722017022646e-05,
"loss": 1.1973,
"step": 8000
},
{
"epoch": 1.3650232856913442,
"grad_norm": 1.0750117301940918,
"learning_rate": 3.6349767143086564e-05,
"loss": 1.1923,
"step": 8500
},
{
"epoch": 1.4453187730849526,
"grad_norm": 1.098244547843933,
"learning_rate": 3.5546812269150475e-05,
"loss": 1.1925,
"step": 9000
},
{
"epoch": 1.525614260478561,
"grad_norm": 1.1637680530548096,
"learning_rate": 3.474385739521439e-05,
"loss": 1.182,
"step": 9500
},
{
"epoch": 1.6059097478721696,
"grad_norm": 1.1562321186065674,
"learning_rate": 3.3940902521278304e-05,
"loss": 1.1634,
"step": 10000
},
{
"epoch": 1.6862052352657781,
"grad_norm": 1.4565141201019287,
"learning_rate": 3.313794764734222e-05,
"loss": 1.1542,
"step": 10500
},
{
"epoch": 1.7665007226593865,
"grad_norm": 1.434606671333313,
"learning_rate": 3.233499277340614e-05,
"loss": 1.1533,
"step": 11000
},
{
"epoch": 1.8467962100529949,
"grad_norm": 1.1290115118026733,
"learning_rate": 3.153203789947005e-05,
"loss": 1.1496,
"step": 11500
},
{
"epoch": 1.9270916974466035,
"grad_norm": 1.1467580795288086,
"learning_rate": 3.072908302553397e-05,
"loss": 1.1444,
"step": 12000
},
{
"epoch": 2.007387184840212,
"grad_norm": 1.1580528020858765,
"learning_rate": 2.9926128151597882e-05,
"loss": 1.1478,
"step": 12500
},
{
"epoch": 2.0876826722338206,
"grad_norm": 1.040642261505127,
"learning_rate": 2.9123173277661797e-05,
"loss": 1.0662,
"step": 13000
},
{
"epoch": 2.167978159627429,
"grad_norm": 1.1460875272750854,
"learning_rate": 2.832021840372571e-05,
"loss": 1.0788,
"step": 13500
},
{
"epoch": 2.2482736470210374,
"grad_norm": 1.0731582641601562,
"learning_rate": 2.751726352978963e-05,
"loss": 1.0635,
"step": 14000
},
{
"epoch": 2.328569134414646,
"grad_norm": 1.1237194538116455,
"learning_rate": 2.6714308655853543e-05,
"loss": 1.065,
"step": 14500
},
{
"epoch": 2.4088646218082546,
"grad_norm": 1.0012214183807373,
"learning_rate": 2.5911353781917458e-05,
"loss": 1.0509,
"step": 15000
},
{
"epoch": 2.4891601092018627,
"grad_norm": 1.1109308004379272,
"learning_rate": 2.5108398907981372e-05,
"loss": 1.0574,
"step": 15500
},
{
"epoch": 2.5694555965954713,
"grad_norm": 1.1631648540496826,
"learning_rate": 2.430544403404529e-05,
"loss": 1.0345,
"step": 16000
},
{
"epoch": 2.64975108398908,
"grad_norm": 1.0513032674789429,
"learning_rate": 2.3502489160109204e-05,
"loss": 1.0616,
"step": 16500
},
{
"epoch": 2.7300465713826885,
"grad_norm": 1.189889669418335,
"learning_rate": 2.269953428617312e-05,
"loss": 1.0533,
"step": 17000
},
{
"epoch": 2.8103420587762966,
"grad_norm": 1.0951628684997559,
"learning_rate": 2.1896579412237033e-05,
"loss": 1.0388,
"step": 17500
},
{
"epoch": 2.890637546169905,
"grad_norm": 1.0122724771499634,
"learning_rate": 2.109362453830095e-05,
"loss": 1.0374,
"step": 18000
},
{
"epoch": 2.970933033563514,
"grad_norm": 1.1020405292510986,
"learning_rate": 2.0290669664364865e-05,
"loss": 1.0325,
"step": 18500
},
{
"epoch": 3.0512285209571224,
"grad_norm": 1.0594305992126465,
"learning_rate": 1.948771479042878e-05,
"loss": 1.0047,
"step": 19000
},
{
"epoch": 3.1315240083507305,
"grad_norm": 1.070056438446045,
"learning_rate": 1.8684759916492694e-05,
"loss": 0.9794,
"step": 19500
},
{
"epoch": 3.211819495744339,
"grad_norm": 1.106451392173767,
"learning_rate": 1.7881805042556608e-05,
"loss": 0.971,
"step": 20000
},
{
"epoch": 3.2921149831379477,
"grad_norm": 1.0232676267623901,
"learning_rate": 1.7078850168620526e-05,
"loss": 0.9819,
"step": 20500
},
{
"epoch": 3.3724104705315563,
"grad_norm": 1.1868596076965332,
"learning_rate": 1.627589529468444e-05,
"loss": 0.9763,
"step": 21000
},
{
"epoch": 3.4527059579251644,
"grad_norm": 1.0707334280014038,
"learning_rate": 1.5472940420748355e-05,
"loss": 0.9741,
"step": 21500
},
{
"epoch": 3.533001445318773,
"grad_norm": 1.0286450386047363,
"learning_rate": 1.466998554681227e-05,
"loss": 0.9821,
"step": 22000
},
{
"epoch": 3.6132969327123816,
"grad_norm": 1.1337109804153442,
"learning_rate": 1.3867030672876185e-05,
"loss": 0.9754,
"step": 22500
},
{
"epoch": 3.69359242010599,
"grad_norm": 1.1301957368850708,
"learning_rate": 1.3064075798940101e-05,
"loss": 0.9757,
"step": 23000
},
{
"epoch": 3.7738879074995983,
"grad_norm": 0.8995300531387329,
"learning_rate": 1.2261120925004016e-05,
"loss": 0.9728,
"step": 23500
},
{
"epoch": 3.854183394893207,
"grad_norm": 1.099932074546814,
"learning_rate": 1.1458166051067932e-05,
"loss": 0.9549,
"step": 24000
},
{
"epoch": 3.9344788822868155,
"grad_norm": 1.0159733295440674,
"learning_rate": 1.0655211177131846e-05,
"loss": 0.976,
"step": 24500
},
{
"epoch": 4.014774369680424,
"grad_norm": 1.0208700895309448,
"learning_rate": 9.852256303195762e-06,
"loss": 0.9571,
"step": 25000
},
{
"epoch": 4.095069857074033,
"grad_norm": 1.040358304977417,
"learning_rate": 9.049301429259676e-06,
"loss": 0.9293,
"step": 25500
},
{
"epoch": 4.175365344467641,
"grad_norm": 1.1360992193222046,
"learning_rate": 8.246346555323591e-06,
"loss": 0.9351,
"step": 26000
},
{
"epoch": 4.255660831861249,
"grad_norm": 1.0629996061325073,
"learning_rate": 7.443391681387506e-06,
"loss": 0.9308,
"step": 26500
},
{
"epoch": 4.335956319254858,
"grad_norm": 1.1828113794326782,
"learning_rate": 6.6404368074514205e-06,
"loss": 0.9356,
"step": 27000
},
{
"epoch": 4.416251806648466,
"grad_norm": 1.156646966934204,
"learning_rate": 5.8374819335153366e-06,
"loss": 0.9396,
"step": 27500
},
{
"epoch": 4.496547294042075,
"grad_norm": 1.0000945329666138,
"learning_rate": 5.034527059579252e-06,
"loss": 0.9266,
"step": 28000
},
{
"epoch": 4.576842781435683,
"grad_norm": 1.0536987781524658,
"learning_rate": 4.231572185643167e-06,
"loss": 0.9269,
"step": 28500
},
{
"epoch": 4.657138268829292,
"grad_norm": 1.1100162267684937,
"learning_rate": 3.4286173117070822e-06,
"loss": 0.9256,
"step": 29000
},
{
"epoch": 4.7374337562229005,
"grad_norm": 1.1744736433029175,
"learning_rate": 2.6256624377709975e-06,
"loss": 0.9176,
"step": 29500
},
{
"epoch": 4.817729243616509,
"grad_norm": 1.049423098564148,
"learning_rate": 1.8227075638349127e-06,
"loss": 0.9355,
"step": 30000
},
{
"epoch": 4.898024731010118,
"grad_norm": 1.227993369102478,
"learning_rate": 1.0197526898988277e-06,
"loss": 0.9221,
"step": 30500
},
{
"epoch": 4.978320218403725,
"grad_norm": 1.1226952075958252,
"learning_rate": 2.167978159627429e-07,
"loss": 0.9336,
"step": 31000
}
],
"logging_steps": 500,
"max_steps": 31135,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6463151666049843e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}