{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 2000,
"global_step": 514,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.038910505836575876,
"grad_norm": 3.7059597969055176,
"learning_rate": 9.994024049928221e-05,
"loss": 17.1185,
"step": 10
},
{
"epoch": 0.07782101167315175,
"grad_norm": 2.1613128185272217,
"learning_rate": 9.969771232924403e-05,
"loss": 9.3101,
"step": 20
},
{
"epoch": 0.11673151750972763,
"grad_norm": 1.1486432552337646,
"learning_rate": 9.926958555700134e-05,
"loss": 8.2256,
"step": 30
},
{
"epoch": 0.1556420233463035,
"grad_norm": 1.1596895456314087,
"learning_rate": 9.865745904348295e-05,
"loss": 8.105,
"step": 40
},
{
"epoch": 0.19455252918287938,
"grad_norm": 1.1468164920806885,
"learning_rate": 9.786361880589083e-05,
"loss": 7.914,
"step": 50
},
{
"epoch": 0.23346303501945526,
"grad_norm": 1.1589274406433105,
"learning_rate": 9.689102948045397e-05,
"loss": 7.9438,
"step": 60
},
{
"epoch": 0.2723735408560311,
"grad_norm": 1.0185976028442383,
"learning_rate": 9.574332325084563e-05,
"loss": 7.8356,
"step": 70
},
{
"epoch": 0.311284046692607,
"grad_norm": 0.9653449058532715,
"learning_rate": 9.442478628361098e-05,
"loss": 7.6501,
"step": 80
},
{
"epoch": 0.35019455252918286,
"grad_norm": 0.959281861782074,
"learning_rate": 9.294034272126287e-05,
"loss": 7.6496,
"step": 90
},
{
"epoch": 0.38910505836575876,
"grad_norm": 0.9253648519515991,
"learning_rate": 9.129553629282448e-05,
"loss": 7.421,
"step": 100
},
{
"epoch": 0.4280155642023346,
"grad_norm": 0.9299134016036987,
"learning_rate": 8.949650961049478e-05,
"loss": 7.5062,
"step": 110
},
{
"epoch": 0.4669260700389105,
"grad_norm": 0.9562272429466248,
"learning_rate": 8.754998122975489e-05,
"loss": 7.43,
"step": 120
},
{
"epoch": 0.5058365758754864,
"grad_norm": 0.9745796322822571,
"learning_rate": 8.546322055858526e-05,
"loss": 7.3152,
"step": 130
},
{
"epoch": 0.5447470817120622,
"grad_norm": 1.0300663709640503,
"learning_rate": 8.324402070949658e-05,
"loss": 7.3288,
"step": 140
},
{
"epoch": 0.5836575875486382,
"grad_norm": 0.8551069498062134,
"learning_rate": 8.09006693957597e-05,
"loss": 7.288,
"step": 150
},
{
"epoch": 0.622568093385214,
"grad_norm": 0.9526028037071228,
"learning_rate": 7.844191798052438e-05,
"loss": 7.3128,
"step": 160
},
{
"epoch": 0.6614785992217899,
"grad_norm": 0.8692190647125244,
"learning_rate": 7.587694879441401e-05,
"loss": 7.0454,
"step": 170
},
{
"epoch": 0.7003891050583657,
"grad_norm": 0.7848012447357178,
"learning_rate": 7.321534084365102e-05,
"loss": 7.0277,
"step": 180
},
{
"epoch": 0.7392996108949417,
"grad_norm": 0.7873611450195312,
"learning_rate": 7.046703403677695e-05,
"loss": 6.9448,
"step": 190
},
{
"epoch": 0.7782101167315175,
"grad_norm": 0.8347607254981995,
"learning_rate": 6.764229206356498e-05,
"loss": 7.0677,
"step": 200
},
{
"epoch": 0.8171206225680934,
"grad_norm": 0.7865148186683655,
"learning_rate": 6.475166406475515e-05,
"loss": 6.9666,
"step": 210
},
{
"epoch": 0.8560311284046692,
"grad_norm": 0.8299317359924316,
"learning_rate": 6.180594523575838e-05,
"loss": 7.1694,
"step": 220
},
{
"epoch": 0.8949416342412452,
"grad_norm": 0.8158650994300842,
"learning_rate": 5.881613651145732e-05,
"loss": 6.7787,
"step": 230
},
{
"epoch": 0.933852140077821,
"grad_norm": 0.8030637502670288,
"learning_rate": 5.579340348266251e-05,
"loss": 6.7451,
"step": 240
},
{
"epoch": 0.9727626459143969,
"grad_norm": 0.8576043844223022,
"learning_rate": 5.27490346976529e-05,
"loss": 6.7098,
"step": 250
},
{
"epoch": 1.0116731517509727,
"grad_norm": 1.007199764251709,
"learning_rate": 4.969439950452543e-05,
"loss": 6.3174,
"step": 260
},
{
"epoch": 1.0505836575875487,
"grad_norm": 0.7966341972351074,
"learning_rate": 4.664090559179367e-05,
"loss": 5.085,
"step": 270
},
{
"epoch": 1.0894941634241244,
"grad_norm": 0.8274776339530945,
"learning_rate": 4.359995638580226e-05,
"loss": 5.2567,
"step": 280
},
{
"epoch": 1.1284046692607004,
"grad_norm": 0.8015701770782471,
"learning_rate": 4.0582908464058556e-05,
"loss": 5.1255,
"step": 290
},
{
"epoch": 1.1673151750972763,
"grad_norm": 0.7696110010147095,
"learning_rate": 3.7601029143523764e-05,
"loss": 5.1827,
"step": 300
},
{
"epoch": 1.206225680933852,
"grad_norm": 0.7955564260482788,
"learning_rate": 3.466545440225193e-05,
"loss": 5.1197,
"step": 310
},
{
"epoch": 1.245136186770428,
"grad_norm": 0.7841880917549133,
"learning_rate": 3.1787147291520674e-05,
"loss": 5.1158,
"step": 320
},
{
"epoch": 1.2840466926070038,
"grad_norm": 0.7522294521331787,
"learning_rate": 2.8976856993765766e-05,
"loss": 5.1173,
"step": 330
},
{
"epoch": 1.3229571984435797,
"grad_norm": 0.7152595520019531,
"learning_rate": 2.6245078679219505e-05,
"loss": 5.0291,
"step": 340
},
{
"epoch": 1.3618677042801557,
"grad_norm": 0.697422206401825,
"learning_rate": 2.3602014311170523e-05,
"loss": 4.9941,
"step": 350
},
{
"epoch": 1.4007782101167314,
"grad_norm": 0.7435976266860962,
"learning_rate": 2.1057534546219658e-05,
"loss": 5.1369,
"step": 360
},
{
"epoch": 1.4396887159533074,
"grad_norm": 0.8627301454544067,
"learning_rate": 1.862114187181705e-05,
"loss": 4.9706,
"step": 370
},
{
"epoch": 1.4785992217898833,
"grad_norm": 0.7423489093780518,
"learning_rate": 1.6301935118745826e-05,
"loss": 4.9751,
"step": 380
},
{
"epoch": 1.517509727626459,
"grad_norm": 0.7178505659103394,
"learning_rate": 1.4108575481081521e-05,
"loss": 4.8353,
"step": 390
},
{
"epoch": 1.556420233463035,
"grad_norm": 0.7140501737594604,
"learning_rate": 1.2049254170527857e-05,
"loss": 4.9182,
"step": 400
},
{
"epoch": 1.595330739299611,
"grad_norm": 0.7342681884765625,
"learning_rate": 1.013166182592551e-05,
"loss": 4.8421,
"step": 410
},
{
"epoch": 1.6342412451361867,
"grad_norm": 0.716266930103302,
"learning_rate": 8.36295979217494e-06,
"loss": 4.8006,
"step": 420
},
{
"epoch": 1.6731517509727627,
"grad_norm": 0.7294363975524902,
"learning_rate": 6.7497533758344665e-06,
"loss": 4.7904,
"step": 430
},
{
"epoch": 1.7120622568093387,
"grad_norm": 0.6808627843856812,
"learning_rate": 5.298067177271143e-06,
"loss": 4.7973,
"step": 440
},
{
"epoch": 1.7509727626459144,
"grad_norm": 0.7073158025741577,
"learning_rate": 4.01332259148815e-06,
"loss": 4.7772,
"step": 450
},
{
"epoch": 1.7898832684824901,
"grad_norm": 0.6910358667373657,
"learning_rate": 2.9003175616530265e-06,
"loss": 4.9969,
"step": 460
},
{
"epoch": 1.8287937743190663,
"grad_norm": 0.688657283782959,
"learning_rate": 1.963208660937904e-06,
"loss": 4.9254,
"step": 470
},
{
"epoch": 1.867704280155642,
"grad_norm": 0.6506223678588867,
"learning_rate": 1.205495569588283e-06,
"loss": 4.9407,
"step": 480
},
{
"epoch": 1.9066147859922178,
"grad_norm": 0.6927580237388611,
"learning_rate": 6.300080051914791e-07,
"loss": 4.8365,
"step": 490
},
{
"epoch": 1.9455252918287937,
"grad_norm": 0.682306170463562,
"learning_rate": 2.3889515495413296e-07,
"loss": 4.8349,
"step": 500
},
{
"epoch": 1.9844357976653697,
"grad_norm": 0.6962385177612305,
"learning_rate": 3.361764945473134e-08,
"loss": 4.9875,
"step": 510
},
{
"epoch": 2.0,
"step": 514,
"total_flos": 3.8982891094409216e+17,
"train_loss": 6.394122821347723,
"train_runtime": 7794.0822,
"train_samples_per_second": 4.219,
"train_steps_per_second": 0.066
}
],
"logging_steps": 10,
"max_steps": 514,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 15000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.8982891094409216e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}