{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 5649,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017702248185519562,
"grad_norm": 0.04682525247335434,
"learning_rate": 4.4267374944665786e-07,
"loss": 2.7706,
"step": 100
},
{
"epoch": 0.035404496371039124,
"grad_norm": 0.04805780574679375,
"learning_rate": 8.853474988933157e-07,
"loss": 2.7666,
"step": 200
},
{
"epoch": 0.053106744556558685,
"grad_norm": 0.0528537780046463,
"learning_rate": 1.3280212483399734e-06,
"loss": 2.7509,
"step": 300
},
{
"epoch": 0.07080899274207825,
"grad_norm": 0.07866832613945007,
"learning_rate": 1.7706949977866315e-06,
"loss": 2.7561,
"step": 400
},
{
"epoch": 0.08851124092759781,
"grad_norm": 0.0911114439368248,
"learning_rate": 2.2133687472332895e-06,
"loss": 2.7683,
"step": 500
},
{
"epoch": 0.10621348911311737,
"grad_norm": 0.09670838713645935,
"learning_rate": 2.656042496679947e-06,
"loss": 2.7376,
"step": 600
},
{
"epoch": 0.12391573729863693,
"grad_norm": 0.10767289996147156,
"learning_rate": 3.098716246126605e-06,
"loss": 2.7265,
"step": 700
},
{
"epoch": 0.1416179854841565,
"grad_norm": 0.11828861385583878,
"learning_rate": 3.541389995573263e-06,
"loss": 2.7201,
"step": 800
},
{
"epoch": 0.15932023366967604,
"grad_norm": 0.13154348731040955,
"learning_rate": 3.98406374501992e-06,
"loss": 2.7123,
"step": 900
},
{
"epoch": 0.17702248185519562,
"grad_norm": 0.14556218683719635,
"learning_rate": 4.426737494466579e-06,
"loss": 2.7015,
"step": 1000
},
{
"epoch": 0.19472473004071517,
"grad_norm": 0.15351223945617676,
"learning_rate": 4.869411243913236e-06,
"loss": 2.6959,
"step": 1100
},
{
"epoch": 0.21242697822623474,
"grad_norm": 0.1721131056547165,
"learning_rate": 5.312084993359894e-06,
"loss": 2.6947,
"step": 1200
},
{
"epoch": 0.2301292264117543,
"grad_norm": 0.18679605424404144,
"learning_rate": 5.754758742806552e-06,
"loss": 2.6706,
"step": 1300
},
{
"epoch": 0.24783147459727387,
"grad_norm": 0.17619894444942474,
"learning_rate": 6.19743249225321e-06,
"loss": 2.6608,
"step": 1400
},
{
"epoch": 0.2655337227827934,
"grad_norm": 0.1907527893781662,
"learning_rate": 6.640106241699867e-06,
"loss": 2.6539,
"step": 1500
},
{
"epoch": 0.283235970968313,
"grad_norm": 0.20181308686733246,
"learning_rate": 7.082779991146526e-06,
"loss": 2.6598,
"step": 1600
},
{
"epoch": 0.3009382191538325,
"grad_norm": 0.21578721702098846,
"learning_rate": 7.525453740593184e-06,
"loss": 2.6435,
"step": 1700
},
{
"epoch": 0.3186404673393521,
"grad_norm": 0.20917312800884247,
"learning_rate": 7.96812749003984e-06,
"loss": 2.6344,
"step": 1800
},
{
"epoch": 0.33634271552487166,
"grad_norm": 0.23269931972026825,
"learning_rate": 8.4108012394865e-06,
"loss": 2.6365,
"step": 1900
},
{
"epoch": 0.35404496371039124,
"grad_norm": 0.2302529662847519,
"learning_rate": 8.853474988933158e-06,
"loss": 2.6323,
"step": 2000
},
{
"epoch": 0.37174721189591076,
"grad_norm": 0.25494036078453064,
"learning_rate": 9.296148738379815e-06,
"loss": 2.6171,
"step": 2100
},
{
"epoch": 0.38944946008143033,
"grad_norm": 0.2526334822177887,
"learning_rate": 9.738822487826472e-06,
"loss": 2.6152,
"step": 2200
},
{
"epoch": 0.4071517082669499,
"grad_norm": 0.2761424779891968,
"learning_rate": 1.0181496237273129e-05,
"loss": 2.6144,
"step": 2300
},
{
"epoch": 0.4248539564524695,
"grad_norm": 0.2666952908039093,
"learning_rate": 1.0624169986719787e-05,
"loss": 2.596,
"step": 2400
},
{
"epoch": 0.442556204637989,
"grad_norm": 0.2540760338306427,
"learning_rate": 1.1066843736166446e-05,
"loss": 2.6047,
"step": 2500
},
{
"epoch": 0.4602584528235086,
"grad_norm": 0.28952619433403015,
"learning_rate": 1.1509517485613105e-05,
"loss": 2.584,
"step": 2600
},
{
"epoch": 0.47796070100902815,
"grad_norm": 0.25384747982025146,
"learning_rate": 1.1952191235059762e-05,
"loss": 2.5861,
"step": 2700
},
{
"epoch": 0.49566294919454773,
"grad_norm": 0.27219265699386597,
"learning_rate": 1.239486498450642e-05,
"loss": 2.5773,
"step": 2800
},
{
"epoch": 0.5133651973800673,
"grad_norm": 0.27737173438072205,
"learning_rate": 1.2837538733953077e-05,
"loss": 2.5735,
"step": 2900
},
{
"epoch": 0.5310674455655868,
"grad_norm": 0.2793057858943939,
"learning_rate": 1.3280212483399734e-05,
"loss": 2.5717,
"step": 3000
},
{
"epoch": 0.5487696937511064,
"grad_norm": 0.2786986231803894,
"learning_rate": 1.3722886232846393e-05,
"loss": 2.5779,
"step": 3100
},
{
"epoch": 0.566471941936626,
"grad_norm": 0.267103910446167,
"learning_rate": 1.4165559982293052e-05,
"loss": 2.5569,
"step": 3200
},
{
"epoch": 0.5841741901221456,
"grad_norm": 0.30275553464889526,
"learning_rate": 1.4608233731739709e-05,
"loss": 2.5662,
"step": 3300
},
{
"epoch": 0.601876438307665,
"grad_norm": 0.2926766276359558,
"learning_rate": 1.5050907481186367e-05,
"loss": 2.5539,
"step": 3400
},
{
"epoch": 0.6195786864931846,
"grad_norm": 0.2998274862766266,
"learning_rate": 1.5493581230633026e-05,
"loss": 2.5592,
"step": 3500
},
{
"epoch": 0.6372809346787042,
"grad_norm": 0.2824733853340149,
"learning_rate": 1.593625498007968e-05,
"loss": 2.5652,
"step": 3600
},
{
"epoch": 0.6549831828642237,
"grad_norm": 0.33311328291893005,
"learning_rate": 1.637892872952634e-05,
"loss": 2.551,
"step": 3700
},
{
"epoch": 0.6726854310497433,
"grad_norm": 0.321186363697052,
"learning_rate": 1.6821602478973e-05,
"loss": 2.55,
"step": 3800
},
{
"epoch": 0.6903876792352629,
"grad_norm": 0.33593156933784485,
"learning_rate": 1.7264276228419657e-05,
"loss": 2.5468,
"step": 3900
},
{
"epoch": 0.7080899274207825,
"grad_norm": 0.32341769337654114,
"learning_rate": 1.7706949977866316e-05,
"loss": 2.5465,
"step": 4000
},
{
"epoch": 0.725792175606302,
"grad_norm": 0.3142276108264923,
"learning_rate": 1.814962372731297e-05,
"loss": 2.5291,
"step": 4100
},
{
"epoch": 0.7434944237918215,
"grad_norm": 0.3352969288825989,
"learning_rate": 1.859229747675963e-05,
"loss": 2.5193,
"step": 4200
},
{
"epoch": 0.7611966719773411,
"grad_norm": 0.31670665740966797,
"learning_rate": 1.903497122620629e-05,
"loss": 2.5263,
"step": 4300
},
{
"epoch": 0.7788989201628607,
"grad_norm": 0.336976021528244,
"learning_rate": 1.9477644975652944e-05,
"loss": 2.5253,
"step": 4400
},
{
"epoch": 0.7966011683483802,
"grad_norm": 0.3269643783569336,
"learning_rate": 1.9920318725099602e-05,
"loss": 2.5182,
"step": 4500
},
{
"epoch": 0.8143034165338998,
"grad_norm": 0.3271143436431885,
"learning_rate": 1.9741718189189488e-05,
"loss": 2.5278,
"step": 4600
},
{
"epoch": 0.8320056647194194,
"grad_norm": 0.3091905415058136,
"learning_rate": 1.8749113254181498e-05,
"loss": 2.5208,
"step": 4700
},
{
"epoch": 0.849707912904939,
"grad_norm": 0.33515864610671997,
"learning_rate": 1.7085783500963825e-05,
"loss": 2.5139,
"step": 4800
},
{
"epoch": 0.8674101610904585,
"grad_norm": 0.33183321356773376,
"learning_rate": 1.487924317171598e-05,
"loss": 2.5108,
"step": 4900
},
{
"epoch": 0.885112409275978,
"grad_norm": 0.32752835750579834,
"learning_rate": 1.2298650136294059e-05,
"loss": 2.5113,
"step": 5000
},
{
"epoch": 0.9028146574614976,
"grad_norm": 0.3689501881599426,
"learning_rate": 9.541837905851817e-06,
"loss": 2.5141,
"step": 5100
},
{
"epoch": 0.9205169056470172,
"grad_norm": 0.3316275179386139,
"learning_rate": 6.8201493134721215e-06,
"loss": 2.5211,
"step": 5200
},
{
"epoch": 0.9382191538325367,
"grad_norm": 0.36550265550613403,
"learning_rate": 4.342234542700692e-06,
"loss": 2.5125,
"step": 5300
},
{
"epoch": 0.9559214020180563,
"grad_norm": 0.30869239568710327,
"learning_rate": 2.2980555800703273e-06,
"loss": 2.4942,
"step": 5400
},
{
"epoch": 0.9736236502035759,
"grad_norm": 0.32581713795661926,
"learning_rate": 8.443233428209019e-07,
"loss": 2.5019,
"step": 5500
},
{
"epoch": 0.9913258983890955,
"grad_norm": 0.3429378569126129,
"learning_rate": 9.248390122572615e-08,
"loss": 2.5082,
"step": 5600
},
{
"epoch": 1.0,
"step": 5649,
"total_flos": 8.231609035317576e+17,
"train_loss": 2.6019921150011687,
"train_runtime": 1598.815,
"train_samples_per_second": 56.525,
"train_steps_per_second": 3.533
}
],
"logging_steps": 100,
"max_steps": 5649,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.231609035317576e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}