gpol13's picture
Upload folder using huggingface_hub
138f96f verified
{
"best_global_step": 7075,
"best_metric": 0.9626803387159316,
"best_model_checkpoint": "./output-codet5-k8s-lora/checkpoint-7075",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 7075,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0353356890459364,
"grad_norm": 132030.078125,
"learning_rate": 4.9653710247349825e-05,
"loss": 4.4129,
"step": 50
},
{
"epoch": 0.0706713780918728,
"grad_norm": 259212.703125,
"learning_rate": 4.930035335689046e-05,
"loss": 3.4742,
"step": 100
},
{
"epoch": 0.10600706713780919,
"grad_norm": 135731.703125,
"learning_rate": 4.89469964664311e-05,
"loss": 2.4982,
"step": 150
},
{
"epoch": 0.1413427561837456,
"grad_norm": 253683.546875,
"learning_rate": 4.859363957597173e-05,
"loss": 2.2733,
"step": 200
},
{
"epoch": 0.17667844522968199,
"grad_norm": 447967.6875,
"learning_rate": 4.8240282685512374e-05,
"loss": 2.1743,
"step": 250
},
{
"epoch": 0.21201413427561838,
"grad_norm": 248496.0,
"learning_rate": 4.788692579505301e-05,
"loss": 2.0272,
"step": 300
},
{
"epoch": 0.24734982332155478,
"grad_norm": 137680.71875,
"learning_rate": 4.753356890459364e-05,
"loss": 1.9573,
"step": 350
},
{
"epoch": 0.2826855123674912,
"grad_norm": 165613.9375,
"learning_rate": 4.718021201413428e-05,
"loss": 1.9568,
"step": 400
},
{
"epoch": 0.31802120141342755,
"grad_norm": 139130.828125,
"learning_rate": 4.682685512367491e-05,
"loss": 1.8366,
"step": 450
},
{
"epoch": 0.35335689045936397,
"grad_norm": 141737.296875,
"learning_rate": 4.647349823321555e-05,
"loss": 1.7679,
"step": 500
},
{
"epoch": 0.38869257950530034,
"grad_norm": 151085.828125,
"learning_rate": 4.612014134275618e-05,
"loss": 1.7186,
"step": 550
},
{
"epoch": 0.42402826855123676,
"grad_norm": 143353.3125,
"learning_rate": 4.5766784452296823e-05,
"loss": 1.6871,
"step": 600
},
{
"epoch": 0.45936395759717313,
"grad_norm": 129146.640625,
"learning_rate": 4.541342756183746e-05,
"loss": 1.6727,
"step": 650
},
{
"epoch": 0.49469964664310956,
"grad_norm": 318427.25,
"learning_rate": 4.50600706713781e-05,
"loss": 1.6139,
"step": 700
},
{
"epoch": 0.5300353356890459,
"grad_norm": 167751.1875,
"learning_rate": 4.470671378091873e-05,
"loss": 1.6485,
"step": 750
},
{
"epoch": 0.5653710247349824,
"grad_norm": 185479.96875,
"learning_rate": 4.435335689045937e-05,
"loss": 1.5644,
"step": 800
},
{
"epoch": 0.6007067137809188,
"grad_norm": 162241.875,
"learning_rate": 4.4000000000000006e-05,
"loss": 1.5162,
"step": 850
},
{
"epoch": 0.6360424028268551,
"grad_norm": 220409.78125,
"learning_rate": 4.364664310954063e-05,
"loss": 1.5365,
"step": 900
},
{
"epoch": 0.6713780918727915,
"grad_norm": 219547.015625,
"learning_rate": 4.329328621908127e-05,
"loss": 1.4956,
"step": 950
},
{
"epoch": 0.7067137809187279,
"grad_norm": 151856.1875,
"learning_rate": 4.293992932862191e-05,
"loss": 1.4474,
"step": 1000
},
{
"epoch": 0.7420494699646644,
"grad_norm": 139005.203125,
"learning_rate": 4.258657243816255e-05,
"loss": 1.4196,
"step": 1050
},
{
"epoch": 0.7773851590106007,
"grad_norm": 207968.453125,
"learning_rate": 4.223321554770318e-05,
"loss": 1.4448,
"step": 1100
},
{
"epoch": 0.8127208480565371,
"grad_norm": 226858.75,
"learning_rate": 4.187985865724382e-05,
"loss": 1.3649,
"step": 1150
},
{
"epoch": 0.8480565371024735,
"grad_norm": 139849.125,
"learning_rate": 4.1526501766784455e-05,
"loss": 1.3146,
"step": 1200
},
{
"epoch": 0.8833922261484098,
"grad_norm": 191754.421875,
"learning_rate": 4.1173144876325096e-05,
"loss": 1.3667,
"step": 1250
},
{
"epoch": 0.9187279151943463,
"grad_norm": 399802.3125,
"learning_rate": 4.081978798586573e-05,
"loss": 1.3588,
"step": 1300
},
{
"epoch": 0.9540636042402827,
"grad_norm": 163376.953125,
"learning_rate": 4.0466431095406356e-05,
"loss": 1.305,
"step": 1350
},
{
"epoch": 0.9893992932862191,
"grad_norm": 235141.46875,
"learning_rate": 4.0113074204947e-05,
"loss": 1.3144,
"step": 1400
},
{
"epoch": 1.0,
"eval_bertscore_f1": 0.9499767605989876,
"eval_bleu": 0.41483915022966605,
"eval_loss": 0.6510708332061768,
"eval_meteor": 0.6007343806481693,
"eval_rouge1": 0.7698895071809828,
"eval_rouge2": 0.6286208265783073,
"eval_runtime": 476.7994,
"eval_samples_per_second": 13.557,
"eval_steps_per_second": 0.847,
"step": 1415
},
{
"epoch": 1.0247349823321554,
"grad_norm": 154460.578125,
"learning_rate": 3.975971731448763e-05,
"loss": 1.3006,
"step": 1450
},
{
"epoch": 1.0600706713780919,
"grad_norm": 152772.09375,
"learning_rate": 3.940636042402827e-05,
"loss": 1.3061,
"step": 1500
},
{
"epoch": 1.0954063604240283,
"grad_norm": 157520.671875,
"learning_rate": 3.9053003533568905e-05,
"loss": 1.2585,
"step": 1550
},
{
"epoch": 1.1307420494699647,
"grad_norm": 159070.796875,
"learning_rate": 3.8699646643109545e-05,
"loss": 1.2477,
"step": 1600
},
{
"epoch": 1.1660777385159011,
"grad_norm": 162802.515625,
"learning_rate": 3.834628975265018e-05,
"loss": 1.2697,
"step": 1650
},
{
"epoch": 1.2014134275618376,
"grad_norm": 218109.6875,
"learning_rate": 3.799293286219082e-05,
"loss": 1.2485,
"step": 1700
},
{
"epoch": 1.2367491166077738,
"grad_norm": 201347.4375,
"learning_rate": 3.763957597173145e-05,
"loss": 1.218,
"step": 1750
},
{
"epoch": 1.2720848056537102,
"grad_norm": 171368.71875,
"learning_rate": 3.728621908127209e-05,
"loss": 1.2826,
"step": 1800
},
{
"epoch": 1.3074204946996466,
"grad_norm": 178375.859375,
"learning_rate": 3.693286219081272e-05,
"loss": 1.2763,
"step": 1850
},
{
"epoch": 1.342756183745583,
"grad_norm": 163735.65625,
"learning_rate": 3.6579505300353355e-05,
"loss": 1.2008,
"step": 1900
},
{
"epoch": 1.3780918727915195,
"grad_norm": 233520.015625,
"learning_rate": 3.6226148409893995e-05,
"loss": 1.2451,
"step": 1950
},
{
"epoch": 1.4134275618374559,
"grad_norm": 416601.09375,
"learning_rate": 3.587279151943463e-05,
"loss": 1.189,
"step": 2000
},
{
"epoch": 1.4487632508833923,
"grad_norm": 242072.71875,
"learning_rate": 3.551943462897527e-05,
"loss": 1.1835,
"step": 2050
},
{
"epoch": 1.4840989399293285,
"grad_norm": 165171.296875,
"learning_rate": 3.51660777385159e-05,
"loss": 1.1288,
"step": 2100
},
{
"epoch": 1.5194346289752652,
"grad_norm": 178379.890625,
"learning_rate": 3.4812720848056544e-05,
"loss": 1.1147,
"step": 2150
},
{
"epoch": 1.5547703180212014,
"grad_norm": 189740.796875,
"learning_rate": 3.445936395759717e-05,
"loss": 1.0938,
"step": 2200
},
{
"epoch": 1.5901060070671378,
"grad_norm": 130276.1171875,
"learning_rate": 3.410600706713781e-05,
"loss": 1.1391,
"step": 2250
},
{
"epoch": 1.6254416961130742,
"grad_norm": 183882.953125,
"learning_rate": 3.3752650176678445e-05,
"loss": 1.154,
"step": 2300
},
{
"epoch": 1.6607773851590106,
"grad_norm": 161704.109375,
"learning_rate": 3.3399293286219085e-05,
"loss": 1.1451,
"step": 2350
},
{
"epoch": 1.696113074204947,
"grad_norm": 191092.828125,
"learning_rate": 3.304593639575972e-05,
"loss": 1.189,
"step": 2400
},
{
"epoch": 1.7314487632508833,
"grad_norm": 628473.625,
"learning_rate": 3.269257950530035e-05,
"loss": 1.1158,
"step": 2450
},
{
"epoch": 1.76678445229682,
"grad_norm": 170168.171875,
"learning_rate": 3.233922261484099e-05,
"loss": 1.112,
"step": 2500
},
{
"epoch": 1.802120141342756,
"grad_norm": 176820.375,
"learning_rate": 3.198586572438163e-05,
"loss": 1.1347,
"step": 2550
},
{
"epoch": 1.8374558303886925,
"grad_norm": 184860.6875,
"learning_rate": 3.163250883392227e-05,
"loss": 1.1363,
"step": 2600
},
{
"epoch": 1.872791519434629,
"grad_norm": 188585.625,
"learning_rate": 3.1279151943462894e-05,
"loss": 1.076,
"step": 2650
},
{
"epoch": 1.9081272084805654,
"grad_norm": 210377.1875,
"learning_rate": 3.0925795053003535e-05,
"loss": 1.0597,
"step": 2700
},
{
"epoch": 1.9434628975265018,
"grad_norm": 190550.59375,
"learning_rate": 3.057243816254417e-05,
"loss": 1.117,
"step": 2750
},
{
"epoch": 1.978798586572438,
"grad_norm": 252235.53125,
"learning_rate": 3.021908127208481e-05,
"loss": 1.1122,
"step": 2800
},
{
"epoch": 2.0,
"eval_bertscore_f1": 0.9582845463629553,
"eval_bleu": 0.49983486433792085,
"eval_loss": 0.4666912853717804,
"eval_meteor": 0.669274930453781,
"eval_rouge1": 0.8072540196527545,
"eval_rouge2": 0.70742760483214,
"eval_runtime": 458.6727,
"eval_samples_per_second": 14.093,
"eval_steps_per_second": 0.881,
"step": 2830
},
{
"epoch": 2.0141342756183747,
"grad_norm": 257279.75,
"learning_rate": 2.9865724381625443e-05,
"loss": 1.0781,
"step": 2850
},
{
"epoch": 2.049469964664311,
"grad_norm": 189275.46875,
"learning_rate": 2.9512367491166083e-05,
"loss": 1.1029,
"step": 2900
},
{
"epoch": 2.0848056537102475,
"grad_norm": 194805.359375,
"learning_rate": 2.9159010600706717e-05,
"loss": 1.0469,
"step": 2950
},
{
"epoch": 2.1201413427561837,
"grad_norm": 201691.625,
"learning_rate": 2.8805653710247347e-05,
"loss": 1.0245,
"step": 3000
},
{
"epoch": 2.1554770318021204,
"grad_norm": 235752.0,
"learning_rate": 2.8452296819787988e-05,
"loss": 1.0363,
"step": 3050
},
{
"epoch": 2.1908127208480566,
"grad_norm": 173017.640625,
"learning_rate": 2.809893992932862e-05,
"loss": 1.0579,
"step": 3100
},
{
"epoch": 2.2261484098939928,
"grad_norm": 184098.203125,
"learning_rate": 2.7745583038869262e-05,
"loss": 1.1153,
"step": 3150
},
{
"epoch": 2.2614840989399294,
"grad_norm": 240071.0,
"learning_rate": 2.7392226148409892e-05,
"loss": 1.1175,
"step": 3200
},
{
"epoch": 2.2968197879858656,
"grad_norm": 243769.109375,
"learning_rate": 2.7038869257950533e-05,
"loss": 1.027,
"step": 3250
},
{
"epoch": 2.3321554770318023,
"grad_norm": 464112.78125,
"learning_rate": 2.6685512367491167e-05,
"loss": 1.0759,
"step": 3300
},
{
"epoch": 2.3674911660777385,
"grad_norm": 169012.796875,
"learning_rate": 2.6332155477031807e-05,
"loss": 1.0839,
"step": 3350
},
{
"epoch": 2.402826855123675,
"grad_norm": 166011.828125,
"learning_rate": 2.597879858657244e-05,
"loss": 1.0121,
"step": 3400
},
{
"epoch": 2.4381625441696113,
"grad_norm": 158634.28125,
"learning_rate": 2.562544169611307e-05,
"loss": 1.06,
"step": 3450
},
{
"epoch": 2.4734982332155475,
"grad_norm": 162840.34375,
"learning_rate": 2.5272084805653712e-05,
"loss": 0.9904,
"step": 3500
},
{
"epoch": 2.508833922261484,
"grad_norm": 162062.4375,
"learning_rate": 2.491872791519435e-05,
"loss": 1.0173,
"step": 3550
},
{
"epoch": 2.5441696113074204,
"grad_norm": 193363.75,
"learning_rate": 2.4565371024734986e-05,
"loss": 1.0344,
"step": 3600
},
{
"epoch": 2.579505300353357,
"grad_norm": 190665.90625,
"learning_rate": 2.421201413427562e-05,
"loss": 1.0007,
"step": 3650
},
{
"epoch": 2.614840989399293,
"grad_norm": 175752.640625,
"learning_rate": 2.3858657243816253e-05,
"loss": 1.0578,
"step": 3700
},
{
"epoch": 2.65017667844523,
"grad_norm": 175319.953125,
"learning_rate": 2.350530035335689e-05,
"loss": 1.0294,
"step": 3750
},
{
"epoch": 2.685512367491166,
"grad_norm": 169006.34375,
"learning_rate": 2.3151943462897528e-05,
"loss": 1.0066,
"step": 3800
},
{
"epoch": 2.7208480565371023,
"grad_norm": 192368.734375,
"learning_rate": 2.2798586572438165e-05,
"loss": 0.9715,
"step": 3850
},
{
"epoch": 2.756183745583039,
"grad_norm": 167620.265625,
"learning_rate": 2.24452296819788e-05,
"loss": 0.9757,
"step": 3900
},
{
"epoch": 2.791519434628975,
"grad_norm": 216407.484375,
"learning_rate": 2.2091872791519436e-05,
"loss": 1.0102,
"step": 3950
},
{
"epoch": 2.8268551236749118,
"grad_norm": 169598.328125,
"learning_rate": 2.1738515901060073e-05,
"loss": 1.0011,
"step": 4000
},
{
"epoch": 2.862190812720848,
"grad_norm": 447852.53125,
"learning_rate": 2.138515901060071e-05,
"loss": 0.9803,
"step": 4050
},
{
"epoch": 2.8975265017667846,
"grad_norm": 183687.234375,
"learning_rate": 2.1031802120141344e-05,
"loss": 1.0082,
"step": 4100
},
{
"epoch": 2.932862190812721,
"grad_norm": 150918.265625,
"learning_rate": 2.067844522968198e-05,
"loss": 0.9742,
"step": 4150
},
{
"epoch": 2.968197879858657,
"grad_norm": 174224.703125,
"learning_rate": 2.0325088339222618e-05,
"loss": 0.9893,
"step": 4200
},
{
"epoch": 3.0,
"eval_bertscore_f1": 0.9605225993430998,
"eval_bleu": 0.5316084480437336,
"eval_loss": 0.40197497606277466,
"eval_meteor": 0.6927241095474356,
"eval_rouge1": 0.8163514632828656,
"eval_rouge2": 0.7309898153084421,
"eval_runtime": 453.3627,
"eval_samples_per_second": 14.258,
"eval_steps_per_second": 0.891,
"step": 4245
},
{
"epoch": 3.0035335689045937,
"grad_norm": 155634.3125,
"learning_rate": 1.997173144876325e-05,
"loss": 0.9659,
"step": 4250
},
{
"epoch": 3.03886925795053,
"grad_norm": 441207.90625,
"learning_rate": 1.9618374558303885e-05,
"loss": 0.9841,
"step": 4300
},
{
"epoch": 3.0742049469964665,
"grad_norm": 171151.390625,
"learning_rate": 1.9265017667844522e-05,
"loss": 0.9685,
"step": 4350
},
{
"epoch": 3.1095406360424027,
"grad_norm": 178025.75,
"learning_rate": 1.891166077738516e-05,
"loss": 0.9823,
"step": 4400
},
{
"epoch": 3.1448763250883394,
"grad_norm": 194535.046875,
"learning_rate": 1.8558303886925797e-05,
"loss": 0.9673,
"step": 4450
},
{
"epoch": 3.1802120141342756,
"grad_norm": 161927.78125,
"learning_rate": 1.8204946996466434e-05,
"loss": 0.9857,
"step": 4500
},
{
"epoch": 3.215547703180212,
"grad_norm": 162012.375,
"learning_rate": 1.7851590106007067e-05,
"loss": 0.9981,
"step": 4550
},
{
"epoch": 3.2508833922261484,
"grad_norm": 189190.890625,
"learning_rate": 1.7498233215547705e-05,
"loss": 0.9984,
"step": 4600
},
{
"epoch": 3.2862190812720846,
"grad_norm": 160611.28125,
"learning_rate": 1.714487632508834e-05,
"loss": 0.962,
"step": 4650
},
{
"epoch": 3.3215547703180213,
"grad_norm": 160583.796875,
"learning_rate": 1.679151943462898e-05,
"loss": 0.9382,
"step": 4700
},
{
"epoch": 3.3568904593639575,
"grad_norm": 241355.578125,
"learning_rate": 1.643816254416961e-05,
"loss": 1.0251,
"step": 4750
},
{
"epoch": 3.392226148409894,
"grad_norm": 186881.078125,
"learning_rate": 1.6084805653710246e-05,
"loss": 0.9726,
"step": 4800
},
{
"epoch": 3.4275618374558303,
"grad_norm": 189630.578125,
"learning_rate": 1.5731448763250883e-05,
"loss": 0.912,
"step": 4850
},
{
"epoch": 3.462897526501767,
"grad_norm": 427752.875,
"learning_rate": 1.537809187279152e-05,
"loss": 0.9862,
"step": 4900
},
{
"epoch": 3.498233215547703,
"grad_norm": 221891.625,
"learning_rate": 1.5024734982332156e-05,
"loss": 0.9943,
"step": 4950
},
{
"epoch": 3.53356890459364,
"grad_norm": 167730.796875,
"learning_rate": 1.4671378091872793e-05,
"loss": 0.926,
"step": 5000
},
{
"epoch": 3.568904593639576,
"grad_norm": 193771.1875,
"learning_rate": 1.4318021201413428e-05,
"loss": 0.993,
"step": 5050
},
{
"epoch": 3.604240282685512,
"grad_norm": 187069.09375,
"learning_rate": 1.3964664310954066e-05,
"loss": 1.0317,
"step": 5100
},
{
"epoch": 3.639575971731449,
"grad_norm": 188425.546875,
"learning_rate": 1.3611307420494701e-05,
"loss": 0.9729,
"step": 5150
},
{
"epoch": 3.674911660777385,
"grad_norm": 163470.703125,
"learning_rate": 1.3257950530035338e-05,
"loss": 0.9688,
"step": 5200
},
{
"epoch": 3.7102473498233217,
"grad_norm": 187701.09375,
"learning_rate": 1.2904593639575973e-05,
"loss": 0.9105,
"step": 5250
},
{
"epoch": 3.745583038869258,
"grad_norm": 144018.484375,
"learning_rate": 1.2551236749116607e-05,
"loss": 0.9754,
"step": 5300
},
{
"epoch": 3.7809187279151946,
"grad_norm": 154232.65625,
"learning_rate": 1.2197879858657244e-05,
"loss": 0.9472,
"step": 5350
},
{
"epoch": 3.8162544169611308,
"grad_norm": 170624.640625,
"learning_rate": 1.184452296819788e-05,
"loss": 0.9886,
"step": 5400
},
{
"epoch": 3.851590106007067,
"grad_norm": 1157734.625,
"learning_rate": 1.1491166077738517e-05,
"loss": 0.915,
"step": 5450
},
{
"epoch": 3.8869257950530036,
"grad_norm": 179182.0,
"learning_rate": 1.1137809187279152e-05,
"loss": 0.9216,
"step": 5500
},
{
"epoch": 3.92226148409894,
"grad_norm": 872790.1875,
"learning_rate": 1.078445229681979e-05,
"loss": 0.9351,
"step": 5550
},
{
"epoch": 3.9575971731448765,
"grad_norm": 269436.21875,
"learning_rate": 1.0431095406360425e-05,
"loss": 0.9223,
"step": 5600
},
{
"epoch": 3.9929328621908127,
"grad_norm": 197819.828125,
"learning_rate": 1.0077738515901062e-05,
"loss": 0.9707,
"step": 5650
},
{
"epoch": 4.0,
"eval_bertscore_f1": 0.9621182246373432,
"eval_bleu": 0.5466384314770388,
"eval_loss": 0.3729407489299774,
"eval_meteor": 0.706948756410978,
"eval_rouge1": 0.8231910064206467,
"eval_rouge2": 0.7462398210944181,
"eval_runtime": 455.8029,
"eval_samples_per_second": 14.182,
"eval_steps_per_second": 0.886,
"step": 5660
},
{
"epoch": 4.028268551236749,
"grad_norm": 211690.5,
"learning_rate": 9.724381625441696e-06,
"loss": 0.9475,
"step": 5700
},
{
"epoch": 4.063604240282685,
"grad_norm": 1410444.25,
"learning_rate": 9.371024734982333e-06,
"loss": 0.943,
"step": 5750
},
{
"epoch": 4.098939929328622,
"grad_norm": 158585.84375,
"learning_rate": 9.017667844522968e-06,
"loss": 0.9525,
"step": 5800
},
{
"epoch": 4.134275618374558,
"grad_norm": 127129.171875,
"learning_rate": 8.664310954063605e-06,
"loss": 0.9423,
"step": 5850
},
{
"epoch": 4.169611307420495,
"grad_norm": 158647.09375,
"learning_rate": 8.31095406360424e-06,
"loss": 0.9317,
"step": 5900
},
{
"epoch": 4.204946996466431,
"grad_norm": 151794.578125,
"learning_rate": 7.957597173144876e-06,
"loss": 0.9252,
"step": 5950
},
{
"epoch": 4.240282685512367,
"grad_norm": 134228.625,
"learning_rate": 7.6042402826855124e-06,
"loss": 0.9466,
"step": 6000
},
{
"epoch": 4.275618374558304,
"grad_norm": 153596.578125,
"learning_rate": 7.250883392226149e-06,
"loss": 0.9165,
"step": 6050
},
{
"epoch": 4.310954063604241,
"grad_norm": 205703.125,
"learning_rate": 6.897526501766785e-06,
"loss": 0.9143,
"step": 6100
},
{
"epoch": 4.3462897526501765,
"grad_norm": 133143.015625,
"learning_rate": 6.544169611307421e-06,
"loss": 0.9926,
"step": 6150
},
{
"epoch": 4.381625441696113,
"grad_norm": 131342.953125,
"learning_rate": 6.190812720848057e-06,
"loss": 0.9561,
"step": 6200
},
{
"epoch": 4.41696113074205,
"grad_norm": 167654.875,
"learning_rate": 5.837455830388693e-06,
"loss": 0.9183,
"step": 6250
},
{
"epoch": 4.4522968197879855,
"grad_norm": 160178.71875,
"learning_rate": 5.484098939929329e-06,
"loss": 0.9432,
"step": 6300
},
{
"epoch": 4.487632508833922,
"grad_norm": 169846.84375,
"learning_rate": 5.1307420494699655e-06,
"loss": 0.9598,
"step": 6350
},
{
"epoch": 4.522968197879859,
"grad_norm": 159304.1875,
"learning_rate": 4.777385159010601e-06,
"loss": 0.9163,
"step": 6400
},
{
"epoch": 4.5583038869257955,
"grad_norm": 159722.328125,
"learning_rate": 4.424028268551237e-06,
"loss": 0.9269,
"step": 6450
},
{
"epoch": 4.593639575971731,
"grad_norm": 134695.21875,
"learning_rate": 4.070671378091873e-06,
"loss": 0.9521,
"step": 6500
},
{
"epoch": 4.628975265017668,
"grad_norm": 214522.46875,
"learning_rate": 3.717314487632509e-06,
"loss": 0.9487,
"step": 6550
},
{
"epoch": 4.6643109540636045,
"grad_norm": 277130.375,
"learning_rate": 3.363957597173145e-06,
"loss": 0.9512,
"step": 6600
},
{
"epoch": 4.69964664310954,
"grad_norm": 229889.265625,
"learning_rate": 3.010600706713781e-06,
"loss": 0.9568,
"step": 6650
},
{
"epoch": 4.734982332155477,
"grad_norm": 160132.65625,
"learning_rate": 2.6572438162544172e-06,
"loss": 0.9169,
"step": 6700
},
{
"epoch": 4.770318021201414,
"grad_norm": 153591.515625,
"learning_rate": 2.303886925795053e-06,
"loss": 0.9247,
"step": 6750
},
{
"epoch": 4.80565371024735,
"grad_norm": 198496.34375,
"learning_rate": 1.9505300353356893e-06,
"loss": 0.963,
"step": 6800
},
{
"epoch": 4.840989399293286,
"grad_norm": 217991.0625,
"learning_rate": 1.5971731448763252e-06,
"loss": 0.9591,
"step": 6850
},
{
"epoch": 4.876325088339223,
"grad_norm": 170718.59375,
"learning_rate": 1.2438162544169612e-06,
"loss": 0.9099,
"step": 6900
},
{
"epoch": 4.911660777385159,
"grad_norm": 138367.578125,
"learning_rate": 8.904593639575972e-07,
"loss": 0.9267,
"step": 6950
},
{
"epoch": 4.946996466431095,
"grad_norm": 170947.796875,
"learning_rate": 5.371024734982332e-07,
"loss": 0.9668,
"step": 7000
},
{
"epoch": 4.982332155477032,
"grad_norm": 127770.6484375,
"learning_rate": 1.8374558303886926e-07,
"loss": 0.9397,
"step": 7050
},
{
"epoch": 5.0,
"eval_bertscore_f1": 0.9626803387159316,
"eval_bleu": 0.5519723913292562,
"eval_loss": 0.36424726247787476,
"eval_meteor": 0.7112807593232308,
"eval_rouge1": 0.8253264091986198,
"eval_rouge2": 0.750184658379115,
"eval_runtime": 454.9282,
"eval_samples_per_second": 14.209,
"eval_steps_per_second": 0.888,
"step": 7075
}
],
"logging_steps": 50,
"max_steps": 7075,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.730048539557888e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}