{ "best_global_step": 7075, "best_metric": 0.9626803387159316, "best_model_checkpoint": "./output-codet5-k8s-lora/checkpoint-7075", "epoch": 5.0, "eval_steps": 500, "global_step": 7075, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0353356890459364, "grad_norm": 132030.078125, "learning_rate": 4.9653710247349825e-05, "loss": 4.4129, "step": 50 }, { "epoch": 0.0706713780918728, "grad_norm": 259212.703125, "learning_rate": 4.930035335689046e-05, "loss": 3.4742, "step": 100 }, { "epoch": 0.10600706713780919, "grad_norm": 135731.703125, "learning_rate": 4.89469964664311e-05, "loss": 2.4982, "step": 150 }, { "epoch": 0.1413427561837456, "grad_norm": 253683.546875, "learning_rate": 4.859363957597173e-05, "loss": 2.2733, "step": 200 }, { "epoch": 0.17667844522968199, "grad_norm": 447967.6875, "learning_rate": 4.8240282685512374e-05, "loss": 2.1743, "step": 250 }, { "epoch": 0.21201413427561838, "grad_norm": 248496.0, "learning_rate": 4.788692579505301e-05, "loss": 2.0272, "step": 300 }, { "epoch": 0.24734982332155478, "grad_norm": 137680.71875, "learning_rate": 4.753356890459364e-05, "loss": 1.9573, "step": 350 }, { "epoch": 0.2826855123674912, "grad_norm": 165613.9375, "learning_rate": 4.718021201413428e-05, "loss": 1.9568, "step": 400 }, { "epoch": 0.31802120141342755, "grad_norm": 139130.828125, "learning_rate": 4.682685512367491e-05, "loss": 1.8366, "step": 450 }, { "epoch": 0.35335689045936397, "grad_norm": 141737.296875, "learning_rate": 4.647349823321555e-05, "loss": 1.7679, "step": 500 }, { "epoch": 0.38869257950530034, "grad_norm": 151085.828125, "learning_rate": 4.612014134275618e-05, "loss": 1.7186, "step": 550 }, { "epoch": 0.42402826855123676, "grad_norm": 143353.3125, "learning_rate": 4.5766784452296823e-05, "loss": 1.6871, "step": 600 }, { "epoch": 0.45936395759717313, "grad_norm": 129146.640625, "learning_rate": 4.541342756183746e-05, "loss": 1.6727, "step": 650 }, { "epoch": 0.49469964664310956, "grad_norm": 318427.25, "learning_rate": 4.50600706713781e-05, "loss": 1.6139, "step": 700 }, { "epoch": 0.5300353356890459, "grad_norm": 167751.1875, "learning_rate": 4.470671378091873e-05, "loss": 1.6485, "step": 750 }, { "epoch": 0.5653710247349824, "grad_norm": 185479.96875, "learning_rate": 4.435335689045937e-05, "loss": 1.5644, "step": 800 }, { "epoch": 0.6007067137809188, "grad_norm": 162241.875, "learning_rate": 4.4000000000000006e-05, "loss": 1.5162, "step": 850 }, { "epoch": 0.6360424028268551, "grad_norm": 220409.78125, "learning_rate": 4.364664310954063e-05, "loss": 1.5365, "step": 900 }, { "epoch": 0.6713780918727915, "grad_norm": 219547.015625, "learning_rate": 4.329328621908127e-05, "loss": 1.4956, "step": 950 }, { "epoch": 0.7067137809187279, "grad_norm": 151856.1875, "learning_rate": 4.293992932862191e-05, "loss": 1.4474, "step": 1000 }, { "epoch": 0.7420494699646644, "grad_norm": 139005.203125, "learning_rate": 4.258657243816255e-05, "loss": 1.4196, "step": 1050 }, { "epoch": 0.7773851590106007, "grad_norm": 207968.453125, "learning_rate": 4.223321554770318e-05, "loss": 1.4448, "step": 1100 }, { "epoch": 0.8127208480565371, "grad_norm": 226858.75, "learning_rate": 4.187985865724382e-05, "loss": 1.3649, "step": 1150 }, { "epoch": 0.8480565371024735, "grad_norm": 139849.125, "learning_rate": 4.1526501766784455e-05, "loss": 1.3146, "step": 1200 }, { "epoch": 0.8833922261484098, "grad_norm": 191754.421875, "learning_rate": 4.1173144876325096e-05, "loss": 1.3667, "step": 1250 }, { "epoch": 0.9187279151943463, "grad_norm": 399802.3125, "learning_rate": 4.081978798586573e-05, "loss": 1.3588, "step": 1300 }, { "epoch": 0.9540636042402827, "grad_norm": 163376.953125, "learning_rate": 4.0466431095406356e-05, "loss": 1.305, "step": 1350 }, { "epoch": 0.9893992932862191, "grad_norm": 235141.46875, "learning_rate": 4.0113074204947e-05, "loss": 1.3144, "step": 1400 }, { "epoch": 1.0, "eval_bertscore_f1": 0.9499767605989876, "eval_bleu": 0.41483915022966605, "eval_loss": 0.6510708332061768, "eval_meteor": 0.6007343806481693, "eval_rouge1": 0.7698895071809828, "eval_rouge2": 0.6286208265783073, "eval_runtime": 476.7994, "eval_samples_per_second": 13.557, "eval_steps_per_second": 0.847, "step": 1415 }, { "epoch": 1.0247349823321554, "grad_norm": 154460.578125, "learning_rate": 3.975971731448763e-05, "loss": 1.3006, "step": 1450 }, { "epoch": 1.0600706713780919, "grad_norm": 152772.09375, "learning_rate": 3.940636042402827e-05, "loss": 1.3061, "step": 1500 }, { "epoch": 1.0954063604240283, "grad_norm": 157520.671875, "learning_rate": 3.9053003533568905e-05, "loss": 1.2585, "step": 1550 }, { "epoch": 1.1307420494699647, "grad_norm": 159070.796875, "learning_rate": 3.8699646643109545e-05, "loss": 1.2477, "step": 1600 }, { "epoch": 1.1660777385159011, "grad_norm": 162802.515625, "learning_rate": 3.834628975265018e-05, "loss": 1.2697, "step": 1650 }, { "epoch": 1.2014134275618376, "grad_norm": 218109.6875, "learning_rate": 3.799293286219082e-05, "loss": 1.2485, "step": 1700 }, { "epoch": 1.2367491166077738, "grad_norm": 201347.4375, "learning_rate": 3.763957597173145e-05, "loss": 1.218, "step": 1750 }, { "epoch": 1.2720848056537102, "grad_norm": 171368.71875, "learning_rate": 3.728621908127209e-05, "loss": 1.2826, "step": 1800 }, { "epoch": 1.3074204946996466, "grad_norm": 178375.859375, "learning_rate": 3.693286219081272e-05, "loss": 1.2763, "step": 1850 }, { "epoch": 1.342756183745583, "grad_norm": 163735.65625, "learning_rate": 3.6579505300353355e-05, "loss": 1.2008, "step": 1900 }, { "epoch": 1.3780918727915195, "grad_norm": 233520.015625, "learning_rate": 3.6226148409893995e-05, "loss": 1.2451, "step": 1950 }, { "epoch": 1.4134275618374559, "grad_norm": 416601.09375, "learning_rate": 3.587279151943463e-05, "loss": 1.189, "step": 2000 }, { "epoch": 1.4487632508833923, "grad_norm": 242072.71875, "learning_rate": 3.551943462897527e-05, "loss": 1.1835, "step": 2050 }, { "epoch": 1.4840989399293285, "grad_norm": 165171.296875, "learning_rate": 3.51660777385159e-05, "loss": 1.1288, "step": 2100 }, { "epoch": 1.5194346289752652, "grad_norm": 178379.890625, "learning_rate": 3.4812720848056544e-05, "loss": 1.1147, "step": 2150 }, { "epoch": 1.5547703180212014, "grad_norm": 189740.796875, "learning_rate": 3.445936395759717e-05, "loss": 1.0938, "step": 2200 }, { "epoch": 1.5901060070671378, "grad_norm": 130276.1171875, "learning_rate": 3.410600706713781e-05, "loss": 1.1391, "step": 2250 }, { "epoch": 1.6254416961130742, "grad_norm": 183882.953125, "learning_rate": 3.3752650176678445e-05, "loss": 1.154, "step": 2300 }, { "epoch": 1.6607773851590106, "grad_norm": 161704.109375, "learning_rate": 3.3399293286219085e-05, "loss": 1.1451, "step": 2350 }, { "epoch": 1.696113074204947, "grad_norm": 191092.828125, "learning_rate": 3.304593639575972e-05, "loss": 1.189, "step": 2400 }, { "epoch": 1.7314487632508833, "grad_norm": 628473.625, "learning_rate": 3.269257950530035e-05, "loss": 1.1158, "step": 2450 }, { "epoch": 1.76678445229682, "grad_norm": 170168.171875, "learning_rate": 3.233922261484099e-05, "loss": 1.112, "step": 2500 }, { "epoch": 1.802120141342756, "grad_norm": 176820.375, "learning_rate": 3.198586572438163e-05, "loss": 1.1347, "step": 2550 }, { "epoch": 1.8374558303886925, "grad_norm": 184860.6875, "learning_rate": 3.163250883392227e-05, "loss": 1.1363, "step": 2600 }, { "epoch": 1.872791519434629, "grad_norm": 188585.625, "learning_rate": 3.1279151943462894e-05, "loss": 1.076, "step": 2650 }, { "epoch": 1.9081272084805654, "grad_norm": 210377.1875, "learning_rate": 3.0925795053003535e-05, "loss": 1.0597, "step": 2700 }, { "epoch": 1.9434628975265018, "grad_norm": 190550.59375, "learning_rate": 3.057243816254417e-05, "loss": 1.117, "step": 2750 }, { "epoch": 1.978798586572438, "grad_norm": 252235.53125, "learning_rate": 3.021908127208481e-05, "loss": 1.1122, "step": 2800 }, { "epoch": 2.0, "eval_bertscore_f1": 0.9582845463629553, "eval_bleu": 0.49983486433792085, "eval_loss": 0.4666912853717804, "eval_meteor": 0.669274930453781, "eval_rouge1": 0.8072540196527545, "eval_rouge2": 0.70742760483214, "eval_runtime": 458.6727, "eval_samples_per_second": 14.093, "eval_steps_per_second": 0.881, "step": 2830 }, { "epoch": 2.0141342756183747, "grad_norm": 257279.75, "learning_rate": 2.9865724381625443e-05, "loss": 1.0781, "step": 2850 }, { "epoch": 2.049469964664311, "grad_norm": 189275.46875, "learning_rate": 2.9512367491166083e-05, "loss": 1.1029, "step": 2900 }, { "epoch": 2.0848056537102475, "grad_norm": 194805.359375, "learning_rate": 2.9159010600706717e-05, "loss": 1.0469, "step": 2950 }, { "epoch": 2.1201413427561837, "grad_norm": 201691.625, "learning_rate": 2.8805653710247347e-05, "loss": 1.0245, "step": 3000 }, { "epoch": 2.1554770318021204, "grad_norm": 235752.0, "learning_rate": 2.8452296819787988e-05, "loss": 1.0363, "step": 3050 }, { "epoch": 2.1908127208480566, "grad_norm": 173017.640625, "learning_rate": 2.809893992932862e-05, "loss": 1.0579, "step": 3100 }, { "epoch": 2.2261484098939928, "grad_norm": 184098.203125, "learning_rate": 2.7745583038869262e-05, "loss": 1.1153, "step": 3150 }, { "epoch": 2.2614840989399294, "grad_norm": 240071.0, "learning_rate": 2.7392226148409892e-05, "loss": 1.1175, "step": 3200 }, { "epoch": 2.2968197879858656, "grad_norm": 243769.109375, "learning_rate": 2.7038869257950533e-05, "loss": 1.027, "step": 3250 }, { "epoch": 2.3321554770318023, "grad_norm": 464112.78125, "learning_rate": 2.6685512367491167e-05, "loss": 1.0759, "step": 3300 }, { "epoch": 2.3674911660777385, "grad_norm": 169012.796875, "learning_rate": 2.6332155477031807e-05, "loss": 1.0839, "step": 3350 }, { "epoch": 2.402826855123675, "grad_norm": 166011.828125, "learning_rate": 2.597879858657244e-05, "loss": 1.0121, "step": 3400 }, { "epoch": 2.4381625441696113, "grad_norm": 158634.28125, "learning_rate": 2.562544169611307e-05, "loss": 1.06, "step": 3450 }, { "epoch": 2.4734982332155475, "grad_norm": 162840.34375, "learning_rate": 2.5272084805653712e-05, "loss": 0.9904, "step": 3500 }, { "epoch": 2.508833922261484, "grad_norm": 162062.4375, "learning_rate": 2.491872791519435e-05, "loss": 1.0173, "step": 3550 }, { "epoch": 2.5441696113074204, "grad_norm": 193363.75, "learning_rate": 2.4565371024734986e-05, "loss": 1.0344, "step": 3600 }, { "epoch": 2.579505300353357, "grad_norm": 190665.90625, "learning_rate": 2.421201413427562e-05, "loss": 1.0007, "step": 3650 }, { "epoch": 2.614840989399293, "grad_norm": 175752.640625, "learning_rate": 2.3858657243816253e-05, "loss": 1.0578, "step": 3700 }, { "epoch": 2.65017667844523, "grad_norm": 175319.953125, "learning_rate": 2.350530035335689e-05, "loss": 1.0294, "step": 3750 }, { "epoch": 2.685512367491166, "grad_norm": 169006.34375, "learning_rate": 2.3151943462897528e-05, "loss": 1.0066, "step": 3800 }, { "epoch": 2.7208480565371023, "grad_norm": 192368.734375, "learning_rate": 2.2798586572438165e-05, "loss": 0.9715, "step": 3850 }, { "epoch": 2.756183745583039, "grad_norm": 167620.265625, "learning_rate": 2.24452296819788e-05, "loss": 0.9757, "step": 3900 }, { "epoch": 2.791519434628975, "grad_norm": 216407.484375, "learning_rate": 2.2091872791519436e-05, "loss": 1.0102, "step": 3950 }, { "epoch": 2.8268551236749118, "grad_norm": 169598.328125, "learning_rate": 2.1738515901060073e-05, "loss": 1.0011, "step": 4000 }, { "epoch": 2.862190812720848, "grad_norm": 447852.53125, "learning_rate": 2.138515901060071e-05, "loss": 0.9803, "step": 4050 }, { "epoch": 2.8975265017667846, "grad_norm": 183687.234375, "learning_rate": 2.1031802120141344e-05, "loss": 1.0082, "step": 4100 }, { "epoch": 2.932862190812721, "grad_norm": 150918.265625, "learning_rate": 2.067844522968198e-05, "loss": 0.9742, "step": 4150 }, { "epoch": 2.968197879858657, "grad_norm": 174224.703125, "learning_rate": 2.0325088339222618e-05, "loss": 0.9893, "step": 4200 }, { "epoch": 3.0, "eval_bertscore_f1": 0.9605225993430998, "eval_bleu": 0.5316084480437336, "eval_loss": 0.40197497606277466, "eval_meteor": 0.6927241095474356, "eval_rouge1": 0.8163514632828656, "eval_rouge2": 0.7309898153084421, "eval_runtime": 453.3627, "eval_samples_per_second": 14.258, "eval_steps_per_second": 0.891, "step": 4245 }, { "epoch": 3.0035335689045937, "grad_norm": 155634.3125, "learning_rate": 1.997173144876325e-05, "loss": 0.9659, "step": 4250 }, { "epoch": 3.03886925795053, "grad_norm": 441207.90625, "learning_rate": 1.9618374558303885e-05, "loss": 0.9841, "step": 4300 }, { "epoch": 3.0742049469964665, "grad_norm": 171151.390625, "learning_rate": 1.9265017667844522e-05, "loss": 0.9685, "step": 4350 }, { "epoch": 3.1095406360424027, "grad_norm": 178025.75, "learning_rate": 1.891166077738516e-05, "loss": 0.9823, "step": 4400 }, { "epoch": 3.1448763250883394, "grad_norm": 194535.046875, "learning_rate": 1.8558303886925797e-05, "loss": 0.9673, "step": 4450 }, { "epoch": 3.1802120141342756, "grad_norm": 161927.78125, "learning_rate": 1.8204946996466434e-05, "loss": 0.9857, "step": 4500 }, { "epoch": 3.215547703180212, "grad_norm": 162012.375, "learning_rate": 1.7851590106007067e-05, "loss": 0.9981, "step": 4550 }, { "epoch": 3.2508833922261484, "grad_norm": 189190.890625, "learning_rate": 1.7498233215547705e-05, "loss": 0.9984, "step": 4600 }, { "epoch": 3.2862190812720846, "grad_norm": 160611.28125, "learning_rate": 1.714487632508834e-05, "loss": 0.962, "step": 4650 }, { "epoch": 3.3215547703180213, "grad_norm": 160583.796875, "learning_rate": 1.679151943462898e-05, "loss": 0.9382, "step": 4700 }, { "epoch": 3.3568904593639575, "grad_norm": 241355.578125, "learning_rate": 1.643816254416961e-05, "loss": 1.0251, "step": 4750 }, { "epoch": 3.392226148409894, "grad_norm": 186881.078125, "learning_rate": 1.6084805653710246e-05, "loss": 0.9726, "step": 4800 }, { "epoch": 3.4275618374558303, "grad_norm": 189630.578125, "learning_rate": 1.5731448763250883e-05, "loss": 0.912, "step": 4850 }, { "epoch": 3.462897526501767, "grad_norm": 427752.875, "learning_rate": 1.537809187279152e-05, "loss": 0.9862, "step": 4900 }, { "epoch": 3.498233215547703, "grad_norm": 221891.625, "learning_rate": 1.5024734982332156e-05, "loss": 0.9943, "step": 4950 }, { "epoch": 3.53356890459364, "grad_norm": 167730.796875, "learning_rate": 1.4671378091872793e-05, "loss": 0.926, "step": 5000 }, { "epoch": 3.568904593639576, "grad_norm": 193771.1875, "learning_rate": 1.4318021201413428e-05, "loss": 0.993, "step": 5050 }, { "epoch": 3.604240282685512, "grad_norm": 187069.09375, "learning_rate": 1.3964664310954066e-05, "loss": 1.0317, "step": 5100 }, { "epoch": 3.639575971731449, "grad_norm": 188425.546875, "learning_rate": 1.3611307420494701e-05, "loss": 0.9729, "step": 5150 }, { "epoch": 3.674911660777385, "grad_norm": 163470.703125, "learning_rate": 1.3257950530035338e-05, "loss": 0.9688, "step": 5200 }, { "epoch": 3.7102473498233217, "grad_norm": 187701.09375, "learning_rate": 1.2904593639575973e-05, "loss": 0.9105, "step": 5250 }, { "epoch": 3.745583038869258, "grad_norm": 144018.484375, "learning_rate": 1.2551236749116607e-05, "loss": 0.9754, "step": 5300 }, { "epoch": 3.7809187279151946, "grad_norm": 154232.65625, "learning_rate": 1.2197879858657244e-05, "loss": 0.9472, "step": 5350 }, { "epoch": 3.8162544169611308, "grad_norm": 170624.640625, "learning_rate": 1.184452296819788e-05, "loss": 0.9886, "step": 5400 }, { "epoch": 3.851590106007067, "grad_norm": 1157734.625, "learning_rate": 1.1491166077738517e-05, "loss": 0.915, "step": 5450 }, { "epoch": 3.8869257950530036, "grad_norm": 179182.0, "learning_rate": 1.1137809187279152e-05, "loss": 0.9216, "step": 5500 }, { "epoch": 3.92226148409894, "grad_norm": 872790.1875, "learning_rate": 1.078445229681979e-05, "loss": 0.9351, "step": 5550 }, { "epoch": 3.9575971731448765, "grad_norm": 269436.21875, "learning_rate": 1.0431095406360425e-05, "loss": 0.9223, "step": 5600 }, { "epoch": 3.9929328621908127, "grad_norm": 197819.828125, "learning_rate": 1.0077738515901062e-05, "loss": 0.9707, "step": 5650 }, { "epoch": 4.0, "eval_bertscore_f1": 0.9621182246373432, "eval_bleu": 0.5466384314770388, "eval_loss": 0.3729407489299774, "eval_meteor": 0.706948756410978, "eval_rouge1": 0.8231910064206467, "eval_rouge2": 0.7462398210944181, "eval_runtime": 455.8029, "eval_samples_per_second": 14.182, "eval_steps_per_second": 0.886, "step": 5660 }, { "epoch": 4.028268551236749, "grad_norm": 211690.5, "learning_rate": 9.724381625441696e-06, "loss": 0.9475, "step": 5700 }, { "epoch": 4.063604240282685, "grad_norm": 1410444.25, "learning_rate": 9.371024734982333e-06, "loss": 0.943, "step": 5750 }, { "epoch": 4.098939929328622, "grad_norm": 158585.84375, "learning_rate": 9.017667844522968e-06, "loss": 0.9525, "step": 5800 }, { "epoch": 4.134275618374558, "grad_norm": 127129.171875, "learning_rate": 8.664310954063605e-06, "loss": 0.9423, "step": 5850 }, { "epoch": 4.169611307420495, "grad_norm": 158647.09375, "learning_rate": 8.31095406360424e-06, "loss": 0.9317, "step": 5900 }, { "epoch": 4.204946996466431, "grad_norm": 151794.578125, "learning_rate": 7.957597173144876e-06, "loss": 0.9252, "step": 5950 }, { "epoch": 4.240282685512367, "grad_norm": 134228.625, "learning_rate": 7.6042402826855124e-06, "loss": 0.9466, "step": 6000 }, { "epoch": 4.275618374558304, "grad_norm": 153596.578125, "learning_rate": 7.250883392226149e-06, "loss": 0.9165, "step": 6050 }, { "epoch": 4.310954063604241, "grad_norm": 205703.125, "learning_rate": 6.897526501766785e-06, "loss": 0.9143, "step": 6100 }, { "epoch": 4.3462897526501765, "grad_norm": 133143.015625, "learning_rate": 6.544169611307421e-06, "loss": 0.9926, "step": 6150 }, { "epoch": 4.381625441696113, "grad_norm": 131342.953125, "learning_rate": 6.190812720848057e-06, "loss": 0.9561, "step": 6200 }, { "epoch": 4.41696113074205, "grad_norm": 167654.875, "learning_rate": 5.837455830388693e-06, "loss": 0.9183, "step": 6250 }, { "epoch": 4.4522968197879855, "grad_norm": 160178.71875, "learning_rate": 5.484098939929329e-06, "loss": 0.9432, "step": 6300 }, { "epoch": 4.487632508833922, "grad_norm": 169846.84375, "learning_rate": 5.1307420494699655e-06, "loss": 0.9598, "step": 6350 }, { "epoch": 4.522968197879859, "grad_norm": 159304.1875, "learning_rate": 4.777385159010601e-06, "loss": 0.9163, "step": 6400 }, { "epoch": 4.5583038869257955, "grad_norm": 159722.328125, "learning_rate": 4.424028268551237e-06, "loss": 0.9269, "step": 6450 }, { "epoch": 4.593639575971731, "grad_norm": 134695.21875, "learning_rate": 4.070671378091873e-06, "loss": 0.9521, "step": 6500 }, { "epoch": 4.628975265017668, "grad_norm": 214522.46875, "learning_rate": 3.717314487632509e-06, "loss": 0.9487, "step": 6550 }, { "epoch": 4.6643109540636045, "grad_norm": 277130.375, "learning_rate": 3.363957597173145e-06, "loss": 0.9512, "step": 6600 }, { "epoch": 4.69964664310954, "grad_norm": 229889.265625, "learning_rate": 3.010600706713781e-06, "loss": 0.9568, "step": 6650 }, { "epoch": 4.734982332155477, "grad_norm": 160132.65625, "learning_rate": 2.6572438162544172e-06, "loss": 0.9169, "step": 6700 }, { "epoch": 4.770318021201414, "grad_norm": 153591.515625, "learning_rate": 2.303886925795053e-06, "loss": 0.9247, "step": 6750 }, { "epoch": 4.80565371024735, "grad_norm": 198496.34375, "learning_rate": 1.9505300353356893e-06, "loss": 0.963, "step": 6800 }, { "epoch": 4.840989399293286, "grad_norm": 217991.0625, "learning_rate": 1.5971731448763252e-06, "loss": 0.9591, "step": 6850 }, { "epoch": 4.876325088339223, "grad_norm": 170718.59375, "learning_rate": 1.2438162544169612e-06, "loss": 0.9099, "step": 6900 }, { "epoch": 4.911660777385159, "grad_norm": 138367.578125, "learning_rate": 8.904593639575972e-07, "loss": 0.9267, "step": 6950 }, { "epoch": 4.946996466431095, "grad_norm": 170947.796875, "learning_rate": 5.371024734982332e-07, "loss": 0.9668, "step": 7000 }, { "epoch": 4.982332155477032, "grad_norm": 127770.6484375, "learning_rate": 1.8374558303886926e-07, "loss": 0.9397, "step": 7050 }, { "epoch": 5.0, "eval_bertscore_f1": 0.9626803387159316, "eval_bleu": 0.5519723913292562, "eval_loss": 0.36424726247787476, "eval_meteor": 0.7112807593232308, "eval_rouge1": 0.8253264091986198, "eval_rouge2": 0.750184658379115, "eval_runtime": 454.9282, "eval_samples_per_second": 14.209, "eval_steps_per_second": 0.888, "step": 7075 } ], "logging_steps": 50, "max_steps": 7075, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.730048539557888e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }