{
  "best_global_step": 7075,
  "best_metric": 0.957730518736978,
  "best_model_checkpoint": "/kaggle/working/sparse_lora/checkpoint-7075",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 7075,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0353356890459364,
      "grad_norm": 87870.6484375,
      "learning_rate": 4.9653710247349825e-05,
      "loss": 4.5998,
      "step": 50
    },
    {
      "epoch": 0.0706713780918728,
      "grad_norm": 671269.4375,
      "learning_rate": 4.930035335689046e-05,
      "loss": 3.6646,
      "step": 100
    },
    {
      "epoch": 0.10600706713780919,
      "grad_norm": 103706.0,
      "learning_rate": 4.89469964664311e-05,
      "loss": 3.0394,
      "step": 150
    },
    {
      "epoch": 0.1413427561837456,
      "grad_norm": 60311.45703125,
      "learning_rate": 4.859363957597173e-05,
      "loss": 2.1607,
      "step": 200
    },
    {
      "epoch": 0.17667844522968199,
      "grad_norm": 56506.35546875,
      "learning_rate": 4.8240282685512374e-05,
      "loss": 2.0396,
      "step": 250
    },
    {
      "epoch": 0.21201413427561838,
      "grad_norm": 64563.61328125,
      "learning_rate": 4.788692579505301e-05,
      "loss": 1.8784,
      "step": 300
    },
    {
      "epoch": 0.24734982332155478,
      "grad_norm": 84701.9765625,
      "learning_rate": 4.753356890459364e-05,
      "loss": 1.823,
      "step": 350
    },
    {
      "epoch": 0.2826855123674912,
      "grad_norm": 80899.9765625,
      "learning_rate": 4.718021201413428e-05,
      "loss": 1.825,
      "step": 400
    },
    {
      "epoch": 0.31802120141342755,
      "grad_norm": 83637.8671875,
      "learning_rate": 4.682685512367491e-05,
      "loss": 1.6937,
      "step": 450
    },
    {
      "epoch": 0.35335689045936397,
      "grad_norm": 172655.1875,
      "learning_rate": 4.647349823321555e-05,
      "loss": 1.6309,
      "step": 500
    },
    {
      "epoch": 0.38869257950530034,
      "grad_norm": 212648.234375,
      "learning_rate": 4.612014134275618e-05,
      "loss": 1.5667,
      "step": 550
    },
    {
      "epoch": 0.42402826855123676,
      "grad_norm": 85414.8125,
      "learning_rate": 4.5766784452296823e-05,
      "loss": 1.5433,
      "step": 600
    },
    {
      "epoch": 0.45936395759717313,
      "grad_norm": 84665.4296875,
      "learning_rate": 4.541342756183746e-05,
      "loss": 1.5225,
      "step": 650
    },
    {
      "epoch": 0.49469964664310956,
      "grad_norm": 87850.546875,
      "learning_rate": 4.50600706713781e-05,
      "loss": 1.4696,
      "step": 700
    },
    {
      "epoch": 0.5300353356890459,
      "grad_norm": 105920.03125,
      "learning_rate": 4.470671378091873e-05,
      "loss": 1.5046,
      "step": 750
    },
    {
      "epoch": 0.5653710247349824,
      "grad_norm": 128489.2734375,
      "learning_rate": 4.435335689045937e-05,
      "loss": 1.4268,
      "step": 800
    },
    {
      "epoch": 0.6007067137809188,
      "grad_norm": 126656.5390625,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.3749,
      "step": 850
    },
    {
      "epoch": 0.6360424028268551,
      "grad_norm": 90037.234375,
      "learning_rate": 4.364664310954063e-05,
      "loss": 1.4043,
      "step": 900
    },
    {
      "epoch": 0.6713780918727915,
      "grad_norm": 123195.953125,
      "learning_rate": 4.329328621908127e-05,
      "loss": 1.3655,
      "step": 950
    },
    {
      "epoch": 0.7067137809187279,
      "grad_norm": 106699.8984375,
      "learning_rate": 4.293992932862191e-05,
      "loss": 1.3125,
      "step": 1000
    },
    {
      "epoch": 0.7420494699646644,
      "grad_norm": 141920.890625,
      "learning_rate": 4.258657243816255e-05,
      "loss": 1.2784,
      "step": 1050
    },
    {
      "epoch": 0.7773851590106007,
      "grad_norm": 155978.375,
      "learning_rate": 4.223321554770318e-05,
      "loss": 1.307,
      "step": 1100
    },
    {
      "epoch": 0.8127208480565371,
      "grad_norm": 151003.53125,
      "learning_rate": 4.187985865724382e-05,
      "loss": 1.2263,
      "step": 1150
    },
    {
      "epoch": 0.8480565371024735,
      "grad_norm": 122458.796875,
      "learning_rate": 4.1526501766784455e-05,
      "loss": 1.1795,
      "step": 1200
    },
    {
      "epoch": 0.8833922261484098,
      "grad_norm": 137286.90625,
      "learning_rate": 4.1173144876325096e-05,
      "loss": 1.2285,
      "step": 1250
    },
    {
      "epoch": 0.9187279151943463,
      "grad_norm": 112117.640625,
      "learning_rate": 4.081978798586573e-05,
      "loss": 1.2263,
      "step": 1300
    },
    {
      "epoch": 0.9540636042402827,
      "grad_norm": 127897.46875,
      "learning_rate": 4.0466431095406356e-05,
      "loss": 1.1636,
      "step": 1350
    },
    {
      "epoch": 0.9893992932862191,
      "grad_norm": 121189.921875,
      "learning_rate": 4.0113074204947e-05,
      "loss": 1.1762,
      "step": 1400
    },
    {
      "epoch": 1.0,
      "eval_bertscore_f1": 0.9435882283983255,
      "eval_bleu": 0.34081729524428495,
      "eval_loss": 0.8283334970474243,
      "eval_meteor": 0.5505336031382576,
      "eval_rouge1": 0.741807000391657,
      "eval_rouge2": 0.561792451504467,
      "eval_runtime": 458.3629,
      "eval_samples_per_second": 14.102,
      "eval_steps_per_second": 0.881,
      "step": 1415
    },
    {
      "epoch": 1.0247349823321554,
      "grad_norm": 128025.5703125,
      "learning_rate": 3.975971731448763e-05,
      "loss": 1.1689,
      "step": 1450
    },
    {
      "epoch": 1.0600706713780919,
      "grad_norm": 117022.6953125,
      "learning_rate": 3.940636042402827e-05,
      "loss": 1.1721,
      "step": 1500
    },
    {
      "epoch": 1.0954063604240283,
      "grad_norm": 127291.71875,
      "learning_rate": 3.9053003533568905e-05,
      "loss": 1.1144,
      "step": 1550
    },
    {
      "epoch": 1.1307420494699647,
      "grad_norm": 432520.5,
      "learning_rate": 3.8699646643109545e-05,
      "loss": 1.1173,
      "step": 1600
    },
    {
      "epoch": 1.1660777385159011,
      "grad_norm": 133514.078125,
      "learning_rate": 3.834628975265018e-05,
      "loss": 1.1349,
      "step": 1650
    },
    {
      "epoch": 1.2014134275618376,
      "grad_norm": 196108.515625,
      "learning_rate": 3.799293286219082e-05,
      "loss": 1.1105,
      "step": 1700
    },
    {
      "epoch": 1.2367491166077738,
      "grad_norm": 141071.890625,
      "learning_rate": 3.763957597173145e-05,
      "loss": 1.0843,
      "step": 1750
    },
    {
      "epoch": 1.2720848056537102,
      "grad_norm": 132118.53125,
      "learning_rate": 3.728621908127209e-05,
      "loss": 1.1424,
      "step": 1800
    },
    {
      "epoch": 1.3074204946996466,
      "grad_norm": 160950.84375,
      "learning_rate": 3.693286219081272e-05,
      "loss": 1.1462,
      "step": 1850
    },
    {
      "epoch": 1.342756183745583,
      "grad_norm": 128804.078125,
      "learning_rate": 3.6579505300353355e-05,
      "loss": 1.0572,
      "step": 1900
    },
    {
      "epoch": 1.3780918727915195,
      "grad_norm": 157611.953125,
      "learning_rate": 3.6226148409893995e-05,
      "loss": 1.0987,
      "step": 1950
    },
    {
      "epoch": 1.4134275618374559,
      "grad_norm": 141798.90625,
      "learning_rate": 3.587279151943463e-05,
      "loss": 1.0541,
      "step": 2000
    },
    {
      "epoch": 1.4487632508833923,
      "grad_norm": 171915.71875,
      "learning_rate": 3.551943462897527e-05,
      "loss": 1.0487,
      "step": 2050
    },
    {
      "epoch": 1.4840989399293285,
      "grad_norm": 127521.7109375,
      "learning_rate": 3.51660777385159e-05,
      "loss": 0.9808,
      "step": 2100
    },
    {
      "epoch": 1.5194346289752652,
      "grad_norm": 144515.828125,
      "learning_rate": 3.4812720848056544e-05,
      "loss": 0.9844,
      "step": 2150
    },
    {
      "epoch": 1.5547703180212014,
      "grad_norm": 135642.578125,
      "learning_rate": 3.445936395759717e-05,
      "loss": 0.9523,
      "step": 2200
    },
    {
      "epoch": 1.5901060070671378,
      "grad_norm": 211087.34375,
      "learning_rate": 3.410600706713781e-05,
      "loss": 0.9904,
      "step": 2250
    },
    {
      "epoch": 1.6254416961130742,
      "grad_norm": 157448.421875,
      "learning_rate": 3.3752650176678445e-05,
      "loss": 1.0143,
      "step": 2300
    },
    {
      "epoch": 1.6607773851590106,
      "grad_norm": 185968.609375,
      "learning_rate": 3.3399293286219085e-05,
      "loss": 1.0191,
      "step": 2350
    },
    {
      "epoch": 1.696113074204947,
      "grad_norm": 138438.078125,
      "learning_rate": 3.304593639575972e-05,
      "loss": 1.0524,
      "step": 2400
    },
    {
      "epoch": 1.7314487632508833,
      "grad_norm": 151158.640625,
      "learning_rate": 3.269257950530035e-05,
      "loss": 0.9748,
      "step": 2450
    },
    {
      "epoch": 1.76678445229682,
      "grad_norm": 146153.25,
      "learning_rate": 3.233922261484099e-05,
      "loss": 0.9595,
      "step": 2500
    },
    {
      "epoch": 1.802120141342756,
      "grad_norm": 145571.171875,
      "learning_rate": 3.198586572438163e-05,
      "loss": 0.9884,
      "step": 2550
    },
    {
      "epoch": 1.8374558303886925,
      "grad_norm": 200024.28125,
      "learning_rate": 3.163250883392227e-05,
      "loss": 0.9961,
      "step": 2600
    },
    {
      "epoch": 1.872791519434629,
      "grad_norm": 1255579.375,
      "learning_rate": 3.1279151943462894e-05,
      "loss": 0.9381,
      "step": 2650
    },
    {
      "epoch": 1.9081272084805654,
      "grad_norm": 191306.1875,
      "learning_rate": 3.0925795053003535e-05,
      "loss": 0.9179,
      "step": 2700
    },
    {
      "epoch": 1.9434628975265018,
      "grad_norm": 139976.265625,
      "learning_rate": 3.057243816254417e-05,
      "loss": 0.9741,
      "step": 2750
    },
    {
      "epoch": 1.978798586572438,
      "grad_norm": 254590.484375,
      "learning_rate": 3.021908127208481e-05,
      "loss": 0.97,
      "step": 2800
    },
    {
      "epoch": 2.0,
      "eval_bertscore_f1": 0.9522204981784862,
      "eval_bleu": 0.4391766527339286,
      "eval_loss": 0.6113855838775635,
      "eval_meteor": 0.6141492660226205,
      "eval_rouge1": 0.7802485332799334,
      "eval_rouge2": 0.6456575876833133,
      "eval_runtime": 442.4344,
      "eval_samples_per_second": 14.61,
      "eval_steps_per_second": 0.913,
      "step": 2830
    },
    {
      "epoch": 2.0141342756183747,
      "grad_norm": 251050.734375,
      "learning_rate": 2.9865724381625443e-05,
      "loss": 0.9362,
      "step": 2850
    },
    {
      "epoch": 2.049469964664311,
      "grad_norm": 165134.1875,
      "learning_rate": 2.9512367491166083e-05,
      "loss": 0.9642,
      "step": 2900
    },
    {
      "epoch": 2.0848056537102475,
      "grad_norm": 167977.453125,
      "learning_rate": 2.9159010600706717e-05,
      "loss": 0.899,
      "step": 2950
    },
    {
      "epoch": 2.1201413427561837,
      "grad_norm": 137508.921875,
      "learning_rate": 2.8805653710247347e-05,
      "loss": 0.8828,
      "step": 3000
    },
    {
      "epoch": 2.1554770318021204,
      "grad_norm": 155117.59375,
      "learning_rate": 2.8452296819787988e-05,
      "loss": 0.8958,
      "step": 3050
    },
    {
      "epoch": 2.1908127208480566,
      "grad_norm": 155701.109375,
      "learning_rate": 2.809893992932862e-05,
      "loss": 0.9106,
      "step": 3100
    },
    {
      "epoch": 2.2261484098939928,
      "grad_norm": 148495.125,
      "learning_rate": 2.7745583038869262e-05,
      "loss": 0.9755,
      "step": 3150
    },
    {
      "epoch": 2.2614840989399294,
      "grad_norm": 179815.203125,
      "learning_rate": 2.7392226148409892e-05,
      "loss": 0.9842,
      "step": 3200
    },
    {
      "epoch": 2.2968197879858656,
      "grad_norm": 193006.0625,
      "learning_rate": 2.7038869257950533e-05,
      "loss": 0.8794,
      "step": 3250
    },
    {
      "epoch": 2.3321554770318023,
      "grad_norm": 181638.421875,
      "learning_rate": 2.6685512367491167e-05,
      "loss": 0.9415,
      "step": 3300
    },
    {
      "epoch": 2.3674911660777385,
      "grad_norm": 172265.5,
      "learning_rate": 2.6332155477031807e-05,
      "loss": 0.9453,
      "step": 3350
    },
    {
      "epoch": 2.402826855123675,
      "grad_norm": 142922.71875,
      "learning_rate": 2.597879858657244e-05,
      "loss": 0.8669,
      "step": 3400
    },
    {
      "epoch": 2.4381625441696113,
      "grad_norm": 153024.515625,
      "learning_rate": 2.562544169611307e-05,
      "loss": 0.9168,
      "step": 3450
    },
    {
      "epoch": 2.4734982332155475,
      "grad_norm": 325320.28125,
      "learning_rate": 2.5272084805653712e-05,
      "loss": 0.8523,
      "step": 3500
    },
    {
      "epoch": 2.508833922261484,
      "grad_norm": 180908.15625,
      "learning_rate": 2.491872791519435e-05,
      "loss": 0.8853,
      "step": 3550
    },
    {
      "epoch": 2.5441696113074204,
      "grad_norm": 171192.765625,
      "learning_rate": 2.4565371024734986e-05,
      "loss": 0.9041,
      "step": 3600
    },
    {
      "epoch": 2.579505300353357,
      "grad_norm": 148674.578125,
      "learning_rate": 2.421201413427562e-05,
      "loss": 0.865,
      "step": 3650
    },
    {
      "epoch": 2.614840989399293,
      "grad_norm": 189248.390625,
      "learning_rate": 2.3858657243816253e-05,
      "loss": 0.9255,
      "step": 3700
    },
    {
      "epoch": 2.65017667844523,
      "grad_norm": 159843.796875,
      "learning_rate": 2.350530035335689e-05,
      "loss": 0.8886,
      "step": 3750
    },
    {
      "epoch": 2.685512367491166,
      "grad_norm": 162496.359375,
      "learning_rate": 2.3151943462897528e-05,
      "loss": 0.8689,
      "step": 3800
    },
    {
      "epoch": 2.7208480565371023,
      "grad_norm": 135053.5625,
      "learning_rate": 2.2798586572438165e-05,
      "loss": 0.8275,
      "step": 3850
    },
    {
      "epoch": 2.756183745583039,
      "grad_norm": 249403.953125,
      "learning_rate": 2.24452296819788e-05,
      "loss": 0.8319,
      "step": 3900
    },
    {
      "epoch": 2.791519434628975,
      "grad_norm": 181885.328125,
      "learning_rate": 2.2091872791519436e-05,
      "loss": 0.8719,
      "step": 3950
    },
    {
      "epoch": 2.8268551236749118,
      "grad_norm": 156204.75,
      "learning_rate": 2.1738515901060073e-05,
      "loss": 0.8592,
      "step": 4000
    },
    {
      "epoch": 2.862190812720848,
      "grad_norm": 159614.03125,
      "learning_rate": 2.138515901060071e-05,
      "loss": 0.8381,
      "step": 4050
    },
    {
      "epoch": 2.8975265017667846,
      "grad_norm": 162379.78125,
      "learning_rate": 2.1031802120141344e-05,
      "loss": 0.8612,
      "step": 4100
    },
    {
      "epoch": 2.932862190812721,
      "grad_norm": 149608.25,
      "learning_rate": 2.067844522968198e-05,
      "loss": 0.838,
      "step": 4150
    },
    {
      "epoch": 2.968197879858657,
      "grad_norm": 145233.40625,
      "learning_rate": 2.0325088339222618e-05,
      "loss": 0.8411,
      "step": 4200
    },
    {
      "epoch": 3.0,
      "eval_bertscore_f1": 0.955309976823628,
      "eval_bleu": 0.47422911395097656,
      "eval_loss": 0.5273156762123108,
      "eval_meteor": 0.6439931966505934,
      "eval_rouge1": 0.7960674625670938,
      "eval_rouge2": 0.6807866947718111,
      "eval_runtime": 440.5737,
      "eval_samples_per_second": 14.672,
      "eval_steps_per_second": 0.917,
      "step": 4245
    },
    {
      "epoch": 3.0035335689045937,
      "grad_norm": 146904.109375,
      "learning_rate": 1.997173144876325e-05,
      "loss": 0.8212,
      "step": 4250
    },
    {
      "epoch": 3.03886925795053,
      "grad_norm": 141972.140625,
      "learning_rate": 1.9618374558303885e-05,
      "loss": 0.8332,
      "step": 4300
    },
    {
      "epoch": 3.0742049469964665,
      "grad_norm": 155438.34375,
      "learning_rate": 1.9265017667844522e-05,
      "loss": 0.8314,
      "step": 4350
    },
    {
      "epoch": 3.1095406360424027,
      "grad_norm": 157805.5,
      "learning_rate": 1.891166077738516e-05,
      "loss": 0.8432,
      "step": 4400
    },
    {
      "epoch": 3.1448763250883394,
      "grad_norm": 171890.296875,
      "learning_rate": 1.8558303886925797e-05,
      "loss": 0.8171,
      "step": 4450
    },
    {
      "epoch": 3.1802120141342756,
      "grad_norm": 159529.09375,
      "learning_rate": 1.8204946996466434e-05,
      "loss": 0.8462,
      "step": 4500
    },
    {
      "epoch": 3.215547703180212,
      "grad_norm": 291361.3125,
      "learning_rate": 1.7851590106007067e-05,
      "loss": 0.8525,
      "step": 4550
    },
    {
      "epoch": 3.2508833922261484,
      "grad_norm": 146320.515625,
      "learning_rate": 1.7498233215547705e-05,
      "loss": 0.8691,
      "step": 4600
    },
    {
      "epoch": 3.2862190812720846,
      "grad_norm": 142328.234375,
      "learning_rate": 1.714487632508834e-05,
      "loss": 0.8152,
      "step": 4650
    },
    {
      "epoch": 3.3215547703180213,
      "grad_norm": 164441.828125,
      "learning_rate": 1.679151943462898e-05,
      "loss": 0.7897,
      "step": 4700
    },
    {
      "epoch": 3.3568904593639575,
      "grad_norm": 181799.328125,
      "learning_rate": 1.643816254416961e-05,
      "loss": 0.89,
      "step": 4750
    },
    {
      "epoch": 3.392226148409894,
      "grad_norm": 161868.921875,
      "learning_rate": 1.6084805653710246e-05,
      "loss": 0.8342,
      "step": 4800
    },
    {
      "epoch": 3.4275618374558303,
      "grad_norm": 168403.046875,
      "learning_rate": 1.5731448763250883e-05,
      "loss": 0.7708,
      "step": 4850
    },
    {
      "epoch": 3.462897526501767,
      "grad_norm": 180798.71875,
      "learning_rate": 1.537809187279152e-05,
      "loss": 0.8452,
      "step": 4900
    },
    {
      "epoch": 3.498233215547703,
      "grad_norm": 194585.109375,
      "learning_rate": 1.5024734982332156e-05,
      "loss": 0.8543,
      "step": 4950
    },
    {
      "epoch": 3.53356890459364,
      "grad_norm": 162353.515625,
      "learning_rate": 1.4671378091872793e-05,
      "loss": 0.7865,
      "step": 5000
    },
    {
      "epoch": 3.568904593639576,
      "grad_norm": 161337.140625,
      "learning_rate": 1.4318021201413428e-05,
      "loss": 0.8459,
      "step": 5050
    },
    {
      "epoch": 3.604240282685512,
      "grad_norm": 174313.953125,
      "learning_rate": 1.3964664310954066e-05,
      "loss": 0.8914,
      "step": 5100
    },
    {
      "epoch": 3.639575971731449,
      "grad_norm": 163668.421875,
      "learning_rate": 1.3611307420494701e-05,
      "loss": 0.8457,
      "step": 5150
    },
    {
      "epoch": 3.674911660777385,
      "grad_norm": 164355.765625,
      "learning_rate": 1.3257950530035338e-05,
      "loss": 0.8266,
      "step": 5200
    },
    {
      "epoch": 3.7102473498233217,
      "grad_norm": 159919.046875,
      "learning_rate": 1.2904593639575973e-05,
      "loss": 0.7511,
      "step": 5250
    },
    {
      "epoch": 3.745583038869258,
      "grad_norm": 131121.03125,
      "learning_rate": 1.2551236749116607e-05,
      "loss": 0.8333,
      "step": 5300
    },
    {
      "epoch": 3.7809187279151946,
      "grad_norm": 160561.640625,
      "learning_rate": 1.2197879858657244e-05,
      "loss": 0.7992,
      "step": 5350
    },
    {
      "epoch": 3.8162544169611308,
      "grad_norm": 200662.859375,
      "learning_rate": 1.184452296819788e-05,
      "loss": 0.8494,
      "step": 5400
    },
    {
      "epoch": 3.851590106007067,
      "grad_norm": 158871.84375,
      "learning_rate": 1.1491166077738517e-05,
      "loss": 0.7689,
      "step": 5450
    },
    {
      "epoch": 3.8869257950530036,
      "grad_norm": 233291.453125,
      "learning_rate": 1.1137809187279152e-05,
      "loss": 0.7717,
      "step": 5500
    },
    {
      "epoch": 3.92226148409894,
      "grad_norm": 152039.84375,
      "learning_rate": 1.078445229681979e-05,
      "loss": 0.7991,
      "step": 5550
    },
    {
      "epoch": 3.9575971731448765,
      "grad_norm": 174898.390625,
      "learning_rate": 1.0431095406360425e-05,
      "loss": 0.7713,
      "step": 5600
    },
    {
      "epoch": 3.9929328621908127,
      "grad_norm": 185653.25,
      "learning_rate": 1.0077738515901062e-05,
      "loss": 0.8271,
      "step": 5650
    },
    {
      "epoch": 4.0,
      "eval_bertscore_f1": 0.9574022400700072,
      "eval_bleu": 0.49288907686192807,
      "eval_loss": 0.4886058270931244,
      "eval_meteor": 0.6607653874114606,
      "eval_rouge1": 0.8043075620747033,
      "eval_rouge2": 0.698973717724064,
      "eval_runtime": 437.782,
      "eval_samples_per_second": 14.765,
      "eval_steps_per_second": 0.923,
      "step": 5660
    },
    {
      "epoch": 4.028268551236749,
      "grad_norm": 189546.390625,
      "learning_rate": 9.724381625441696e-06,
      "loss": 0.8009,
      "step": 5700
    },
    {
      "epoch": 4.063604240282685,
      "grad_norm": 179902.28125,
      "learning_rate": 9.371024734982333e-06,
      "loss": 0.7955,
      "step": 5750
    },
    {
      "epoch": 4.098939929328622,
      "grad_norm": 134316.078125,
      "learning_rate": 9.017667844522968e-06,
      "loss": 0.8093,
      "step": 5800
    },
    {
      "epoch": 4.134275618374558,
      "grad_norm": 113333.8203125,
      "learning_rate": 8.664310954063605e-06,
      "loss": 0.7959,
      "step": 5850
    },
    {
      "epoch": 4.169611307420495,
      "grad_norm": 153999.46875,
      "learning_rate": 8.31095406360424e-06,
      "loss": 0.7883,
      "step": 5900
    },
    {
      "epoch": 4.204946996466431,
      "grad_norm": 284284.375,
      "learning_rate": 7.957597173144876e-06,
      "loss": 0.7858,
      "step": 5950
    },
    {
      "epoch": 4.240282685512367,
      "grad_norm": 139182.28125,
      "learning_rate": 7.6042402826855124e-06,
      "loss": 0.804,
      "step": 6000
    },
    {
      "epoch": 4.275618374558304,
      "grad_norm": 169185.328125,
      "learning_rate": 7.250883392226149e-06,
      "loss": 0.7745,
      "step": 6050
    },
    {
      "epoch": 4.310954063604241,
      "grad_norm": 200688.453125,
      "learning_rate": 6.897526501766785e-06,
      "loss": 0.7795,
      "step": 6100
    },
    {
      "epoch": 4.3462897526501765,
      "grad_norm": 135357.96875,
      "learning_rate": 6.544169611307421e-06,
      "loss": 0.8412,
      "step": 6150
    },
    {
      "epoch": 4.381625441696113,
      "grad_norm": 137914.25,
      "learning_rate": 6.190812720848057e-06,
      "loss": 0.811,
      "step": 6200
    },
    {
      "epoch": 4.41696113074205,
      "grad_norm": 179002.140625,
      "learning_rate": 5.837455830388693e-06,
      "loss": 0.7859,
      "step": 6250
    },
    {
      "epoch": 4.4522968197879855,
      "grad_norm": 139846.9375,
      "learning_rate": 5.484098939929329e-06,
      "loss": 0.801,
      "step": 6300
    },
    {
      "epoch": 4.487632508833922,
      "grad_norm": 161350.671875,
      "learning_rate": 5.1307420494699655e-06,
      "loss": 0.8203,
      "step": 6350
    },
    {
      "epoch": 4.522968197879859,
      "grad_norm": 153958.46875,
      "learning_rate": 4.777385159010601e-06,
      "loss": 0.7693,
      "step": 6400
    },
    {
      "epoch": 4.5583038869257955,
      "grad_norm": 154714.984375,
      "learning_rate": 4.424028268551237e-06,
      "loss": 0.7808,
      "step": 6450
    },
    {
      "epoch": 4.593639575971731,
      "grad_norm": 294880.1875,
      "learning_rate": 4.070671378091873e-06,
      "loss": 0.8194,
      "step": 6500
    },
    {
      "epoch": 4.628975265017668,
      "grad_norm": 480907.34375,
      "learning_rate": 3.717314487632509e-06,
      "loss": 0.8096,
      "step": 6550
    },
    {
      "epoch": 4.6643109540636045,
      "grad_norm": 319982.71875,
      "learning_rate": 3.363957597173145e-06,
      "loss": 0.7987,
      "step": 6600
    },
    {
      "epoch": 4.69964664310954,
      "grad_norm": 188937.359375,
      "learning_rate": 3.010600706713781e-06,
      "loss": 0.8162,
      "step": 6650
    },
    {
      "epoch": 4.734982332155477,
      "grad_norm": 166750.71875,
      "learning_rate": 2.6572438162544172e-06,
      "loss": 0.7726,
      "step": 6700
    },
    {
      "epoch": 4.770318021201414,
      "grad_norm": 179732.5625,
      "learning_rate": 2.303886925795053e-06,
      "loss": 0.7848,
      "step": 6750
    },
    {
      "epoch": 4.80565371024735,
      "grad_norm": 135768.640625,
      "learning_rate": 1.9505300353356893e-06,
      "loss": 0.8204,
      "step": 6800
    },
    {
      "epoch": 4.840989399293286,
      "grad_norm": 194958.0625,
      "learning_rate": 1.5971731448763252e-06,
      "loss": 0.8214,
      "step": 6850
    },
    {
      "epoch": 4.876325088339223,
      "grad_norm": 205979.40625,
      "learning_rate": 1.2438162544169612e-06,
      "loss": 0.7658,
      "step": 6900
    },
    {
      "epoch": 4.911660777385159,
      "grad_norm": 140958.078125,
      "learning_rate": 8.904593639575972e-07,
      "loss": 0.777,
      "step": 6950
    },
    {
      "epoch": 4.946996466431095,
      "grad_norm": 149027.953125,
      "learning_rate": 5.371024734982332e-07,
      "loss": 0.8286,
      "step": 7000
    },
    {
      "epoch": 4.982332155477032,
      "grad_norm": 161736.4375,
      "learning_rate": 1.8374558303886926e-07,
      "loss": 0.7922,
      "step": 7050
    },
    {
      "epoch": 5.0,
      "eval_bertscore_f1": 0.957730518736978,
      "eval_bleu": 0.49562010024897835,
      "eval_loss": 0.47903409600257874,
      "eval_meteor": 0.6644886697409358,
      "eval_rouge1": 0.8053692499714378,
      "eval_rouge2": 0.7018033389816531,
      "eval_runtime": 438.2906,
      "eval_samples_per_second": 14.748,
      "eval_steps_per_second": 0.922,
      "step": 7075
    }
  ],
  "logging_steps": 50,
  "max_steps": 7075,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.730048539557888e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}