{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 8238,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012138868657441126,
"grad_norm": 2.9070301055908203,
"learning_rate": 1.975806451612903e-05,
"loss": 2.9199,
"step": 50
},
{
"epoch": 0.024277737314882253,
"grad_norm": 1.895039439201355,
"learning_rate": 3.991935483870968e-05,
"loss": 2.8553,
"step": 100
},
{
"epoch": 0.03641660597232338,
"grad_norm": 2.2209153175354004,
"learning_rate": 6.0080645161290325e-05,
"loss": 2.8865,
"step": 150
},
{
"epoch": 0.048555474629764506,
"grad_norm": 2.2095389366149902,
"learning_rate": 8.024193548387097e-05,
"loss": 2.7245,
"step": 200
},
{
"epoch": 0.06069434328720563,
"grad_norm": 2.3479413986206055,
"learning_rate": 9.999999613502945e-05,
"loss": 2.7798,
"step": 250
},
{
"epoch": 0.07283321194464676,
"grad_norm": 2.177093744277954,
"learning_rate": 9.99899475483094e-05,
"loss": 2.7718,
"step": 300
},
{
"epoch": 0.08497208060208788,
"grad_norm": 2.8577401638031006,
"learning_rate": 9.996057861608239e-05,
"loss": 2.7981,
"step": 350
},
{
"epoch": 0.09711094925952901,
"grad_norm": 2.592057466506958,
"learning_rate": 9.991190068898889e-05,
"loss": 2.7317,
"step": 400
},
{
"epoch": 0.10924981791697014,
"grad_norm": 1.9039149284362793,
"learning_rate": 9.98439325802986e-05,
"loss": 2.6439,
"step": 450
},
{
"epoch": 0.12138868657441126,
"grad_norm": 1.9637870788574219,
"learning_rate": 9.975670055863974e-05,
"loss": 2.7429,
"step": 500
},
{
"epoch": 0.1335275552318524,
"grad_norm": 2.6924002170562744,
"learning_rate": 9.965023833784636e-05,
"loss": 2.7226,
"step": 550
},
{
"epoch": 0.14566642388929352,
"grad_norm": 1.896791934967041,
"learning_rate": 9.952458706392864e-05,
"loss": 2.6811,
"step": 600
},
{
"epoch": 0.15780529254673464,
"grad_norm": 2.0208559036254883,
"learning_rate": 9.937979529917046e-05,
"loss": 2.6905,
"step": 650
},
{
"epoch": 0.16994416120417577,
"grad_norm": 2.337047815322876,
"learning_rate": 9.921591900336092e-05,
"loss": 2.716,
"step": 700
},
{
"epoch": 0.1820830298616169,
"grad_norm": 1.9349219799041748,
"learning_rate": 9.903302151216671e-05,
"loss": 2.7061,
"step": 750
},
{
"epoch": 0.19422189851905802,
"grad_norm": 1.7132676839828491,
"learning_rate": 9.883117351265385e-05,
"loss": 2.7762,
"step": 800
},
{
"epoch": 0.20636076717649915,
"grad_norm": 2.1389896869659424,
"learning_rate": 9.861045301596821e-05,
"loss": 2.7318,
"step": 850
},
{
"epoch": 0.21849963583394028,
"grad_norm": 3.0611305236816406,
"learning_rate": 9.837094532718541e-05,
"loss": 2.7319,
"step": 900
},
{
"epoch": 0.2306385044913814,
"grad_norm": 1.7353436946868896,
"learning_rate": 9.811274301234174e-05,
"loss": 2.7076,
"step": 950
},
{
"epoch": 0.24277737314882253,
"grad_norm": 2.3191990852355957,
"learning_rate": 9.78359458626588e-05,
"loss": 2.7457,
"step": 1000
},
{
"epoch": 0.25491624180626365,
"grad_norm": 2.0282199382781982,
"learning_rate": 9.754066085597576e-05,
"loss": 2.638,
"step": 1050
},
{
"epoch": 0.2670551104637048,
"grad_norm": 2.1684181690216064,
"learning_rate": 9.722700211540394e-05,
"loss": 2.6815,
"step": 1100
},
{
"epoch": 0.2791939791211459,
"grad_norm": 1.9141716957092285,
"learning_rate": 9.689509086522019e-05,
"loss": 2.5845,
"step": 1150
},
{
"epoch": 0.29133284777858703,
"grad_norm": 1.8547425270080566,
"learning_rate": 9.65450553840154e-05,
"loss": 2.686,
"step": 1200
},
{
"epoch": 0.30347171643602816,
"grad_norm": 2.2245781421661377,
"learning_rate": 9.617703095511691e-05,
"loss": 2.757,
"step": 1250
},
{
"epoch": 0.3156105850934693,
"grad_norm": 1.9658602476119995,
"learning_rate": 9.579115981430349e-05,
"loss": 2.6181,
"step": 1300
},
{
"epoch": 0.3277494537509104,
"grad_norm": 2.5929782390594482,
"learning_rate": 9.538759109483347e-05,
"loss": 2.6221,
"step": 1350
},
{
"epoch": 0.33988832240835154,
"grad_norm": 1.781426191329956,
"learning_rate": 9.496648076980702e-05,
"loss": 2.6583,
"step": 1400
},
{
"epoch": 0.35202719106579267,
"grad_norm": 2.546909809112549,
"learning_rate": 9.452799159188492e-05,
"loss": 2.637,
"step": 1450
},
{
"epoch": 0.3641660597232338,
"grad_norm": 2.062859058380127,
"learning_rate": 9.407229303038719e-05,
"loss": 2.6607,
"step": 1500
},
{
"epoch": 0.3763049283806749,
"grad_norm": 2.205317735671997,
"learning_rate": 9.359956120579578e-05,
"loss": 2.6899,
"step": 1550
},
{
"epoch": 0.38844379703811605,
"grad_norm": 1.6034296751022339,
"learning_rate": 9.310997882168673e-05,
"loss": 2.6986,
"step": 1600
},
{
"epoch": 0.40058266569555717,
"grad_norm": 1.492854356765747,
"learning_rate": 9.260373509411806e-05,
"loss": 2.7071,
"step": 1650
},
{
"epoch": 0.4127215343529983,
"grad_norm": 2.1287174224853516,
"learning_rate": 9.208102567850063e-05,
"loss": 2.6058,
"step": 1700
},
{
"epoch": 0.4248604030104394,
"grad_norm": 2.4047040939331055,
"learning_rate": 9.154205259398038e-05,
"loss": 2.705,
"step": 1750
},
{
"epoch": 0.43699927166788055,
"grad_norm": 1.8397117853164673,
"learning_rate": 9.098702414536107e-05,
"loss": 2.6512,
"step": 1800
},
{
"epoch": 0.4491381403253217,
"grad_norm": 2.055699586868286,
"learning_rate": 9.041615484259753e-05,
"loss": 2.6701,
"step": 1850
},
{
"epoch": 0.4612770089827628,
"grad_norm": 1.9400551319122314,
"learning_rate": 8.982966531789105e-05,
"loss": 2.6792,
"step": 1900
},
{
"epoch": 0.47341587764020393,
"grad_norm": 2.668945074081421,
"learning_rate": 8.922778224041835e-05,
"loss": 2.6004,
"step": 1950
},
{
"epoch": 0.48555474629764506,
"grad_norm": 2.9810187816619873,
"learning_rate": 8.861073822872734e-05,
"loss": 2.5851,
"step": 2000
},
{
"epoch": 0.4976936149550862,
"grad_norm": 1.625508427619934,
"learning_rate": 8.79787717608338e-05,
"loss": 2.5802,
"step": 2050
},
{
"epoch": 0.5098324836125273,
"grad_norm": 2.1407854557037354,
"learning_rate": 8.733212708205321e-05,
"loss": 2.5865,
"step": 2100
},
{
"epoch": 0.5219713522699685,
"grad_norm": 2.2356784343719482,
"learning_rate": 8.667105411060361e-05,
"loss": 2.6538,
"step": 2150
},
{
"epoch": 0.5341102209274096,
"grad_norm": 2.3607735633850098,
"learning_rate": 8.599580834101625e-05,
"loss": 2.5077,
"step": 2200
},
{
"epoch": 0.5462490895848507,
"grad_norm": 2.3377416133880615,
"learning_rate": 8.530665074539073e-05,
"loss": 2.5979,
"step": 2250
},
{
"epoch": 0.5583879582422918,
"grad_norm": 2.3431484699249268,
"learning_rate": 8.460384767253331e-05,
"loss": 2.4996,
"step": 2300
},
{
"epoch": 0.570526826899733,
"grad_norm": 2.106093406677246,
"learning_rate": 8.388767074501731e-05,
"loss": 2.4795,
"step": 2350
},
{
"epoch": 0.5826656955571741,
"grad_norm": 1.8955905437469482,
"learning_rate": 8.3158396754205e-05,
"loss": 2.5837,
"step": 2400
},
{
"epoch": 0.5948045642146153,
"grad_norm": 1.9230371713638306,
"learning_rate": 8.241630755327213e-05,
"loss": 2.5845,
"step": 2450
},
{
"epoch": 0.6069434328720563,
"grad_norm": 1.6631944179534912,
"learning_rate": 8.166168994827599e-05,
"loss": 2.6071,
"step": 2500
},
{
"epoch": 0.6190823015294975,
"grad_norm": 2.2075533866882324,
"learning_rate": 8.089483558730919e-05,
"loss": 2.5412,
"step": 2550
},
{
"epoch": 0.6312211701869386,
"grad_norm": 1.8824903964996338,
"learning_rate": 8.011604084778229e-05,
"loss": 2.5386,
"step": 2600
},
{
"epoch": 0.6433600388443798,
"grad_norm": 2.604081869125366,
"learning_rate": 7.932560672187839e-05,
"loss": 2.6509,
"step": 2650
},
{
"epoch": 0.6554989075018208,
"grad_norm": 2.0620648860931396,
"learning_rate": 7.852383870022439e-05,
"loss": 2.6403,
"step": 2700
},
{
"epoch": 0.667637776159262,
"grad_norm": 2.0239202976226807,
"learning_rate": 7.771104665382341e-05,
"loss": 2.6965,
"step": 2750
},
{
"epoch": 0.6797766448167031,
"grad_norm": 1.7837492227554321,
"learning_rate": 7.688754471429456e-05,
"loss": 2.5448,
"step": 2800
},
{
"epoch": 0.6919155134741443,
"grad_norm": 1.9377483129501343,
"learning_rate": 7.605365115246581e-05,
"loss": 2.6333,
"step": 2850
},
{
"epoch": 0.7040543821315853,
"grad_norm": 2.297499179840088,
"learning_rate": 7.520968825536732e-05,
"loss": 2.4747,
"step": 2900
},
{
"epoch": 0.7161932507890265,
"grad_norm": 1.857254147529602,
"learning_rate": 7.435598220167226e-05,
"loss": 2.6631,
"step": 2950
},
{
"epoch": 0.7283321194464676,
"grad_norm": 2.1972172260284424,
"learning_rate": 7.349286293563402e-05,
"loss": 2.5898,
"step": 3000
},
{
"epoch": 0.7404709881039088,
"grad_norm": 2.267690896987915,
"learning_rate": 7.26206640395677e-05,
"loss": 2.4341,
"step": 3050
},
{
"epoch": 0.7526098567613498,
"grad_norm": 1.6826646327972412,
"learning_rate": 7.17397226049256e-05,
"loss": 2.6269,
"step": 3100
},
{
"epoch": 0.764748725418791,
"grad_norm": 2.3957300186157227,
"learning_rate": 7.085037910201677e-05,
"loss": 2.6107,
"step": 3150
},
{
"epoch": 0.7768875940762321,
"grad_norm": 2.471625566482544,
"learning_rate": 6.99529772484203e-05,
"loss": 2.5767,
"step": 3200
},
{
"epoch": 0.7890264627336733,
"grad_norm": 1.8939329385757446,
"learning_rate": 6.904786387614382e-05,
"loss": 2.5009,
"step": 3250
},
{
"epoch": 0.8011653313911143,
"grad_norm": 2.498994827270508,
"learning_rate": 6.813538879757828e-05,
"loss": 2.5742,
"step": 3300
},
{
"epoch": 0.8133042000485555,
"grad_norm": 2.3812406063079834,
"learning_rate": 6.721590467030083e-05,
"loss": 2.5011,
"step": 3350
},
{
"epoch": 0.8254430687059966,
"grad_norm": 1.9224671125411987,
"learning_rate": 6.62897668607781e-05,
"loss": 2.5455,
"step": 3400
},
{
"epoch": 0.8375819373634378,
"grad_norm": 1.811013102531433,
"learning_rate": 6.535733330702254e-05,
"loss": 2.5791,
"step": 3450
},
{
"epoch": 0.8497208060208788,
"grad_norm": 1.4125910997390747,
"learning_rate": 6.441896438025482e-05,
"loss": 2.477,
"step": 3500
},
{
"epoch": 0.86185967467832,
"grad_norm": 1.7109546661376953,
"learning_rate": 6.3475022745626e-05,
"loss": 2.4967,
"step": 3550
},
{
"epoch": 0.8739985433357611,
"grad_norm": 1.8944520950317383,
"learning_rate": 6.252587322205299e-05,
"loss": 2.6007,
"step": 3600
},
{
"epoch": 0.8861374119932023,
"grad_norm": 2.4895029067993164,
"learning_rate": 6.157188264122153e-05,
"loss": 2.5122,
"step": 3650
},
{
"epoch": 0.8982762806506434,
"grad_norm": 2.2736401557922363,
"learning_rate": 6.061341970581165e-05,
"loss": 2.5942,
"step": 3700
},
{
"epoch": 0.9104151493080845,
"grad_norm": 2.258389711380005,
"learning_rate": 5.9650854846999495e-05,
"loss": 2.4973,
"step": 3750
},
{
"epoch": 0.9225540179655256,
"grad_norm": 2.1070783138275146,
"learning_rate": 5.868456008129154e-05,
"loss": 2.5858,
"step": 3800
},
{
"epoch": 0.9346928866229668,
"grad_norm": 1.8113417625427246,
"learning_rate": 5.7714908866745864e-05,
"loss": 2.5253,
"step": 3850
},
{
"epoch": 0.9468317552804079,
"grad_norm": 1.8022534847259521,
"learning_rate": 5.674227595863638e-05,
"loss": 2.5297,
"step": 3900
},
{
"epoch": 0.958970623937849,
"grad_norm": 2.208134174346924,
"learning_rate": 5.5767037264615686e-05,
"loss": 2.5352,
"step": 3950
},
{
"epoch": 0.9711094925952901,
"grad_norm": 1.7783771753311157,
"learning_rate": 5.478956969943252e-05,
"loss": 2.622,
"step": 4000
},
{
"epoch": 0.9832483612527313,
"grad_norm": 1.889061689376831,
"learning_rate": 5.3810251039260026e-05,
"loss": 2.5766,
"step": 4050
},
{
"epoch": 0.9953872299101724,
"grad_norm": 1.7664889097213745,
"learning_rate": 5.2829459775691124e-05,
"loss": 2.5343,
"step": 4100
},
{
"epoch": 1.0075260985676135,
"grad_norm": 2.389195442199707,
"learning_rate": 5.184757496945726e-05,
"loss": 2.4996,
"step": 4150
},
{
"epoch": 1.0196649672250546,
"grad_norm": 2.4707448482513428,
"learning_rate": 5.086497610392723e-05,
"loss": 2.3471,
"step": 4200
},
{
"epoch": 1.0318038358824957,
"grad_norm": 2.3839166164398193,
"learning_rate": 4.988204293844289e-05,
"loss": 2.3737,
"step": 4250
},
{
"epoch": 1.043942704539937,
"grad_norm": 2.6970324516296387,
"learning_rate": 4.889915536154776e-05,
"loss": 2.3854,
"step": 4300
},
{
"epoch": 1.056081573197378,
"grad_norm": 1.623435616493225,
"learning_rate": 4.7916693244166126e-05,
"loss": 2.3536,
"step": 4350
},
{
"epoch": 1.0682204418548191,
"grad_norm": 2.695117473602295,
"learning_rate": 4.693503629278875e-05,
"loss": 2.3699,
"step": 4400
},
{
"epoch": 1.0803593105122602,
"grad_norm": 2.7556312084198,
"learning_rate": 4.595456390272207e-05,
"loss": 2.3021,
"step": 4450
},
{
"epoch": 1.0924981791697013,
"grad_norm": 2.1368134021759033,
"learning_rate": 4.4975655011457815e-05,
"loss": 2.3003,
"step": 4500
},
{
"epoch": 1.1046370478271426,
"grad_norm": 1.6469930410385132,
"learning_rate": 4.399868795221951e-05,
"loss": 2.3007,
"step": 4550
},
{
"epoch": 1.1167759164845836,
"grad_norm": 1.8031399250030518,
"learning_rate": 4.302404030774248e-05,
"loss": 2.4757,
"step": 4600
},
{
"epoch": 1.1289147851420247,
"grad_norm": 2.02652907371521,
"learning_rate": 4.205208876434389e-05,
"loss": 2.2888,
"step": 4650
},
{
"epoch": 1.141053653799466,
"grad_norm": 1.9721205234527588,
"learning_rate": 4.108320896633937e-05,
"loss": 2.3307,
"step": 4700
},
{
"epoch": 1.153192522456907,
"grad_norm": 2.1819326877593994,
"learning_rate": 4.011777537086219e-05,
"loss": 2.3219,
"step": 4750
},
{
"epoch": 1.1653313911143481,
"grad_norm": 2.973172187805176,
"learning_rate": 3.915616110314142e-05,
"loss": 2.252,
"step": 4800
},
{
"epoch": 1.1774702597717892,
"grad_norm": 2.2087929248809814,
"learning_rate": 3.8198737812294675e-05,
"loss": 2.3202,
"step": 4850
},
{
"epoch": 1.1896091284292303,
"grad_norm": 2.286069869995117,
"learning_rate": 3.724587552769152e-05,
"loss": 2.3541,
"step": 4900
},
{
"epoch": 1.2017479970866716,
"grad_norm": 2.08137583732605,
"learning_rate": 3.6297942515942776e-05,
"loss": 2.3576,
"step": 4950
},
{
"epoch": 1.2138868657441126,
"grad_norm": 2.029747724533081,
"learning_rate": 3.535530513857115e-05,
"loss": 2.3344,
"step": 5000
},
{
"epoch": 1.2260257344015537,
"grad_norm": 2.449650764465332,
"learning_rate": 3.441832771041818e-05,
"loss": 2.3351,
"step": 5050
},
{
"epoch": 1.238164603058995,
"grad_norm": 2.0461597442626953,
"learning_rate": 3.34873723588421e-05,
"loss": 2.2197,
"step": 5100
},
{
"epoch": 1.250303471716436,
"grad_norm": 1.7304949760437012,
"learning_rate": 3.25627988837612e-05,
"loss": 2.3097,
"step": 5150
},
{
"epoch": 1.2624423403738771,
"grad_norm": 2.58225417137146,
"learning_rate": 3.164496461859673e-05,
"loss": 2.4066,
"step": 5200
},
{
"epoch": 1.2745812090313182,
"grad_norm": 1.7446330785751343,
"learning_rate": 3.0734224292169e-05,
"loss": 2.3252,
"step": 5250
},
{
"epoch": 1.2867200776887593,
"grad_norm": 1.8611998558044434,
"learning_rate": 2.9830929891600177e-05,
"loss": 2.2757,
"step": 5300
},
{
"epoch": 1.2988589463462006,
"grad_norm": 1.8992869853973389,
"learning_rate": 2.8935430526276586e-05,
"loss": 2.3245,
"step": 5350
},
{
"epoch": 1.3109978150036417,
"grad_norm": 2.460495710372925,
"learning_rate": 2.8048072292923465e-05,
"loss": 2.2645,
"step": 5400
},
{
"epoch": 1.3231366836610827,
"grad_norm": 2.6929290294647217,
"learning_rate": 2.7169198141843767e-05,
"loss": 2.2588,
"step": 5450
},
{
"epoch": 1.335275552318524,
"grad_norm": 3.0288407802581787,
"learning_rate": 2.6299147744373193e-05,
"loss": 2.2605,
"step": 5500
},
{
"epoch": 1.347414420975965,
"grad_norm": 1.7983629703521729,
"learning_rate": 2.5438257361602474e-05,
"loss": 2.2654,
"step": 5550
},
{
"epoch": 1.3595532896334062,
"grad_norm": 2.5929248332977295,
"learning_rate": 2.4586859714417594e-05,
"loss": 2.2965,
"step": 5600
},
{
"epoch": 1.3716921582908472,
"grad_norm": 1.558080792427063,
"learning_rate": 2.3745283854908305e-05,
"loss": 2.3072,
"step": 5650
},
{
"epoch": 1.3838310269482883,
"grad_norm": 1.997135877609253,
"learning_rate": 2.2913855039194553e-05,
"loss": 2.3047,
"step": 5700
},
{
"epoch": 1.3959698956057296,
"grad_norm": 1.9210643768310547,
"learning_rate": 2.2092894601720005e-05,
"loss": 2.2756,
"step": 5750
},
{
"epoch": 1.4081087642631707,
"grad_norm": 2.384209156036377,
"learning_rate": 2.128271983106121e-05,
"loss": 2.2948,
"step": 5800
},
{
"epoch": 1.4202476329206117,
"grad_norm": 2.263803482055664,
"learning_rate": 2.0483643847300453e-05,
"loss": 2.3062,
"step": 5850
},
{
"epoch": 1.432386501578053,
"grad_norm": 2.315314769744873,
"learning_rate": 1.9695975481009683e-05,
"loss": 2.3215,
"step": 5900
},
{
"epoch": 1.444525370235494,
"grad_norm": 2.041764497756958,
"learning_rate": 1.89200191538922e-05,
"loss": 2.3256,
"step": 5950
},
{
"epoch": 1.4566642388929352,
"grad_norm": 2.1705563068389893,
"learning_rate": 1.8156074761128454e-05,
"loss": 2.2912,
"step": 6000
},
{
"epoch": 1.4688031075503762,
"grad_norm": 2.304280996322632,
"learning_rate": 1.7404437555471003e-05,
"loss": 2.309,
"step": 6050
},
{
"epoch": 1.4809419762078173,
"grad_norm": 2.4376580715179443,
"learning_rate": 1.6665398033134034e-05,
"loss": 2.345,
"step": 6100
},
{
"epoch": 1.4930808448652586,
"grad_norm": 2.959686279296875,
"learning_rate": 1.5939241821520952e-05,
"loss": 2.2565,
"step": 6150
},
{
"epoch": 1.5052197135226997,
"grad_norm": 1.8753809928894043,
"learning_rate": 1.5226249568833794e-05,
"loss": 2.3363,
"step": 6200
},
{
"epoch": 1.5173585821801407,
"grad_norm": 1.8722175359725952,
"learning_rate": 1.452669683560709e-05,
"loss": 2.3196,
"step": 6250
},
{
"epoch": 1.529497450837582,
"grad_norm": 2.468750238418579,
"learning_rate": 1.3840853988207847e-05,
"loss": 2.3277,
"step": 6300
},
{
"epoch": 1.541636319495023,
"grad_norm": 1.8061391115188599,
"learning_rate": 1.316898609434319e-05,
"loss": 2.2795,
"step": 6350
},
{
"epoch": 1.5537751881524642,
"grad_norm": 1.9603863954544067,
"learning_rate": 1.2511352820615691e-05,
"loss": 2.326,
"step": 6400
},
{
"epoch": 1.5659140568099055,
"grad_norm": 2.2773890495300293,
"learning_rate": 1.1868208332166336e-05,
"loss": 2.2427,
"step": 6450
},
{
"epoch": 1.5780529254673463,
"grad_norm": 1.9823254346847534,
"learning_rate": 1.1239801194443506e-05,
"loss": 2.2775,
"step": 6500
},
{
"epoch": 1.5901917941247876,
"grad_norm": 2.00081205368042,
"learning_rate": 1.0626374277136342e-05,
"loss": 2.3023,
"step": 6550
},
{
"epoch": 1.6023306627822287,
"grad_norm": 2.134455919265747,
"learning_rate": 1.0028164660309259e-05,
"loss": 2.4271,
"step": 6600
},
{
"epoch": 1.6144695314396698,
"grad_norm": 2.493212938308716,
"learning_rate": 9.445403542774206e-06,
"loss": 2.2615,
"step": 6650
},
{
"epoch": 1.626608400097111,
"grad_norm": 2.063344955444336,
"learning_rate": 8.878316152735888e-06,
"loss": 2.2552,
"step": 6700
},
{
"epoch": 1.6387472687545521,
"grad_norm": 2.2275609970092773,
"learning_rate": 8.327121660744452e-06,
"loss": 2.3427,
"step": 6750
},
{
"epoch": 1.6508861374119932,
"grad_norm": 2.143228769302368,
"learning_rate": 7.792033094989593e-06,
"loss": 2.2294,
"step": 6800
},
{
"epoch": 1.6630250060694345,
"grad_norm": 1.6725349426269531,
"learning_rate": 7.273257258968275e-06,
"loss": 2.3335,
"step": 6850
},
{
"epoch": 1.6751638747268753,
"grad_norm": 1.7002774477005005,
"learning_rate": 6.77099465155846e-06,
"loss": 2.3019,
"step": 6900
},
{
"epoch": 1.6873027433843166,
"grad_norm": 2.0058093070983887,
"learning_rate": 6.285439389529346e-06,
"loss": 2.2801,
"step": 6950
},
{
"epoch": 1.6994416120417577,
"grad_norm": 2.444603681564331,
"learning_rate": 5.816779132518224e-06,
"loss": 2.2837,
"step": 7000
},
{
"epoch": 1.7115804806991988,
"grad_norm": 2.3724894523620605,
"learning_rate": 5.365195010502916e-06,
"loss": 2.3238,
"step": 7050
},
{
"epoch": 1.72371934935664,
"grad_norm": 2.394784450531006,
"learning_rate": 4.930861553797822e-06,
"loss": 2.2119,
"step": 7100
},
{
"epoch": 1.7358582180140811,
"grad_norm": 1.8876112699508667,
"learning_rate": 4.5139466256006625e-06,
"loss": 2.3293,
"step": 7150
},
{
"epoch": 1.7479970866715222,
"grad_norm": 2.4736382961273193,
"learning_rate": 4.1146113571158995e-06,
"loss": 2.2619,
"step": 7200
},
{
"epoch": 1.7601359553289635,
"grad_norm": 2.3860538005828857,
"learning_rate": 3.733010085280031e-06,
"loss": 2.2628,
"step": 7250
},
{
"epoch": 1.7722748239864043,
"grad_norm": 2.2846248149871826,
"learning_rate": 3.3692902931127256e-06,
"loss": 2.2636,
"step": 7300
},
{
"epoch": 1.7844136926438456,
"grad_norm": 1.9925642013549805,
"learning_rate": 3.0235925527169196e-06,
"loss": 2.2772,
"step": 7350
},
{
"epoch": 1.7965525613012867,
"grad_norm": 2.708155870437622,
"learning_rate": 2.696050470949857e-06,
"loss": 2.2776,
"step": 7400
},
{
"epoch": 1.8086914299587278,
"grad_norm": 1.6095919609069824,
"learning_rate": 2.386790637786085e-06,
"loss": 2.3365,
"step": 7450
},
{
"epoch": 1.820830298616169,
"grad_norm": 1.8871222734451294,
"learning_rate": 2.0959325773923732e-06,
"loss": 2.3408,
"step": 7500
},
{
"epoch": 1.8329691672736101,
"grad_norm": 2.4641993045806885,
"learning_rate": 1.8235887019334985e-06,
"loss": 2.2675,
"step": 7550
},
{
"epoch": 1.8451080359310512,
"grad_norm": 2.003045082092285,
"learning_rate": 1.569864268126614e-06,
"loss": 2.3028,
"step": 7600
},
{
"epoch": 1.8572469045884925,
"grad_norm": 2.2934603691101074,
"learning_rate": 1.3348573365612184e-06,
"loss": 2.3406,
"step": 7650
},
{
"epoch": 1.8693857732459334,
"grad_norm": 1.78590989112854,
"learning_rate": 1.118658733800193e-06,
"loss": 2.2264,
"step": 7700
},
{
"epoch": 1.8815246419033747,
"grad_norm": 1.7274677753448486,
"learning_rate": 9.213520172767332e-07,
"loss": 2.3045,
"step": 7750
},
{
"epoch": 1.8936635105608157,
"grad_norm": 1.8418123722076416,
"learning_rate": 7.43013443000734e-07,
"loss": 2.2462,
"step": 7800
},
{
"epoch": 1.9058023792182568,
"grad_norm": 2.445272445678711,
"learning_rate": 5.837119360869503e-07,
"loss": 2.4228,
"step": 7850
},
{
"epoch": 1.917941247875698,
"grad_norm": 2.3169541358947754,
"learning_rate": 4.435090641165651e-07,
"loss": 2.3271,
"step": 7900
},
{
"epoch": 1.9300801165331392,
"grad_norm": 1.9685901403427124,
"learning_rate": 3.2245901334221895e-07,
"loss": 2.2368,
"step": 7950
},
{
"epoch": 1.9422189851905802,
"grad_norm": 2.039243221282959,
"learning_rate": 2.2060856774587803e-07,
"loss": 2.3857,
"step": 8000
},
{
"epoch": 1.9543578538480215,
"grad_norm": 2.139963150024414,
"learning_rate": 1.3799709095754232e-07,
"loss": 2.3981,
"step": 8050
},
{
"epoch": 1.9664967225054624,
"grad_norm": 2.1113266944885254,
"learning_rate": 7.46565110417985e-08,
"loss": 2.306,
"step": 8100
},
{
"epoch": 1.9786355911629037,
"grad_norm": 2.550076723098755,
"learning_rate": 3.06113081581405e-08,
"loss": 2.2583,
"step": 8150
},
{
"epoch": 1.9907744598203447,
"grad_norm": 1.6660057306289673,
"learning_rate": 5.878505099732312e-09,
"loss": 2.3201,
"step": 8200
}
],
"logging_steps": 50,
"max_steps": 8238,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.2322467312492544e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}