{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.997824139255088,
"eval_steps": 500,
"global_step": 9765,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02559836170485089,
"grad_norm": 0.7305641763872902,
"learning_rate": 1.9897593445980545e-05,
"loss": 0.24,
"step": 50
},
{
"epoch": 0.05119672340970178,
"grad_norm": 0.05103642085416945,
"learning_rate": 1.9795186891961087e-05,
"loss": 0.0049,
"step": 100
},
{
"epoch": 0.07679508511455267,
"grad_norm": 0.030274497499991383,
"learning_rate": 1.969278033794163e-05,
"loss": 0.0017,
"step": 150
},
{
"epoch": 0.10239344681940356,
"grad_norm": 0.029756392410786685,
"learning_rate": 1.9590373783922173e-05,
"loss": 0.0011,
"step": 200
},
{
"epoch": 0.12799180852425446,
"grad_norm": 0.02549118945457913,
"learning_rate": 1.9487967229902716e-05,
"loss": 0.0008,
"step": 250
},
{
"epoch": 0.15359017022910534,
"grad_norm": 0.02438470537104193,
"learning_rate": 1.9385560675883256e-05,
"loss": 0.0008,
"step": 300
},
{
"epoch": 0.17918853193395623,
"grad_norm": 0.016258758537083494,
"learning_rate": 1.9283154121863802e-05,
"loss": 0.0007,
"step": 350
},
{
"epoch": 0.20478689363880712,
"grad_norm": 0.014976187170917211,
"learning_rate": 1.9180747567844345e-05,
"loss": 0.0005,
"step": 400
},
{
"epoch": 0.230385255343658,
"grad_norm": 0.014196620668655414,
"learning_rate": 1.9078341013824884e-05,
"loss": 0.0005,
"step": 450
},
{
"epoch": 0.2559836170485089,
"grad_norm": 0.01865887251990293,
"learning_rate": 1.897593445980543e-05,
"loss": 0.0004,
"step": 500
},
{
"epoch": 0.2815819787533598,
"grad_norm": 0.016349380331586796,
"learning_rate": 1.887352790578597e-05,
"loss": 0.0004,
"step": 550
},
{
"epoch": 0.3071803404582107,
"grad_norm": 0.01413169547355441,
"learning_rate": 1.8771121351766516e-05,
"loss": 0.0003,
"step": 600
},
{
"epoch": 0.33277870216306155,
"grad_norm": 0.010152573483527069,
"learning_rate": 1.866871479774706e-05,
"loss": 0.0003,
"step": 650
},
{
"epoch": 0.35837706386791246,
"grad_norm": 0.01017225834937972,
"learning_rate": 1.85663082437276e-05,
"loss": 0.0003,
"step": 700
},
{
"epoch": 0.3839754255727633,
"grad_norm": 0.013000431764113899,
"learning_rate": 1.8463901689708145e-05,
"loss": 0.0003,
"step": 750
},
{
"epoch": 0.40957378727761423,
"grad_norm": 0.013254090310897974,
"learning_rate": 1.8361495135688684e-05,
"loss": 0.0003,
"step": 800
},
{
"epoch": 0.43517214898246515,
"grad_norm": 0.00966168025347855,
"learning_rate": 1.8259088581669227e-05,
"loss": 0.0002,
"step": 850
},
{
"epoch": 0.460770510687316,
"grad_norm": 0.00712828374097157,
"learning_rate": 1.815668202764977e-05,
"loss": 0.0002,
"step": 900
},
{
"epoch": 0.4863688723921669,
"grad_norm": 0.010211960398621855,
"learning_rate": 1.8054275473630313e-05,
"loss": 0.0002,
"step": 950
},
{
"epoch": 0.5119672340970178,
"grad_norm": 0.01261082529443916,
"learning_rate": 1.7951868919610856e-05,
"loss": 0.0002,
"step": 1000
},
{
"epoch": 0.5375655958018687,
"grad_norm": 0.011854166152885242,
"learning_rate": 1.78494623655914e-05,
"loss": 0.0002,
"step": 1050
},
{
"epoch": 0.5631639575067195,
"grad_norm": 0.009916438252277934,
"learning_rate": 1.7747055811571942e-05,
"loss": 0.0003,
"step": 1100
},
{
"epoch": 0.5887623192115704,
"grad_norm": 0.005025129187771019,
"learning_rate": 1.7644649257552485e-05,
"loss": 0.0002,
"step": 1150
},
{
"epoch": 0.6143606809164214,
"grad_norm": 0.006788101186805052,
"learning_rate": 1.7542242703533028e-05,
"loss": 0.0002,
"step": 1200
},
{
"epoch": 0.6399590426212722,
"grad_norm": 0.00978494920049853,
"learning_rate": 1.743983614951357e-05,
"loss": 0.0002,
"step": 1250
},
{
"epoch": 0.6655574043261231,
"grad_norm": 0.009494329535464946,
"learning_rate": 1.7337429595494113e-05,
"loss": 0.0002,
"step": 1300
},
{
"epoch": 0.6911557660309741,
"grad_norm": 0.010405998997878094,
"learning_rate": 1.7235023041474656e-05,
"loss": 0.0002,
"step": 1350
},
{
"epoch": 0.7167541277358249,
"grad_norm": 0.01406008137282546,
"learning_rate": 1.71326164874552e-05,
"loss": 0.0001,
"step": 1400
},
{
"epoch": 0.7423524894406758,
"grad_norm": 0.012511809648905668,
"learning_rate": 1.7030209933435742e-05,
"loss": 0.0002,
"step": 1450
},
{
"epoch": 0.7679508511455266,
"grad_norm": 0.014402924650339832,
"learning_rate": 1.6927803379416285e-05,
"loss": 0.0002,
"step": 1500
},
{
"epoch": 0.7935492128503776,
"grad_norm": 0.007985015692090758,
"learning_rate": 1.6825396825396828e-05,
"loss": 0.0002,
"step": 1550
},
{
"epoch": 0.8191475745552285,
"grad_norm": 0.016922696684847503,
"learning_rate": 1.6722990271377367e-05,
"loss": 0.0002,
"step": 1600
},
{
"epoch": 0.8447459362600793,
"grad_norm": 0.0058610303905484145,
"learning_rate": 1.6620583717357914e-05,
"loss": 0.0002,
"step": 1650
},
{
"epoch": 0.8703442979649303,
"grad_norm": 0.005758710688055935,
"learning_rate": 1.6518177163338457e-05,
"loss": 0.0001,
"step": 1700
},
{
"epoch": 0.8959426596697811,
"grad_norm": 0.010005518642531934,
"learning_rate": 1.6415770609318996e-05,
"loss": 0.0001,
"step": 1750
},
{
"epoch": 0.921541021374632,
"grad_norm": 0.006740034442277339,
"learning_rate": 1.6313364055299542e-05,
"loss": 0.0001,
"step": 1800
},
{
"epoch": 0.9471393830794829,
"grad_norm": 0.0062403985576606705,
"learning_rate": 1.6210957501280082e-05,
"loss": 0.0001,
"step": 1850
},
{
"epoch": 0.9727377447843338,
"grad_norm": 0.006225396199669411,
"learning_rate": 1.6108550947260625e-05,
"loss": 0.0001,
"step": 1900
},
{
"epoch": 0.9983361064891847,
"grad_norm": 0.006345423633281202,
"learning_rate": 1.600614439324117e-05,
"loss": 0.0001,
"step": 1950
},
{
"epoch": 1.0235504927684629,
"grad_norm": 0.00782413794335222,
"learning_rate": 1.590373783922171e-05,
"loss": 0.0001,
"step": 2000
},
{
"epoch": 1.0491488544733136,
"grad_norm": 0.004640264761208562,
"learning_rate": 1.5801331285202253e-05,
"loss": 0.0001,
"step": 2050
},
{
"epoch": 1.0747472161781646,
"grad_norm": 0.004837968806765287,
"learning_rate": 1.5698924731182796e-05,
"loss": 0.0001,
"step": 2100
},
{
"epoch": 1.1003455778830156,
"grad_norm": 0.003966873491178343,
"learning_rate": 1.559651817716334e-05,
"loss": 0.0001,
"step": 2150
},
{
"epoch": 1.1259439395878663,
"grad_norm": 0.007280756408676898,
"learning_rate": 1.5494111623143882e-05,
"loss": 0.0001,
"step": 2200
},
{
"epoch": 1.1515423012927173,
"grad_norm": 0.007676063330830094,
"learning_rate": 1.5391705069124425e-05,
"loss": 0.0001,
"step": 2250
},
{
"epoch": 1.177140662997568,
"grad_norm": 0.00759666513908814,
"learning_rate": 1.5289298515104968e-05,
"loss": 0.0001,
"step": 2300
},
{
"epoch": 1.202739024702419,
"grad_norm": 0.006094073180808279,
"learning_rate": 1.518689196108551e-05,
"loss": 0.0001,
"step": 2350
},
{
"epoch": 1.22833738640727,
"grad_norm": 0.009992171934592443,
"learning_rate": 1.5084485407066054e-05,
"loss": 0.0001,
"step": 2400
},
{
"epoch": 1.253935748112121,
"grad_norm": 0.004425939861516632,
"learning_rate": 1.4982078853046595e-05,
"loss": 0.0001,
"step": 2450
},
{
"epoch": 1.2795341098169717,
"grad_norm": 0.00618221779159838,
"learning_rate": 1.487967229902714e-05,
"loss": 0.0001,
"step": 2500
},
{
"epoch": 1.3051324715218227,
"grad_norm": 0.0037721408282199286,
"learning_rate": 1.477726574500768e-05,
"loss": 0.0001,
"step": 2550
},
{
"epoch": 1.3307308332266734,
"grad_norm": 0.006148728469232912,
"learning_rate": 1.4674859190988225e-05,
"loss": 0.0001,
"step": 2600
},
{
"epoch": 1.3563291949315244,
"grad_norm": 0.00661518038661282,
"learning_rate": 1.4572452636968768e-05,
"loss": 0.0001,
"step": 2650
},
{
"epoch": 1.3819275566363753,
"grad_norm": 0.0036588312853667437,
"learning_rate": 1.447004608294931e-05,
"loss": 0.0001,
"step": 2700
},
{
"epoch": 1.407525918341226,
"grad_norm": 0.005484459005497015,
"learning_rate": 1.4367639528929854e-05,
"loss": 0.0001,
"step": 2750
},
{
"epoch": 1.433124280046077,
"grad_norm": 0.010402616539983395,
"learning_rate": 1.4265232974910395e-05,
"loss": 0.0001,
"step": 2800
},
{
"epoch": 1.4587226417509278,
"grad_norm": 0.007335666061283071,
"learning_rate": 1.4162826420890938e-05,
"loss": 0.0001,
"step": 2850
},
{
"epoch": 1.4843210034557788,
"grad_norm": 0.006550005824502188,
"learning_rate": 1.4060419866871483e-05,
"loss": 0.0001,
"step": 2900
},
{
"epoch": 1.5099193651606297,
"grad_norm": 0.0027811575400146435,
"learning_rate": 1.3958013312852024e-05,
"loss": 0.0001,
"step": 2950
},
{
"epoch": 1.5355177268654807,
"grad_norm": 0.006308963330505965,
"learning_rate": 1.3855606758832567e-05,
"loss": 0.0001,
"step": 3000
},
{
"epoch": 1.5611160885703315,
"grad_norm": 0.006401332782035864,
"learning_rate": 1.3753200204813108e-05,
"loss": 0.0001,
"step": 3050
},
{
"epoch": 1.5867144502751824,
"grad_norm": 0.004718742517696451,
"learning_rate": 1.3650793650793652e-05,
"loss": 0.0001,
"step": 3100
},
{
"epoch": 1.6123128119800332,
"grad_norm": 0.003877950477268835,
"learning_rate": 1.3548387096774194e-05,
"loss": 0.0001,
"step": 3150
},
{
"epoch": 1.6379111736848841,
"grad_norm": 0.0063083392896106745,
"learning_rate": 1.3445980542754738e-05,
"loss": 0.0001,
"step": 3200
},
{
"epoch": 1.6635095353897351,
"grad_norm": 0.006412039922690925,
"learning_rate": 1.3343573988735281e-05,
"loss": 0.0001,
"step": 3250
},
{
"epoch": 1.689107897094586,
"grad_norm": 0.0029627793040849877,
"learning_rate": 1.3241167434715822e-05,
"loss": 0.0001,
"step": 3300
},
{
"epoch": 1.7147062587994368,
"grad_norm": 0.002164481803452725,
"learning_rate": 1.3138760880696367e-05,
"loss": 0.0001,
"step": 3350
},
{
"epoch": 1.7403046205042876,
"grad_norm": 0.004111311446657877,
"learning_rate": 1.3036354326676908e-05,
"loss": 0.0001,
"step": 3400
},
{
"epoch": 1.7659029822091385,
"grad_norm": 0.0024071410600000186,
"learning_rate": 1.2933947772657451e-05,
"loss": 0.0001,
"step": 3450
},
{
"epoch": 1.7915013439139895,
"grad_norm": 0.00428027777175206,
"learning_rate": 1.2831541218637992e-05,
"loss": 0.0001,
"step": 3500
},
{
"epoch": 1.8170997056188405,
"grad_norm": 0.0035937450146907115,
"learning_rate": 1.2729134664618537e-05,
"loss": 0.0001,
"step": 3550
},
{
"epoch": 1.8426980673236912,
"grad_norm": 0.007360372295628917,
"learning_rate": 1.262672811059908e-05,
"loss": 0.0001,
"step": 3600
},
{
"epoch": 1.8682964290285422,
"grad_norm": 0.004225210869508024,
"learning_rate": 1.2524321556579622e-05,
"loss": 0.0001,
"step": 3650
},
{
"epoch": 1.893894790733393,
"grad_norm": 0.00344941681643163,
"learning_rate": 1.2421915002560165e-05,
"loss": 0.0001,
"step": 3700
},
{
"epoch": 1.919493152438244,
"grad_norm": 0.0036839082828609084,
"learning_rate": 1.2319508448540707e-05,
"loss": 0.0001,
"step": 3750
},
{
"epoch": 1.945091514143095,
"grad_norm": 0.009934710271474315,
"learning_rate": 1.2217101894521251e-05,
"loss": 0.0001,
"step": 3800
},
{
"epoch": 1.9706898758479459,
"grad_norm": 0.0024663729558732648,
"learning_rate": 1.2114695340501794e-05,
"loss": 0.0001,
"step": 3850
},
{
"epoch": 1.9962882375527966,
"grad_norm": 0.003817898440024657,
"learning_rate": 1.2012288786482335e-05,
"loss": 0.0001,
"step": 3900
},
{
"epoch": 2.021502623832075,
"grad_norm": 0.0031639489328900696,
"learning_rate": 1.190988223246288e-05,
"loss": 0.0001,
"step": 3950
},
{
"epoch": 2.0471009855369258,
"grad_norm": 0.002020596329737904,
"learning_rate": 1.1807475678443421e-05,
"loss": 0.0001,
"step": 4000
},
{
"epoch": 2.0726993472417767,
"grad_norm": 0.0041297671592259375,
"learning_rate": 1.1705069124423964e-05,
"loss": 0.0001,
"step": 4050
},
{
"epoch": 2.0982977089466273,
"grad_norm": 0.0030187753698489852,
"learning_rate": 1.1602662570404507e-05,
"loss": 0.0001,
"step": 4100
},
{
"epoch": 2.1238960706514782,
"grad_norm": 0.006719688660763743,
"learning_rate": 1.150025601638505e-05,
"loss": 0.0001,
"step": 4150
},
{
"epoch": 2.149494432356329,
"grad_norm": 0.007455082822481147,
"learning_rate": 1.1397849462365593e-05,
"loss": 0.0001,
"step": 4200
},
{
"epoch": 2.17509279406118,
"grad_norm": 0.0020929393058777236,
"learning_rate": 1.1295442908346135e-05,
"loss": 0.0001,
"step": 4250
},
{
"epoch": 2.200691155766031,
"grad_norm": 0.004647943941373522,
"learning_rate": 1.1193036354326678e-05,
"loss": 0.0001,
"step": 4300
},
{
"epoch": 2.2262895174708817,
"grad_norm": 0.002919096778092067,
"learning_rate": 1.109062980030722e-05,
"loss": 0.0001,
"step": 4350
},
{
"epoch": 2.2518878791757326,
"grad_norm": 0.0022980302252630044,
"learning_rate": 1.0988223246287764e-05,
"loss": 0.0001,
"step": 4400
},
{
"epoch": 2.2774862408805836,
"grad_norm": 0.0007777129612344223,
"learning_rate": 1.0885816692268305e-05,
"loss": 0.0001,
"step": 4450
},
{
"epoch": 2.3030846025854346,
"grad_norm": 0.001856334209823885,
"learning_rate": 1.0783410138248848e-05,
"loss": 0.0001,
"step": 4500
},
{
"epoch": 2.3286829642902855,
"grad_norm": 0.009562277401713636,
"learning_rate": 1.0681003584229393e-05,
"loss": 0.0115,
"step": 4550
},
{
"epoch": 2.354281325995136,
"grad_norm": 0.008162550932912013,
"learning_rate": 1.0578597030209934e-05,
"loss": 0.0001,
"step": 4600
},
{
"epoch": 2.379879687699987,
"grad_norm": 0.006086517667163692,
"learning_rate": 1.0476190476190477e-05,
"loss": 0.0001,
"step": 4650
},
{
"epoch": 2.405478049404838,
"grad_norm": 0.0027507057924501116,
"learning_rate": 1.037378392217102e-05,
"loss": 0.0001,
"step": 4700
},
{
"epoch": 2.431076411109689,
"grad_norm": 0.005652923712553444,
"learning_rate": 1.0271377368151563e-05,
"loss": 0.0001,
"step": 4750
},
{
"epoch": 2.45667477281454,
"grad_norm": 0.0022548738506609723,
"learning_rate": 1.0168970814132104e-05,
"loss": 0.0001,
"step": 4800
},
{
"epoch": 2.482273134519391,
"grad_norm": 0.0034968078517645545,
"learning_rate": 1.0066564260112648e-05,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 2.507871496224242,
"grad_norm": 0.0029725026316189704,
"learning_rate": 9.96415770609319e-06,
"loss": 0.0001,
"step": 4900
},
{
"epoch": 2.5334698579290924,
"grad_norm": 0.002417471051371214,
"learning_rate": 9.861751152073733e-06,
"loss": 0.0,
"step": 4950
},
{
"epoch": 2.5590682196339434,
"grad_norm": 0.0037793013717612994,
"learning_rate": 9.759344598054277e-06,
"loss": 0.0,
"step": 5000
},
{
"epoch": 2.5846665813387943,
"grad_norm": 0.0020282875993942345,
"learning_rate": 9.65693804403482e-06,
"loss": 0.0,
"step": 5050
},
{
"epoch": 2.6102649430436453,
"grad_norm": 0.006653751475623009,
"learning_rate": 9.554531490015361e-06,
"loss": 0.0,
"step": 5100
},
{
"epoch": 2.6358633047484963,
"grad_norm": 0.0006177303859045243,
"learning_rate": 9.452124935995904e-06,
"loss": 0.0,
"step": 5150
},
{
"epoch": 2.661461666453347,
"grad_norm": 0.001574016672401847,
"learning_rate": 9.349718381976447e-06,
"loss": 0.0,
"step": 5200
},
{
"epoch": 2.6870600281581978,
"grad_norm": 0.002722823477162341,
"learning_rate": 9.24731182795699e-06,
"loss": 0.0,
"step": 5250
},
{
"epoch": 2.7126583898630487,
"grad_norm": 0.0031352467257030996,
"learning_rate": 9.144905273937533e-06,
"loss": 0.0,
"step": 5300
},
{
"epoch": 2.7382567515678997,
"grad_norm": 0.002855241030593728,
"learning_rate": 9.042498719918076e-06,
"loss": 0.0,
"step": 5350
},
{
"epoch": 2.7638551132727507,
"grad_norm": 0.001616961495391244,
"learning_rate": 8.940092165898619e-06,
"loss": 0.0,
"step": 5400
},
{
"epoch": 2.789453474977601,
"grad_norm": 0.005071888691549564,
"learning_rate": 8.837685611879161e-06,
"loss": 0.0001,
"step": 5450
},
{
"epoch": 2.815051836682452,
"grad_norm": 0.0029692529220848914,
"learning_rate": 8.735279057859704e-06,
"loss": 0.0,
"step": 5500
},
{
"epoch": 2.840650198387303,
"grad_norm": 0.0004172545224757254,
"learning_rate": 8.632872503840246e-06,
"loss": 0.0,
"step": 5550
},
{
"epoch": 2.866248560092154,
"grad_norm": 0.008642812223624062,
"learning_rate": 8.530465949820788e-06,
"loss": 0.0,
"step": 5600
},
{
"epoch": 2.891846921797005,
"grad_norm": 0.0027205512248527812,
"learning_rate": 8.428059395801333e-06,
"loss": 0.0001,
"step": 5650
},
{
"epoch": 2.9174452835018556,
"grad_norm": 0.0019007511180201269,
"learning_rate": 8.325652841781874e-06,
"loss": 0.0,
"step": 5700
},
{
"epoch": 2.943043645206707,
"grad_norm": 0.0011932680335579203,
"learning_rate": 8.223246287762417e-06,
"loss": 0.0,
"step": 5750
},
{
"epoch": 2.9686420069115576,
"grad_norm": 0.0017683528985724605,
"learning_rate": 8.12083973374296e-06,
"loss": 0.0,
"step": 5800
},
{
"epoch": 2.9942403686164085,
"grad_norm": 0.0010172759283751088,
"learning_rate": 8.018433179723503e-06,
"loss": 0.0,
"step": 5850
},
{
"epoch": 3.0194547548956865,
"grad_norm": 0.0035687260604544088,
"learning_rate": 7.916026625704046e-06,
"loss": 0.0,
"step": 5900
},
{
"epoch": 3.0450531166005375,
"grad_norm": 0.0017674951945728509,
"learning_rate": 7.813620071684589e-06,
"loss": 0.0,
"step": 5950
},
{
"epoch": 3.0706514783053884,
"grad_norm": 0.001417796923981359,
"learning_rate": 7.711213517665132e-06,
"loss": 0.0,
"step": 6000
},
{
"epoch": 3.0962498400102394,
"grad_norm": 0.001392466393900218,
"learning_rate": 7.6088069636456744e-06,
"loss": 0.0,
"step": 6050
},
{
"epoch": 3.1218482017150904,
"grad_norm": 0.0010317131953453697,
"learning_rate": 7.5064004096262165e-06,
"loss": 0.0,
"step": 6100
},
{
"epoch": 3.1474465634199413,
"grad_norm": 0.002671567960001357,
"learning_rate": 7.403993855606759e-06,
"loss": 0.0,
"step": 6150
},
{
"epoch": 3.173044925124792,
"grad_norm": 0.0010993308683270022,
"learning_rate": 7.301587301587301e-06,
"loss": 0.0,
"step": 6200
},
{
"epoch": 3.198643286829643,
"grad_norm": 0.0011792521113742672,
"learning_rate": 7.199180747567845e-06,
"loss": 0.0,
"step": 6250
},
{
"epoch": 3.224241648534494,
"grad_norm": 0.002377211855286283,
"learning_rate": 7.096774193548388e-06,
"loss": 0.0,
"step": 6300
},
{
"epoch": 3.2498400102393448,
"grad_norm": 0.0013709631857059255,
"learning_rate": 6.994367639528931e-06,
"loss": 0.0,
"step": 6350
},
{
"epoch": 3.2754383719441957,
"grad_norm": 0.0010686686441566805,
"learning_rate": 6.891961085509473e-06,
"loss": 0.0,
"step": 6400
},
{
"epoch": 3.3010367336490463,
"grad_norm": 0.0013656971047177361,
"learning_rate": 6.789554531490016e-06,
"loss": 0.0,
"step": 6450
},
{
"epoch": 3.3266350953538972,
"grad_norm": 0.0030691234388346175,
"learning_rate": 6.687147977470559e-06,
"loss": 0.0,
"step": 6500
},
{
"epoch": 3.352233457058748,
"grad_norm": 0.0016958004183478155,
"learning_rate": 6.584741423451101e-06,
"loss": 0.0,
"step": 6550
},
{
"epoch": 3.377831818763599,
"grad_norm": 0.002679660529985062,
"learning_rate": 6.4823348694316445e-06,
"loss": 0.0,
"step": 6600
},
{
"epoch": 3.40343018046845,
"grad_norm": 0.0009123671464204819,
"learning_rate": 6.379928315412187e-06,
"loss": 0.0,
"step": 6650
},
{
"epoch": 3.4290285421733007,
"grad_norm": 0.0011492363622442438,
"learning_rate": 6.2775217613927295e-06,
"loss": 0.0,
"step": 6700
},
{
"epoch": 3.4546269038781516,
"grad_norm": 0.0008232117328172145,
"learning_rate": 6.175115207373272e-06,
"loss": 0.0,
"step": 6750
},
{
"epoch": 3.4802252655830026,
"grad_norm": 0.0022449544565699437,
"learning_rate": 6.072708653353815e-06,
"loss": 0.0,
"step": 6800
},
{
"epoch": 3.5058236272878536,
"grad_norm": 0.0015276485422571994,
"learning_rate": 5.970302099334357e-06,
"loss": 0.0,
"step": 6850
},
{
"epoch": 3.5314219889927045,
"grad_norm": 0.0011827584209431721,
"learning_rate": 5.867895545314901e-06,
"loss": 0.0,
"step": 6900
},
{
"epoch": 3.5570203506975555,
"grad_norm": 0.003917245208198937,
"learning_rate": 5.765488991295444e-06,
"loss": 0.0,
"step": 6950
},
{
"epoch": 3.5826187124024065,
"grad_norm": 0.0016006551963278512,
"learning_rate": 5.663082437275986e-06,
"loss": 0.0,
"step": 7000
},
{
"epoch": 3.608217074107257,
"grad_norm": 0.0006324168438582504,
"learning_rate": 5.560675883256529e-06,
"loss": 0.0,
"step": 7050
},
{
"epoch": 3.633815435812108,
"grad_norm": 0.0019190937453439484,
"learning_rate": 5.458269329237072e-06,
"loss": 0.0,
"step": 7100
},
{
"epoch": 3.659413797516959,
"grad_norm": 0.0014235404292782222,
"learning_rate": 5.355862775217614e-06,
"loss": 0.0,
"step": 7150
},
{
"epoch": 3.68501215922181,
"grad_norm": 0.002036273934913596,
"learning_rate": 5.253456221198157e-06,
"loss": 0.0,
"step": 7200
},
{
"epoch": 3.710610520926661,
"grad_norm": 0.001317406088761277,
"learning_rate": 5.1510496671787e-06,
"loss": 0.0,
"step": 7250
},
{
"epoch": 3.7362088826315114,
"grad_norm": 0.0036103172590374257,
"learning_rate": 5.0486431131592425e-06,
"loss": 0.0,
"step": 7300
},
{
"epoch": 3.7618072443363624,
"grad_norm": 0.004126819254015297,
"learning_rate": 4.946236559139785e-06,
"loss": 0.0,
"step": 7350
},
{
"epoch": 3.7874056060412133,
"grad_norm": 0.0016332834938483983,
"learning_rate": 4.843830005120328e-06,
"loss": 0.0,
"step": 7400
},
{
"epoch": 3.8130039677460643,
"grad_norm": 0.0004651327688046216,
"learning_rate": 4.741423451100871e-06,
"loss": 0.0,
"step": 7450
},
{
"epoch": 3.8386023294509153,
"grad_norm": 0.0014442256648981874,
"learning_rate": 4.639016897081414e-06,
"loss": 0.0,
"step": 7500
},
{
"epoch": 3.864200691155766,
"grad_norm": 0.0044845026985442255,
"learning_rate": 4.536610343061956e-06,
"loss": 0.0,
"step": 7550
},
{
"epoch": 3.889799052860617,
"grad_norm": 0.0018809502401269417,
"learning_rate": 4.434203789042499e-06,
"loss": 0.0,
"step": 7600
},
{
"epoch": 3.9153974145654677,
"grad_norm": 0.0038926669396500383,
"learning_rate": 4.331797235023042e-06,
"loss": 0.0,
"step": 7650
},
{
"epoch": 3.9409957762703187,
"grad_norm": 0.004267638189231195,
"learning_rate": 4.229390681003585e-06,
"loss": 0.0,
"step": 7700
},
{
"epoch": 3.9665941379751697,
"grad_norm": 0.00238145784225052,
"learning_rate": 4.126984126984127e-06,
"loss": 0.0,
"step": 7750
},
{
"epoch": 3.99219249968002,
"grad_norm": 0.0014756463100693462,
"learning_rate": 4.0245775729646705e-06,
"loss": 0.0,
"step": 7800
},
{
"epoch": 4.017406885959298,
"grad_norm": 0.0014856407081637794,
"learning_rate": 3.9221710189452126e-06,
"loss": 0.0,
"step": 7850
},
{
"epoch": 4.04300524766415,
"grad_norm": 0.0023551139030403893,
"learning_rate": 3.8197644649257554e-06,
"loss": 0.0,
"step": 7900
},
{
"epoch": 4.068603609369,
"grad_norm": 0.0011981597110419454,
"learning_rate": 3.7173579109062983e-06,
"loss": 0.0,
"step": 7950
},
{
"epoch": 4.0942019710738515,
"grad_norm": 0.0028843508232214783,
"learning_rate": 3.6149513568868412e-06,
"loss": 0.0,
"step": 8000
},
{
"epoch": 4.119800332778702,
"grad_norm": 0.0018843735220208588,
"learning_rate": 3.5125448028673837e-06,
"loss": 0.0,
"step": 8050
},
{
"epoch": 4.1453986944835535,
"grad_norm": 0.0009664312361068302,
"learning_rate": 3.4101382488479266e-06,
"loss": 0.0,
"step": 8100
},
{
"epoch": 4.170997056188404,
"grad_norm": 0.0007573250297130035,
"learning_rate": 3.3077316948284695e-06,
"loss": 0.0,
"step": 8150
},
{
"epoch": 4.1965954178932545,
"grad_norm": 0.0011501400163836143,
"learning_rate": 3.205325140809012e-06,
"loss": 0.0,
"step": 8200
},
{
"epoch": 4.222193779598106,
"grad_norm": 0.001612883833598221,
"learning_rate": 3.1029185867895553e-06,
"loss": 0.0,
"step": 8250
},
{
"epoch": 4.2477921413029565,
"grad_norm": 0.0018641211713703483,
"learning_rate": 3.0005120327700977e-06,
"loss": 0.0,
"step": 8300
},
{
"epoch": 4.273390503007808,
"grad_norm": 0.0009646184071954378,
"learning_rate": 2.89810547875064e-06,
"loss": 0.0,
"step": 8350
},
{
"epoch": 4.298988864712658,
"grad_norm": 0.0009154804456547847,
"learning_rate": 2.7956989247311827e-06,
"loss": 0.0,
"step": 8400
},
{
"epoch": 4.324587226417509,
"grad_norm": 0.0020826965736280532,
"learning_rate": 2.693292370711726e-06,
"loss": 0.0,
"step": 8450
},
{
"epoch": 4.35018558812236,
"grad_norm": 0.0011199777755488004,
"learning_rate": 2.5908858166922684e-06,
"loss": 0.0,
"step": 8500
},
{
"epoch": 4.375783949827211,
"grad_norm": 0.0008422274985046506,
"learning_rate": 2.4884792626728113e-06,
"loss": 0.0,
"step": 8550
},
{
"epoch": 4.401382311532062,
"grad_norm": 0.0006334420803463363,
"learning_rate": 2.386072708653354e-06,
"loss": 0.0,
"step": 8600
},
{
"epoch": 4.426980673236913,
"grad_norm": 0.001586741258322779,
"learning_rate": 2.2836661546338967e-06,
"loss": 0.0,
"step": 8650
},
{
"epoch": 4.452579034941763,
"grad_norm": 0.0009727630299961603,
"learning_rate": 2.1812596006144396e-06,
"loss": 0.0,
"step": 8700
},
{
"epoch": 4.478177396646615,
"grad_norm": 0.0018829318719699433,
"learning_rate": 2.078853046594982e-06,
"loss": 0.0,
"step": 8750
},
{
"epoch": 4.503775758351465,
"grad_norm": 0.0010542211381998285,
"learning_rate": 1.976446492575525e-06,
"loss": 0.0,
"step": 8800
},
{
"epoch": 4.529374120056317,
"grad_norm": 0.0008019247102530823,
"learning_rate": 1.8740399385560678e-06,
"loss": 0.0,
"step": 8850
},
{
"epoch": 4.554972481761167,
"grad_norm": 0.0025552327229986455,
"learning_rate": 1.7716333845366105e-06,
"loss": 0.0,
"step": 8900
},
{
"epoch": 4.580570843466019,
"grad_norm": 0.003395542465106546,
"learning_rate": 1.6692268305171534e-06,
"loss": 0.0,
"step": 8950
},
{
"epoch": 4.606169205170869,
"grad_norm": 0.0013482230913774654,
"learning_rate": 1.5668202764976959e-06,
"loss": 0.0,
"step": 9000
},
{
"epoch": 4.63176756687572,
"grad_norm": 0.0015382731152315148,
"learning_rate": 1.4644137224782387e-06,
"loss": 0.0,
"step": 9050
},
{
"epoch": 4.657365928580571,
"grad_norm": 0.0019761534195212524,
"learning_rate": 1.3620071684587816e-06,
"loss": 0.0,
"step": 9100
},
{
"epoch": 4.682964290285422,
"grad_norm": 0.00038991149426242233,
"learning_rate": 1.259600614439324e-06,
"loss": 0.0,
"step": 9150
},
{
"epoch": 4.708562651990272,
"grad_norm": 0.002601115466918085,
"learning_rate": 1.157194060419867e-06,
"loss": 0.0,
"step": 9200
},
{
"epoch": 4.7341610136951235,
"grad_norm": 0.002305779477709061,
"learning_rate": 1.0547875064004097e-06,
"loss": 0.0,
"step": 9250
},
{
"epoch": 4.759759375399974,
"grad_norm": 0.0014673554584682805,
"learning_rate": 9.523809523809525e-07,
"loss": 0.0,
"step": 9300
},
{
"epoch": 4.7853577371048255,
"grad_norm": 0.0032349538713087054,
"learning_rate": 8.499743983614952e-07,
"loss": 0.0,
"step": 9350
},
{
"epoch": 4.810956098809676,
"grad_norm": 0.000820434912366508,
"learning_rate": 7.475678443420379e-07,
"loss": 0.0,
"step": 9400
},
{
"epoch": 4.836554460514527,
"grad_norm": 0.0018281915833400296,
"learning_rate": 6.451612903225807e-07,
"loss": 0.0,
"step": 9450
},
{
"epoch": 4.862152822219378,
"grad_norm": 0.001079014832848559,
"learning_rate": 5.427547363031235e-07,
"loss": 0.0,
"step": 9500
},
{
"epoch": 4.8877511839242285,
"grad_norm": 0.0009643703560768136,
"learning_rate": 4.4034818228366616e-07,
"loss": 0.0,
"step": 9550
},
{
"epoch": 4.91334954562908,
"grad_norm": 0.0012576968463295232,
"learning_rate": 3.3794162826420895e-07,
"loss": 0.0,
"step": 9600
},
{
"epoch": 4.93894790733393,
"grad_norm": 0.0017081309137603447,
"learning_rate": 2.355350742447517e-07,
"loss": 0.0,
"step": 9650
},
{
"epoch": 4.964546269038782,
"grad_norm": 0.0014382920472094229,
"learning_rate": 1.3312852022529444e-07,
"loss": 0.0,
"step": 9700
},
{
"epoch": 4.990144630743632,
"grad_norm": 0.0014054516528993024,
"learning_rate": 3.0721966205837177e-08,
"loss": 0.0,
"step": 9750
}
],
"logging_steps": 50,
"max_steps": 9765,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.471170805648589e+16,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}