harry_potter_1Epoch / checkpoint-1450 /trainer_state.json
kadeck's picture
Upload LoRA adapter from Training Data Detection Lab
124001f verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 1450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006896551724137931,
"grad_norm": 0.23170334100723267,
"learning_rate": 4.0909090909090915e-05,
"loss": 3.881548309326172,
"step": 10
},
{
"epoch": 0.013793103448275862,
"grad_norm": 0.2462630271911621,
"learning_rate": 8.636363636363637e-05,
"loss": 3.794137954711914,
"step": 20
},
{
"epoch": 0.020689655172413793,
"grad_norm": 0.2365545630455017,
"learning_rate": 0.0001318181818181818,
"loss": 3.7382652282714846,
"step": 30
},
{
"epoch": 0.027586206896551724,
"grad_norm": 0.2707105576992035,
"learning_rate": 0.00017727272727272728,
"loss": 3.7427467346191405,
"step": 40
},
{
"epoch": 0.034482758620689655,
"grad_norm": 0.24755217134952545,
"learning_rate": 0.0001992887624466572,
"loss": 3.61156005859375,
"step": 50
},
{
"epoch": 0.041379310344827586,
"grad_norm": 0.3027788996696472,
"learning_rate": 0.00019786628733997158,
"loss": 3.653477096557617,
"step": 60
},
{
"epoch": 0.04827586206896552,
"grad_norm": 0.2786545753479004,
"learning_rate": 0.00019644381223328592,
"loss": 3.6743812561035156,
"step": 70
},
{
"epoch": 0.05517241379310345,
"grad_norm": 0.2662774622440338,
"learning_rate": 0.0001950213371266003,
"loss": 3.5414581298828125,
"step": 80
},
{
"epoch": 0.06206896551724138,
"grad_norm": 0.30465996265411377,
"learning_rate": 0.00019359886201991466,
"loss": 3.5396636962890624,
"step": 90
},
{
"epoch": 0.06896551724137931,
"grad_norm": 0.26726341247558594,
"learning_rate": 0.00019217638691322903,
"loss": 3.56505126953125,
"step": 100
},
{
"epoch": 0.06896551724137931,
"eval_loss": 3.540494680404663,
"eval_runtime": 20.7606,
"eval_samples_per_second": 59.68,
"eval_steps_per_second": 7.466,
"step": 100
},
{
"epoch": 0.07586206896551724,
"grad_norm": 0.28638505935668945,
"learning_rate": 0.00019075391180654338,
"loss": 3.5479259490966797,
"step": 110
},
{
"epoch": 0.08275862068965517,
"grad_norm": 0.26623088121414185,
"learning_rate": 0.00018933143669985775,
"loss": 3.538827133178711,
"step": 120
},
{
"epoch": 0.0896551724137931,
"grad_norm": 0.3132971525192261,
"learning_rate": 0.00018790896159317212,
"loss": 3.500360107421875,
"step": 130
},
{
"epoch": 0.09655172413793103,
"grad_norm": 0.2965874969959259,
"learning_rate": 0.0001864864864864865,
"loss": 3.5192401885986326,
"step": 140
},
{
"epoch": 0.10344827586206896,
"grad_norm": 0.2784412205219269,
"learning_rate": 0.00018506401137980089,
"loss": 3.6045772552490236,
"step": 150
},
{
"epoch": 0.1103448275862069,
"grad_norm": 0.3488187789916992,
"learning_rate": 0.00018364153627311523,
"loss": 3.497270202636719,
"step": 160
},
{
"epoch": 0.11724137931034483,
"grad_norm": 0.2777283191680908,
"learning_rate": 0.0001822190611664296,
"loss": 3.417000961303711,
"step": 170
},
{
"epoch": 0.12413793103448276,
"grad_norm": 0.3144644796848297,
"learning_rate": 0.00018079658605974397,
"loss": 3.5388118743896486,
"step": 180
},
{
"epoch": 0.1310344827586207,
"grad_norm": 0.32053834199905396,
"learning_rate": 0.00017937411095305834,
"loss": 3.4628257751464844,
"step": 190
},
{
"epoch": 0.13793103448275862,
"grad_norm": 0.3077249228954315,
"learning_rate": 0.00017795163584637268,
"loss": 3.456898498535156,
"step": 200
},
{
"epoch": 0.13793103448275862,
"eval_loss": 3.459949016571045,
"eval_runtime": 23.2077,
"eval_samples_per_second": 53.388,
"eval_steps_per_second": 6.679,
"step": 200
},
{
"epoch": 0.14482758620689656,
"grad_norm": 0.3117501139640808,
"learning_rate": 0.00017652916073968705,
"loss": 3.4756256103515626,
"step": 210
},
{
"epoch": 0.15172413793103448,
"grad_norm": 0.31221652030944824,
"learning_rate": 0.00017510668563300142,
"loss": 3.4557666778564453,
"step": 220
},
{
"epoch": 0.15862068965517243,
"grad_norm": 0.30920568108558655,
"learning_rate": 0.0001736842105263158,
"loss": 3.5030059814453125,
"step": 230
},
{
"epoch": 0.16551724137931034,
"grad_norm": 0.3118240535259247,
"learning_rate": 0.00017226173541963016,
"loss": 3.463837814331055,
"step": 240
},
{
"epoch": 0.1724137931034483,
"grad_norm": 0.29713189601898193,
"learning_rate": 0.00017083926031294454,
"loss": 3.482789993286133,
"step": 250
},
{
"epoch": 0.1793103448275862,
"grad_norm": 0.31772103905677795,
"learning_rate": 0.0001694167852062589,
"loss": 3.4497989654541015,
"step": 260
},
{
"epoch": 0.18620689655172415,
"grad_norm": 0.31749677658081055,
"learning_rate": 0.00016799431009957328,
"loss": 3.426702880859375,
"step": 270
},
{
"epoch": 0.19310344827586207,
"grad_norm": 0.3107665479183197,
"learning_rate": 0.00016657183499288765,
"loss": 3.396825408935547,
"step": 280
},
{
"epoch": 0.2,
"grad_norm": 0.32543718814849854,
"learning_rate": 0.000165149359886202,
"loss": 3.4777637481689454,
"step": 290
},
{
"epoch": 0.20689655172413793,
"grad_norm": 0.3045833110809326,
"learning_rate": 0.00016372688477951636,
"loss": 3.417892074584961,
"step": 300
},
{
"epoch": 0.20689655172413793,
"eval_loss": 3.4208664894104004,
"eval_runtime": 22.6365,
"eval_samples_per_second": 54.735,
"eval_steps_per_second": 6.847,
"step": 300
},
{
"epoch": 0.21379310344827587,
"grad_norm": 0.3320230543613434,
"learning_rate": 0.00016230440967283073,
"loss": 3.4840187072753905,
"step": 310
},
{
"epoch": 0.2206896551724138,
"grad_norm": 0.30821651220321655,
"learning_rate": 0.0001608819345661451,
"loss": 3.362594985961914,
"step": 320
},
{
"epoch": 0.22758620689655173,
"grad_norm": 0.330126017332077,
"learning_rate": 0.00015945945945945947,
"loss": 3.4734111785888673,
"step": 330
},
{
"epoch": 0.23448275862068965,
"grad_norm": 0.30710867047309875,
"learning_rate": 0.00015803698435277384,
"loss": 3.4003364562988283,
"step": 340
},
{
"epoch": 0.2413793103448276,
"grad_norm": 0.30796217918395996,
"learning_rate": 0.0001566145092460882,
"loss": 3.4497623443603516,
"step": 350
},
{
"epoch": 0.2482758620689655,
"grad_norm": 0.31471186876296997,
"learning_rate": 0.00015519203413940258,
"loss": 3.410964584350586,
"step": 360
},
{
"epoch": 0.25517241379310346,
"grad_norm": 0.31033286452293396,
"learning_rate": 0.00015376955903271693,
"loss": 3.4347129821777345,
"step": 370
},
{
"epoch": 0.2620689655172414,
"grad_norm": 0.32137277722358704,
"learning_rate": 0.0001523470839260313,
"loss": 3.4425697326660156,
"step": 380
},
{
"epoch": 0.2689655172413793,
"grad_norm": 0.3627667725086212,
"learning_rate": 0.00015092460881934567,
"loss": 3.3625614166259767,
"step": 390
},
{
"epoch": 0.27586206896551724,
"grad_norm": 0.3407364785671234,
"learning_rate": 0.00014950213371266004,
"loss": 3.3849735260009766,
"step": 400
},
{
"epoch": 0.27586206896551724,
"eval_loss": 3.3975093364715576,
"eval_runtime": 24.3998,
"eval_samples_per_second": 50.779,
"eval_steps_per_second": 6.353,
"step": 400
},
{
"epoch": 0.2827586206896552,
"grad_norm": 0.32097798585891724,
"learning_rate": 0.00014807965860597438,
"loss": 3.417188262939453,
"step": 410
},
{
"epoch": 0.2896551724137931,
"grad_norm": 0.33030977845191956,
"learning_rate": 0.00014665718349928875,
"loss": 3.429982376098633,
"step": 420
},
{
"epoch": 0.296551724137931,
"grad_norm": 0.3301655054092407,
"learning_rate": 0.00014523470839260315,
"loss": 3.338633728027344,
"step": 430
},
{
"epoch": 0.30344827586206896,
"grad_norm": 0.32900184392929077,
"learning_rate": 0.00014381223328591752,
"loss": 3.376237487792969,
"step": 440
},
{
"epoch": 0.3103448275862069,
"grad_norm": 0.3433472812175751,
"learning_rate": 0.0001423897581792319,
"loss": 3.3466358184814453,
"step": 450
},
{
"epoch": 0.31724137931034485,
"grad_norm": 0.31025466322898865,
"learning_rate": 0.00014096728307254623,
"loss": 3.371001052856445,
"step": 460
},
{
"epoch": 0.32413793103448274,
"grad_norm": 0.3327469527721405,
"learning_rate": 0.0001395448079658606,
"loss": 3.355500411987305,
"step": 470
},
{
"epoch": 0.3310344827586207,
"grad_norm": 0.34813839197158813,
"learning_rate": 0.00013812233285917497,
"loss": 3.3663055419921877,
"step": 480
},
{
"epoch": 0.33793103448275863,
"grad_norm": 0.35365816950798035,
"learning_rate": 0.00013669985775248934,
"loss": 3.4172080993652343,
"step": 490
},
{
"epoch": 0.3448275862068966,
"grad_norm": 0.31251364946365356,
"learning_rate": 0.0001352773826458037,
"loss": 3.334903335571289,
"step": 500
},
{
"epoch": 0.3448275862068966,
"eval_loss": 3.377959728240967,
"eval_runtime": 22.4366,
"eval_samples_per_second": 55.222,
"eval_steps_per_second": 6.908,
"step": 500
},
{
"epoch": 0.35172413793103446,
"grad_norm": 0.3221539855003357,
"learning_rate": 0.00013385490753911806,
"loss": 3.3751365661621096,
"step": 510
},
{
"epoch": 0.3586206896551724,
"grad_norm": 0.31918609142303467,
"learning_rate": 0.00013243243243243243,
"loss": 3.360042953491211,
"step": 520
},
{
"epoch": 0.36551724137931035,
"grad_norm": 0.3304445445537567,
"learning_rate": 0.00013100995732574682,
"loss": 3.355394744873047,
"step": 530
},
{
"epoch": 0.3724137931034483,
"grad_norm": 0.31707221269607544,
"learning_rate": 0.00012958748221906117,
"loss": 3.3813400268554688,
"step": 540
},
{
"epoch": 0.3793103448275862,
"grad_norm": 0.3358207643032074,
"learning_rate": 0.00012816500711237554,
"loss": 3.4241260528564452,
"step": 550
},
{
"epoch": 0.38620689655172413,
"grad_norm": 0.3196071982383728,
"learning_rate": 0.0001267425320056899,
"loss": 3.3610572814941406,
"step": 560
},
{
"epoch": 0.3931034482758621,
"grad_norm": 0.31611961126327515,
"learning_rate": 0.00012532005689900428,
"loss": 3.328931427001953,
"step": 570
},
{
"epoch": 0.4,
"grad_norm": 0.33409208059310913,
"learning_rate": 0.00012389758179231865,
"loss": 3.32372932434082,
"step": 580
},
{
"epoch": 0.4068965517241379,
"grad_norm": 0.322489470243454,
"learning_rate": 0.000122475106685633,
"loss": 3.389539337158203,
"step": 590
},
{
"epoch": 0.41379310344827586,
"grad_norm": 0.3401939272880554,
"learning_rate": 0.00012105263157894738,
"loss": 3.292881393432617,
"step": 600
},
{
"epoch": 0.41379310344827586,
"eval_loss": 3.3636481761932373,
"eval_runtime": 24.5094,
"eval_samples_per_second": 50.552,
"eval_steps_per_second": 6.324,
"step": 600
},
{
"epoch": 0.4206896551724138,
"grad_norm": 0.36831986904144287,
"learning_rate": 0.00011963015647226175,
"loss": 3.3248523712158202,
"step": 610
},
{
"epoch": 0.42758620689655175,
"grad_norm": 0.31736257672309875,
"learning_rate": 0.00011820768136557612,
"loss": 3.328786849975586,
"step": 620
},
{
"epoch": 0.43448275862068964,
"grad_norm": 0.3393501341342926,
"learning_rate": 0.00011678520625889046,
"loss": 3.3191741943359374,
"step": 630
},
{
"epoch": 0.4413793103448276,
"grad_norm": 0.3327409327030182,
"learning_rate": 0.00011536273115220485,
"loss": 3.4381561279296875,
"step": 640
},
{
"epoch": 0.4482758620689655,
"grad_norm": 0.32990631461143494,
"learning_rate": 0.00011394025604551922,
"loss": 3.4140262603759766,
"step": 650
},
{
"epoch": 0.45517241379310347,
"grad_norm": 0.3171171247959137,
"learning_rate": 0.00011251778093883359,
"loss": 3.358592987060547,
"step": 660
},
{
"epoch": 0.46206896551724136,
"grad_norm": 0.319813072681427,
"learning_rate": 0.00011109530583214793,
"loss": 3.3165565490722657,
"step": 670
},
{
"epoch": 0.4689655172413793,
"grad_norm": 0.3260372579097748,
"learning_rate": 0.0001096728307254623,
"loss": 3.353110122680664,
"step": 680
},
{
"epoch": 0.47586206896551725,
"grad_norm": 0.3186911642551422,
"learning_rate": 0.00010825035561877668,
"loss": 3.4071842193603517,
"step": 690
},
{
"epoch": 0.4827586206896552,
"grad_norm": 0.3407030701637268,
"learning_rate": 0.00010682788051209105,
"loss": 3.3047447204589844,
"step": 700
},
{
"epoch": 0.4827586206896552,
"eval_loss": 3.353717803955078,
"eval_runtime": 23.3132,
"eval_samples_per_second": 53.146,
"eval_steps_per_second": 6.649,
"step": 700
},
{
"epoch": 0.4896551724137931,
"grad_norm": 0.34808802604675293,
"learning_rate": 0.0001054054054054054,
"loss": 3.3398147583007813,
"step": 710
},
{
"epoch": 0.496551724137931,
"grad_norm": 0.31498315930366516,
"learning_rate": 0.00010398293029871977,
"loss": 3.3216724395751953,
"step": 720
},
{
"epoch": 0.503448275862069,
"grad_norm": 0.32081830501556396,
"learning_rate": 0.00010256045519203414,
"loss": 3.3753803253173826,
"step": 730
},
{
"epoch": 0.5103448275862069,
"grad_norm": 0.38737478852272034,
"learning_rate": 0.00010113798008534852,
"loss": 3.347806930541992,
"step": 740
},
{
"epoch": 0.5172413793103449,
"grad_norm": 0.3532518744468689,
"learning_rate": 9.971550497866288e-05,
"loss": 3.3405616760253904,
"step": 750
},
{
"epoch": 0.5241379310344828,
"grad_norm": 0.3295048773288727,
"learning_rate": 9.829302987197725e-05,
"loss": 3.3597396850585937,
"step": 760
},
{
"epoch": 0.5310344827586206,
"grad_norm": 0.3602186441421509,
"learning_rate": 9.68705547652916e-05,
"loss": 3.3083240509033205,
"step": 770
},
{
"epoch": 0.5379310344827586,
"grad_norm": 0.3464964032173157,
"learning_rate": 9.544807965860598e-05,
"loss": 3.3121707916259764,
"step": 780
},
{
"epoch": 0.5448275862068965,
"grad_norm": 0.314314067363739,
"learning_rate": 9.402560455192035e-05,
"loss": 3.3149440765380858,
"step": 790
},
{
"epoch": 0.5517241379310345,
"grad_norm": 0.3291971683502197,
"learning_rate": 9.260312944523472e-05,
"loss": 3.3775871276855467,
"step": 800
},
{
"epoch": 0.5517241379310345,
"eval_loss": 3.3451411724090576,
"eval_runtime": 22.871,
"eval_samples_per_second": 54.173,
"eval_steps_per_second": 6.777,
"step": 800
},
{
"epoch": 0.5586206896551724,
"grad_norm": 0.33278515934944153,
"learning_rate": 9.118065433854907e-05,
"loss": 3.348500061035156,
"step": 810
},
{
"epoch": 0.5655172413793104,
"grad_norm": 0.32254090905189514,
"learning_rate": 8.975817923186344e-05,
"loss": 3.289051818847656,
"step": 820
},
{
"epoch": 0.5724137931034483,
"grad_norm": 0.37034258246421814,
"learning_rate": 8.833570412517781e-05,
"loss": 3.3568161010742186,
"step": 830
},
{
"epoch": 0.5793103448275863,
"grad_norm": 0.3335118889808655,
"learning_rate": 8.691322901849219e-05,
"loss": 3.388734817504883,
"step": 840
},
{
"epoch": 0.5862068965517241,
"grad_norm": 0.321696013212204,
"learning_rate": 8.549075391180654e-05,
"loss": 3.225112533569336,
"step": 850
},
{
"epoch": 0.593103448275862,
"grad_norm": 0.32803264260292053,
"learning_rate": 8.406827880512091e-05,
"loss": 3.3828250885009767,
"step": 860
},
{
"epoch": 0.6,
"grad_norm": 0.32728055119514465,
"learning_rate": 8.264580369843528e-05,
"loss": 3.382917022705078,
"step": 870
},
{
"epoch": 0.6068965517241379,
"grad_norm": 0.3484093248844147,
"learning_rate": 8.122332859174965e-05,
"loss": 3.3160984039306642,
"step": 880
},
{
"epoch": 0.6137931034482759,
"grad_norm": 0.3902784585952759,
"learning_rate": 7.980085348506402e-05,
"loss": 3.366690444946289,
"step": 890
},
{
"epoch": 0.6206896551724138,
"grad_norm": 0.32276031374931335,
"learning_rate": 7.837837837837838e-05,
"loss": 3.2556941986083983,
"step": 900
},
{
"epoch": 0.6206896551724138,
"eval_loss": 3.337270736694336,
"eval_runtime": 23.3962,
"eval_samples_per_second": 52.957,
"eval_steps_per_second": 6.625,
"step": 900
},
{
"epoch": 0.6275862068965518,
"grad_norm": 0.36281818151474,
"learning_rate": 7.695590327169275e-05,
"loss": 3.4061851501464844,
"step": 910
},
{
"epoch": 0.6344827586206897,
"grad_norm": 0.3139365017414093,
"learning_rate": 7.553342816500711e-05,
"loss": 3.2938968658447267,
"step": 920
},
{
"epoch": 0.6413793103448275,
"grad_norm": 0.33926886320114136,
"learning_rate": 7.411095305832149e-05,
"loss": 3.3076290130615233,
"step": 930
},
{
"epoch": 0.6482758620689655,
"grad_norm": 0.3455406427383423,
"learning_rate": 7.268847795163585e-05,
"loss": 3.338056182861328,
"step": 940
},
{
"epoch": 0.6551724137931034,
"grad_norm": 0.3547625243663788,
"learning_rate": 7.126600284495022e-05,
"loss": 3.3874538421630858,
"step": 950
},
{
"epoch": 0.6620689655172414,
"grad_norm": 0.34468552470207214,
"learning_rate": 6.984352773826458e-05,
"loss": 3.35147705078125,
"step": 960
},
{
"epoch": 0.6689655172413793,
"grad_norm": 0.3656456470489502,
"learning_rate": 6.842105263157895e-05,
"loss": 3.415922164916992,
"step": 970
},
{
"epoch": 0.6758620689655173,
"grad_norm": 0.34468477964401245,
"learning_rate": 6.699857752489332e-05,
"loss": 3.3692134857177733,
"step": 980
},
{
"epoch": 0.6827586206896552,
"grad_norm": 0.3500272333621979,
"learning_rate": 6.557610241820769e-05,
"loss": 3.371417999267578,
"step": 990
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.3438541889190674,
"learning_rate": 6.415362731152204e-05,
"loss": 3.4223506927490233,
"step": 1000
},
{
"epoch": 0.6896551724137931,
"eval_loss": 3.330833911895752,
"eval_runtime": 22.3929,
"eval_samples_per_second": 55.33,
"eval_steps_per_second": 6.922,
"step": 1000
},
{
"epoch": 0.696551724137931,
"grad_norm": 0.33815649151802063,
"learning_rate": 6.273115220483641e-05,
"loss": 3.3118003845214843,
"step": 1010
},
{
"epoch": 0.7034482758620689,
"grad_norm": 0.3285435438156128,
"learning_rate": 6.130867709815078e-05,
"loss": 3.3082767486572267,
"step": 1020
},
{
"epoch": 0.7103448275862069,
"grad_norm": 0.3286275863647461,
"learning_rate": 5.988620199146515e-05,
"loss": 3.373445510864258,
"step": 1030
},
{
"epoch": 0.7172413793103448,
"grad_norm": 0.3484683334827423,
"learning_rate": 5.8463726884779526e-05,
"loss": 3.3201057434082033,
"step": 1040
},
{
"epoch": 0.7241379310344828,
"grad_norm": 0.37690791487693787,
"learning_rate": 5.704125177809388e-05,
"loss": 3.322885513305664,
"step": 1050
},
{
"epoch": 0.7310344827586207,
"grad_norm": 0.3458273112773895,
"learning_rate": 5.561877667140826e-05,
"loss": 3.3502052307128904,
"step": 1060
},
{
"epoch": 0.7379310344827587,
"grad_norm": 0.3618911802768707,
"learning_rate": 5.4196301564722616e-05,
"loss": 3.3665504455566406,
"step": 1070
},
{
"epoch": 0.7448275862068966,
"grad_norm": 0.34324246644973755,
"learning_rate": 5.277382645803699e-05,
"loss": 3.318630599975586,
"step": 1080
},
{
"epoch": 0.7517241379310344,
"grad_norm": 0.3743279278278351,
"learning_rate": 5.135135135135135e-05,
"loss": 3.2997642517089845,
"step": 1090
},
{
"epoch": 0.7586206896551724,
"grad_norm": 0.3348490595817566,
"learning_rate": 4.992887624466572e-05,
"loss": 3.194792556762695,
"step": 1100
},
{
"epoch": 0.7586206896551724,
"eval_loss": 3.3259100914001465,
"eval_runtime": 22.2447,
"eval_samples_per_second": 55.699,
"eval_steps_per_second": 6.968,
"step": 1100
},
{
"epoch": 0.7655172413793103,
"grad_norm": 0.33868151903152466,
"learning_rate": 4.850640113798009e-05,
"loss": 3.346274566650391,
"step": 1110
},
{
"epoch": 0.7724137931034483,
"grad_norm": 0.3498711585998535,
"learning_rate": 4.7083926031294455e-05,
"loss": 3.323177719116211,
"step": 1120
},
{
"epoch": 0.7793103448275862,
"grad_norm": 0.3602657914161682,
"learning_rate": 4.5661450924608825e-05,
"loss": 3.273370361328125,
"step": 1130
},
{
"epoch": 0.7862068965517242,
"grad_norm": 0.34091508388519287,
"learning_rate": 4.423897581792319e-05,
"loss": 3.288585662841797,
"step": 1140
},
{
"epoch": 0.7931034482758621,
"grad_norm": 0.35901182889938354,
"learning_rate": 4.281650071123756e-05,
"loss": 3.3729190826416016,
"step": 1150
},
{
"epoch": 0.8,
"grad_norm": 0.33599621057510376,
"learning_rate": 4.139402560455192e-05,
"loss": 3.355024719238281,
"step": 1160
},
{
"epoch": 0.8068965517241379,
"grad_norm": 0.38110241293907166,
"learning_rate": 3.997155049786629e-05,
"loss": 3.343807601928711,
"step": 1170
},
{
"epoch": 0.8137931034482758,
"grad_norm": 0.34958431124687195,
"learning_rate": 3.854907539118066e-05,
"loss": 3.272492218017578,
"step": 1180
},
{
"epoch": 0.8206896551724138,
"grad_norm": 0.3552829623222351,
"learning_rate": 3.712660028449502e-05,
"loss": 3.3572948455810545,
"step": 1190
},
{
"epoch": 0.8275862068965517,
"grad_norm": 0.322081595659256,
"learning_rate": 3.570412517780939e-05,
"loss": 3.3037082672119142,
"step": 1200
},
{
"epoch": 0.8275862068965517,
"eval_loss": 3.3224334716796875,
"eval_runtime": 22.2507,
"eval_samples_per_second": 55.684,
"eval_steps_per_second": 6.966,
"step": 1200
},
{
"epoch": 0.8344827586206897,
"grad_norm": 0.35375288128852844,
"learning_rate": 3.4281650071123755e-05,
"loss": 3.2933795928955076,
"step": 1210
},
{
"epoch": 0.8413793103448276,
"grad_norm": 0.35284116864204407,
"learning_rate": 3.2859174964438125e-05,
"loss": 3.3349658966064455,
"step": 1220
},
{
"epoch": 0.8482758620689655,
"grad_norm": 0.36195898056030273,
"learning_rate": 3.143669985775249e-05,
"loss": 3.3784534454345705,
"step": 1230
},
{
"epoch": 0.8551724137931035,
"grad_norm": 0.3708537518978119,
"learning_rate": 3.0014224751066856e-05,
"loss": 3.3437496185302735,
"step": 1240
},
{
"epoch": 0.8620689655172413,
"grad_norm": 0.32489216327667236,
"learning_rate": 2.8591749644381226e-05,
"loss": 3.276387023925781,
"step": 1250
},
{
"epoch": 0.8689655172413793,
"grad_norm": 0.3359311819076538,
"learning_rate": 2.7169274537695593e-05,
"loss": 3.2540233612060545,
"step": 1260
},
{
"epoch": 0.8758620689655172,
"grad_norm": 0.40804561972618103,
"learning_rate": 2.574679943100996e-05,
"loss": 3.2611133575439455,
"step": 1270
},
{
"epoch": 0.8827586206896552,
"grad_norm": 0.3684781491756439,
"learning_rate": 2.4324324324324327e-05,
"loss": 3.362625503540039,
"step": 1280
},
{
"epoch": 0.8896551724137931,
"grad_norm": 0.38623297214508057,
"learning_rate": 2.2901849217638694e-05,
"loss": 3.302141571044922,
"step": 1290
},
{
"epoch": 0.896551724137931,
"grad_norm": 0.3602025508880615,
"learning_rate": 2.147937411095306e-05,
"loss": 3.3853092193603516,
"step": 1300
},
{
"epoch": 0.896551724137931,
"eval_loss": 3.319241762161255,
"eval_runtime": 22.5951,
"eval_samples_per_second": 54.835,
"eval_steps_per_second": 6.86,
"step": 1300
},
{
"epoch": 0.903448275862069,
"grad_norm": 0.3617671728134155,
"learning_rate": 2.0056899004267428e-05,
"loss": 3.3196762084960936,
"step": 1310
},
{
"epoch": 0.9103448275862069,
"grad_norm": 0.3671157956123352,
"learning_rate": 1.8634423897581792e-05,
"loss": 3.358323669433594,
"step": 1320
},
{
"epoch": 0.9172413793103448,
"grad_norm": 0.3617306053638458,
"learning_rate": 1.721194879089616e-05,
"loss": 3.2942028045654297,
"step": 1330
},
{
"epoch": 0.9241379310344827,
"grad_norm": 0.3539746403694153,
"learning_rate": 1.5789473684210526e-05,
"loss": 3.332453155517578,
"step": 1340
},
{
"epoch": 0.9310344827586207,
"grad_norm": 0.34931978583335876,
"learning_rate": 1.4366998577524893e-05,
"loss": 3.304658889770508,
"step": 1350
},
{
"epoch": 0.9379310344827586,
"grad_norm": 0.33509236574172974,
"learning_rate": 1.2944523470839262e-05,
"loss": 3.365464782714844,
"step": 1360
},
{
"epoch": 0.9448275862068966,
"grad_norm": 0.36600831151008606,
"learning_rate": 1.1522048364153627e-05,
"loss": 3.357099914550781,
"step": 1370
},
{
"epoch": 0.9517241379310345,
"grad_norm": 0.32806214690208435,
"learning_rate": 1.0099573257467996e-05,
"loss": 3.3524654388427733,
"step": 1380
},
{
"epoch": 0.9586206896551724,
"grad_norm": 0.34161072969436646,
"learning_rate": 8.677098150782363e-06,
"loss": 3.266713333129883,
"step": 1390
},
{
"epoch": 0.9655172413793104,
"grad_norm": 0.3262627124786377,
"learning_rate": 7.254623044096729e-06,
"loss": 3.2161521911621094,
"step": 1400
},
{
"epoch": 0.9655172413793104,
"eval_loss": 3.3177387714385986,
"eval_runtime": 22.491,
"eval_samples_per_second": 55.089,
"eval_steps_per_second": 6.892,
"step": 1400
},
{
"epoch": 0.9724137931034482,
"grad_norm": 0.35331958532333374,
"learning_rate": 5.832147937411096e-06,
"loss": 3.3992801666259767,
"step": 1410
},
{
"epoch": 0.9793103448275862,
"grad_norm": 0.35313528776168823,
"learning_rate": 4.409672830725463e-06,
"loss": 3.3636543273925783,
"step": 1420
},
{
"epoch": 0.9862068965517241,
"grad_norm": 0.3474940359592438,
"learning_rate": 2.9871977240398294e-06,
"loss": 3.320618438720703,
"step": 1430
},
{
"epoch": 0.993103448275862,
"grad_norm": 0.3796871304512024,
"learning_rate": 1.5647226173541964e-06,
"loss": 3.3694156646728515,
"step": 1440
},
{
"epoch": 1.0,
"grad_norm": 0.37185928225517273,
"learning_rate": 1.422475106685633e-07,
"loss": 3.3201263427734373,
"step": 1450
}
],
"logging_steps": 10,
"max_steps": 1450,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 494830102118400.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}