sql-generator-adapter / checkpoint-900 /trainer_state.json

Upload folder using huggingface_hub

c8ebbc9 verified about 1 year ago

16.3 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 9.89875173370319,
	"eval_steps": 500,
	"global_step": 900,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.11095700416088766,
	"grad_norm": 0.31662145256996155,
	"learning_rate": 7.407407407407407e-05,
	"loss": 0.5604,
	"step": 10
	},
	{
	"epoch": 0.22191400832177532,
	"grad_norm": 0.38665255904197693,
	"learning_rate": 0.00014814814814814815,
	"loss": 0.3448,
	"step": 20
	},
	{
	"epoch": 0.332871012482663,
	"grad_norm": 0.38282278180122375,
	"learning_rate": 0.00019999417253661235,
	"loss": 0.1345,
	"step": 30
	},
	{
	"epoch": 0.44382801664355065,
	"grad_norm": 0.33959391713142395,
	"learning_rate": 0.000199890592080658,
	"loss": 0.1206,
	"step": 40
	},
	{
	"epoch": 0.5547850208044383,
	"grad_norm": 0.2943621873855591,
	"learning_rate": 0.00019965766682369186,
	"loss": 0.1234,
	"step": 50
	},
	{
	"epoch": 0.665742024965326,
	"grad_norm": 0.25359126925468445,
	"learning_rate": 0.00019929569837240564,
	"loss": 0.1039,
	"step": 60
	},
	{
	"epoch": 0.7766990291262136,
	"grad_norm": 0.23930878937244415,
	"learning_rate": 0.0001988051554269675,
	"loss": 0.102,
	"step": 70
	},
	{
	"epoch": 0.8876560332871013,
	"grad_norm": 0.2013150006532669,
	"learning_rate": 0.00019818667317411865,
	"loss": 0.0974,
	"step": 80
	},
	{
	"epoch": 0.9986130374479889,
	"grad_norm": 0.25096118450164795,
	"learning_rate": 0.00019744105246469263,
	"loss": 0.099,
	"step": 90
	},
	{
	"epoch": 1.0998613037447988,
	"grad_norm": 0.25178226828575134,
	"learning_rate": 0.0001965692587766216,
	"loss": 0.0714,
	"step": 100
	},
	{
	"epoch": 1.2108183079056865,
	"grad_norm": 0.2704208195209503,
	"learning_rate": 0.00019557242096477327,
	"loss": 0.0771,
	"step": 110
	},
	{
	"epoch": 1.3217753120665743,
	"grad_norm": 0.22107760608196259,
	"learning_rate": 0.00019445182979923654,
	"loss": 0.0703,
	"step": 120
	},
	{
	"epoch": 1.4327323162274619,
	"grad_norm": 0.26953792572021484,
	"learning_rate": 0.00019320893629394873,
	"loss": 0.0753,
	"step": 130
	},
	{
	"epoch": 1.5436893203883495,
	"grad_norm": 0.2142401486635208,
	"learning_rate": 0.00019184534982782904,
	"loss": 0.0724,
	"step": 140
	},
	{
	"epoch": 1.6546463245492373,
	"grad_norm": 0.25699618458747864,
	"learning_rate": 0.00019036283606085053,
	"loss": 0.0648,
	"step": 150
	},
	{
	"epoch": 1.765603328710125,
	"grad_norm": 0.2224379926919937,
	"learning_rate": 0.00018876331464774945,
	"loss": 0.0706,
	"step": 160
	},
	{
	"epoch": 1.8765603328710125,
	"grad_norm": 0.23435620963573456,
	"learning_rate": 0.0001870488567523318,
	"loss": 0.0695,
	"step": 170
	},
	{
	"epoch": 1.9875173370319001,
	"grad_norm": 0.18676415085792542,
	"learning_rate": 0.00018522168236559695,
	"loss": 0.0615,
	"step": 180
	},
	{
	"epoch": 2.08876560332871,
	"grad_norm": 0.24162153899669647,
	"learning_rate": 0.00018328415743114912,
	"loss": 0.0445,
	"step": 190
	},
	{
	"epoch": 2.1997226074895977,
	"grad_norm": 0.3869277536869049,
	"learning_rate": 0.00018123879078162097,
	"loss": 0.0502,
	"step": 200
	},
	{
	"epoch": 2.3106796116504853,
	"grad_norm": 0.3037394881248474,
	"learning_rate": 0.00017908823089007457,
	"loss": 0.0482,
	"step": 210
	},
	{
	"epoch": 2.421636615811373,
	"grad_norm": 0.18976379930973053,
	"learning_rate": 0.00017683526244058716,
	"loss": 0.0528,
	"step": 220
	},
	{
	"epoch": 2.5325936199722605,
	"grad_norm": 0.30705705285072327,
	"learning_rate": 0.00017448280272246212,
	"loss": 0.0521,
	"step": 230
	},
	{
	"epoch": 2.6435506241331486,
	"grad_norm": 0.21610881388187408,
	"learning_rate": 0.000172033897852734,
	"loss": 0.0535,
	"step": 240
	},
	{
	"epoch": 2.754507628294036,
	"grad_norm": 0.18693220615386963,
	"learning_rate": 0.00016949171883185918,
	"loss": 0.0517,
	"step": 250
	},
	{
	"epoch": 2.8654646324549238,
	"grad_norm": 0.3321268558502197,
	"learning_rate": 0.0001668595574376992,
	"loss": 0.0407,
	"step": 260
	},
	{
	"epoch": 2.9764216366158114,
	"grad_norm": 0.20721495151519775,
	"learning_rate": 0.000164140821963114,
	"loss": 0.0417,
	"step": 270
	},
	{
	"epoch": 3.0776699029126213,
	"grad_norm": 0.20151656866073608,
	"learning_rate": 0.00016133903280268362,
	"loss": 0.0373,
	"step": 280
	},
	{
	"epoch": 3.188626907073509,
	"grad_norm": 0.3590203821659088,
	"learning_rate": 0.00015845781789427377,
	"loss": 0.0358,
	"step": 290
	},
	{
	"epoch": 3.2995839112343965,
	"grad_norm": 0.20630675554275513,
	"learning_rate": 0.000155500908021347,
	"loss": 0.0299,
	"step": 300
	},
	{
	"epoch": 3.410540915395284,
	"grad_norm": 0.3287246525287628,
	"learning_rate": 0.000152472131982103,
	"loss": 0.0331,
	"step": 310
	},
	{
	"epoch": 3.5214979195561718,
	"grad_norm": 0.24394913017749786,
	"learning_rate": 0.0001493754116317029,
	"loss": 0.0368,
	"step": 320
	},
	{
	"epoch": 3.63245492371706,
	"grad_norm": 0.20165830850601196,
	"learning_rate": 0.0001462147568039977,
	"loss": 0.0336,
	"step": 330
	},
	{
	"epoch": 3.7434119278779474,
	"grad_norm": 0.2538021504878998,
	"learning_rate": 0.00014299426011933568,
	"loss": 0.0295,
	"step": 340
	},
	{
	"epoch": 3.854368932038835,
	"grad_norm": 0.36229604482650757,
	"learning_rate": 0.00013971809168517298,
	"loss": 0.0358,
	"step": 350
	},
	{
	"epoch": 3.9653259361997226,
	"grad_norm": 0.4092184603214264,
	"learning_rate": 0.00013639049369634876,
	"loss": 0.034,
	"step": 360
	},
	{
	"epoch": 4.066574202496533,
	"grad_norm": 0.11960680782794952,
	"learning_rate": 0.00013301577494201664,
	"loss": 0.0233,
	"step": 370
	},
	{
	"epoch": 4.17753120665742,
	"grad_norm": 0.26415354013442993,
	"learning_rate": 0.00012959830522634596,
	"loss": 0.02,
	"step": 380
	},
	{
	"epoch": 4.288488210818308,
	"grad_norm": 0.21966516971588135,
	"learning_rate": 0.00012614250971021657,
	"loss": 0.0225,
	"step": 390
	},
	{
	"epoch": 4.399445214979195,
	"grad_norm": 0.2905697524547577,
	"learning_rate": 0.00012265286318123415,
	"loss": 0.0244,
	"step": 400
	},
	{
	"epoch": 4.510402219140083,
	"grad_norm": 0.24163606762886047,
	"learning_rate": 0.00011913388425948584,
	"loss": 0.017,
	"step": 410
	},
	{
	"epoch": 4.621359223300971,
	"grad_norm": 0.40009695291519165,
	"learning_rate": 0.00011559012954653865,
	"loss": 0.0219,
	"step": 420
	},
	{
	"epoch": 4.732316227461858,
	"grad_norm": 0.1963382512331009,
	"learning_rate": 0.0001120261877252568,
	"loss": 0.0179,
	"step": 430
	},
	{
	"epoch": 4.843273231622746,
	"grad_norm": 0.33989155292510986,
	"learning_rate": 0.00010844667361807842,
	"loss": 0.0198,
	"step": 440
	},
	{
	"epoch": 4.954230235783633,
	"grad_norm": 0.38484710454940796,
	"learning_rate": 0.00010485622221144484,
	"loss": 0.0249,
	"step": 450
	},
	{
	"epoch": 5.055478502080444,
	"grad_norm": 0.18945415318012238,
	"learning_rate": 0.00010125948265412033,
	"loss": 0.0177,
	"step": 460
	},
	{
	"epoch": 5.166435506241331,
	"grad_norm": 0.25906893610954285,
	"learning_rate": 9.766111223717352e-05,
	"loss": 0.0127,
	"step": 470
	},
	{
	"epoch": 5.277392510402219,
	"grad_norm": 0.23804187774658203,
	"learning_rate": 9.406577036341548e-05,
	"loss": 0.0128,
	"step": 480
	},
	{
	"epoch": 5.388349514563107,
	"grad_norm": 0.20456787943840027,
	"learning_rate": 9.047811251410376e-05,
	"loss": 0.0111,
	"step": 490
	},
	{
	"epoch": 5.499306518723994,
	"grad_norm": 0.15757159888744354,
	"learning_rate": 8.690278422072384e-05,
	"loss": 0.0101,
	"step": 500
	},
	{
	"epoch": 5.610263522884882,
	"grad_norm": 0.16691505908966064,
	"learning_rate": 8.334441504965455e-05,
	"loss": 0.0115,
	"step": 510
	},
	{
	"epoch": 5.721220527045769,
	"grad_norm": 0.5055399537086487,
	"learning_rate": 7.980761260750607e-05,
	"loss": 0.0088,
	"step": 520
	},
	{
	"epoch": 5.832177531206657,
	"grad_norm": 0.15076065063476562,
	"learning_rate": 7.629695657489257e-05,
	"loss": 0.0117,
	"step": 530
	},
	{
	"epoch": 5.943134535367545,
	"grad_norm": 0.09655993431806564,
	"learning_rate": 7.281699277636572e-05,
	"loss": 0.0111,
	"step": 540
	},
	{
	"epoch": 6.044382801664355,
	"grad_norm": 0.4866645336151123,
	"learning_rate": 6.93722272941869e-05,
	"loss": 0.0092,
	"step": 550
	},
	{
	"epoch": 6.155339805825243,
	"grad_norm": 0.1816895604133606,
	"learning_rate": 6.59671206335602e-05,
	"loss": 0.0082,
	"step": 560
	},
	{
	"epoch": 6.26629680998613,
	"grad_norm": 0.22271257638931274,
	"learning_rate": 6.260608194688206e-05,
	"loss": 0.0046,
	"step": 570
	},
	{
	"epoch": 6.377253814147018,
	"grad_norm": 0.06787201762199402,
	"learning_rate": 5.929346332448511e-05,
	"loss": 0.0051,
	"step": 580
	},
	{
	"epoch": 6.4882108183079055,
	"grad_norm": 0.09298055619001389,
	"learning_rate": 5.6033554159270294e-05,
	"loss": 0.0054,
	"step": 590
	},
	{
	"epoch": 6.599167822468793,
	"grad_norm": 0.03731105476617813,
	"learning_rate": 5.283057559252341e-05,
	"loss": 0.0053,
	"step": 600
	},
	{
	"epoch": 6.710124826629681,
	"grad_norm": 0.10652171820402145,
	"learning_rate": 4.96886750481082e-05,
	"loss": 0.0057,
	"step": 610
	},
	{
	"epoch": 6.821081830790568,
	"grad_norm": 0.2607424259185791,
	"learning_rate": 4.661192086211366e-05,
	"loss": 0.0077,
	"step": 620
	},
	{
	"epoch": 6.932038834951456,
	"grad_norm": 0.11328639835119247,
	"learning_rate": 4.360429701490934e-05,
	"loss": 0.0073,
	"step": 630
	},
	{
	"epoch": 7.033287101248266,
	"grad_norm": 0.0941685363650322,
	"learning_rate": 4.06696979724298e-05,
	"loss": 0.0039,
	"step": 640
	},
	{
	"epoch": 7.144244105409154,
	"grad_norm": 0.45776239037513733,
	"learning_rate": 3.7811923643367974e-05,
	"loss": 0.0032,
	"step": 650
	},
	{
	"epoch": 7.2552011095700415,
	"grad_norm": 0.08863729238510132,
	"learning_rate": 3.503467445880789e-05,
	"loss": 0.0026,
	"step": 660
	},
	{
	"epoch": 7.366158113730929,
	"grad_norm": 0.04661976918578148,
	"learning_rate": 3.2341546580666796e-05,
	"loss": 0.0024,
	"step": 670
	},
	{
	"epoch": 7.477115117891817,
	"grad_norm": 0.08003357797861099,
	"learning_rate": 2.9736027245152275e-05,
	"loss": 0.0022,
	"step": 680
	},
	{
	"epoch": 7.588072122052704,
	"grad_norm": 0.15967042744159698,
	"learning_rate": 2.722149024726307e-05,
	"loss": 0.0024,
	"step": 690
	},
	{
	"epoch": 7.699029126213592,
	"grad_norm": 0.0572751984000206,
	"learning_rate": 2.480119157218108e-05,
	"loss": 0.003,
	"step": 700
	},
	{
	"epoch": 7.8099861303744795,
	"grad_norm": 0.0780700072646141,
	"learning_rate": 2.247826517921121e-05,
	"loss": 0.0035,
	"step": 710
	},
	{
	"epoch": 7.920943134535367,
	"grad_norm": 0.19474399089813232,
	"learning_rate": 2.025571894372794e-05,
	"loss": 0.0027,
	"step": 720
	},
	{
	"epoch": 8.022191400832178,
	"grad_norm": 0.12848657369613647,
	"learning_rate": 1.813643076238375e-05,
	"loss": 0.002,
	"step": 730
	},
	{
	"epoch": 8.133148404993065,
	"grad_norm": 0.05772533640265465,
	"learning_rate": 1.6123144826622504e-05,
	"loss": 0.0017,
	"step": 740
	},
	{
	"epoch": 8.244105409153953,
	"grad_norm": 0.14121367037296295,
	"learning_rate": 1.4218468069322578e-05,
	"loss": 0.0013,
	"step": 750
	},
	{
	"epoch": 8.35506241331484,
	"grad_norm": 0.14342299103736877,
	"learning_rate": 1.2424866789171729e-05,
	"loss": 0.0016,
	"step": 760
	},
	{
	"epoch": 8.466019417475728,
	"grad_norm": 0.03438349440693855,
	"learning_rate": 1.0744663457143878e-05,
	"loss": 0.0011,
	"step": 770
	},
	{
	"epoch": 8.576976421636616,
	"grad_norm": 0.0756613090634346,
	"learning_rate": 9.180033709213454e-06,
	"loss": 0.0017,
	"step": 780
	},
	{
	"epoch": 8.687933425797503,
	"grad_norm": 0.0464102178812027,
	"learning_rate": 7.733003529201278e-06,
	"loss": 0.0014,
	"step": 790
	},
	{
	"epoch": 8.79889042995839,
	"grad_norm": 0.12452979385852814,
	"learning_rate": 6.405446625399481e-06,
	"loss": 0.0015,
	"step": 800
	},
	{
	"epoch": 8.909847434119278,
	"grad_norm": 0.08071909099817276,
	"learning_rate": 5.199082004372957e-06,
	"loss": 0.0014,
	"step": 810
	},
	{
	"epoch": 9.011095700416089,
	"grad_norm": 0.06948132812976837,
	"learning_rate": 4.115471745078314e-06,
	"loss": 0.0012,
	"step": 820
	},
	{
	"epoch": 9.122052704576976,
	"grad_norm": 0.07605510950088501,
	"learning_rate": 3.1560189761830728e-06,
	"loss": 0.0009,
	"step": 830
	},
	{
	"epoch": 9.233009708737864,
	"grad_norm": 0.0312280785292387,
	"learning_rate": 2.3219660592038285e-06,
	"loss": 0.0012,
	"step": 840
	},
	{
	"epoch": 9.343966712898752,
	"grad_norm": 0.02329327166080475,
	"learning_rate": 1.6143929798162704e-06,
	"loss": 0.001,
	"step": 850
	},
	{
	"epoch": 9.45492371705964,
	"grad_norm": 0.08054498583078384,
	"learning_rate": 1.034215949419748e-06,
	"loss": 0.0012,
	"step": 860
	},
	{
	"epoch": 9.565880721220527,
	"grad_norm": 0.09850303828716278,
	"learning_rate": 5.821862187675775e-07,
	"loss": 0.0011,
	"step": 870
	},
	{
	"epoch": 9.676837725381414,
	"grad_norm": 0.08373916149139404,
	"learning_rate": 2.588891051988895e-07,
	"loss": 0.0019,
	"step": 880
	},
	{
	"epoch": 9.787794729542302,
	"grad_norm": 0.017217393964529037,
	"learning_rate": 6.474323473194543e-08,
	"loss": 0.0009,
	"step": 890
	},
	{
	"epoch": 9.89875173370319,
	"grad_norm": 0.04848321154713631,
	"learning_rate": 0.0,
	"loss": 0.0009,
	"step": 900
	}
	],
	"logging_steps": 10,
	"max_steps": 900,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 10,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 9.301284175906406e+16,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}