{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.887573964497041,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05917159763313609,
"grad_norm": 1.1964364051818848,
"learning_rate": 8e-05,
"loss": 2.1543,
"step": 5
},
{
"epoch": 0.11834319526627218,
"grad_norm": 1.1647157669067383,
"learning_rate": 0.00018,
"loss": 1.6346,
"step": 10
},
{
"epoch": 0.17751479289940827,
"grad_norm": 0.6181473135948181,
"learning_rate": 0.00019904761904761907,
"loss": 0.9978,
"step": 15
},
{
"epoch": 0.23668639053254437,
"grad_norm": 0.5694869160652161,
"learning_rate": 0.00019785714285714288,
"loss": 0.9807,
"step": 20
},
{
"epoch": 0.2958579881656805,
"grad_norm": 0.6708640456199646,
"learning_rate": 0.00019666666666666666,
"loss": 1.2485,
"step": 25
},
{
"epoch": 0.35502958579881655,
"grad_norm": 0.7021521925926208,
"learning_rate": 0.00019547619047619047,
"loss": 1.1107,
"step": 30
},
{
"epoch": 0.41420118343195267,
"grad_norm": 0.5067740082740784,
"learning_rate": 0.0001942857142857143,
"loss": 1.1293,
"step": 35
},
{
"epoch": 0.47337278106508873,
"grad_norm": 0.5455656051635742,
"learning_rate": 0.0001930952380952381,
"loss": 1.1371,
"step": 40
},
{
"epoch": 0.5325443786982249,
"grad_norm": 0.6190764307975769,
"learning_rate": 0.00019190476190476192,
"loss": 1.0131,
"step": 45
},
{
"epoch": 0.591715976331361,
"grad_norm": 0.544291615486145,
"learning_rate": 0.00019071428571428573,
"loss": 0.969,
"step": 50
},
{
"epoch": 0.650887573964497,
"grad_norm": 0.600204348564148,
"learning_rate": 0.0001895238095238095,
"loss": 1.0509,
"step": 55
},
{
"epoch": 0.7100591715976331,
"grad_norm": 0.5397897958755493,
"learning_rate": 0.00018833333333333335,
"loss": 0.9839,
"step": 60
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.571107804775238,
"learning_rate": 0.00018714285714285716,
"loss": 0.9281,
"step": 65
},
{
"epoch": 0.8284023668639053,
"grad_norm": 0.5789744257926941,
"learning_rate": 0.00018595238095238097,
"loss": 0.9543,
"step": 70
},
{
"epoch": 0.8875739644970414,
"grad_norm": 0.6334110498428345,
"learning_rate": 0.00018476190476190478,
"loss": 1.0625,
"step": 75
},
{
"epoch": 0.9467455621301775,
"grad_norm": 0.5225853323936462,
"learning_rate": 0.00018357142857142858,
"loss": 0.9734,
"step": 80
},
{
"epoch": 1.0,
"grad_norm": 0.7854479551315308,
"learning_rate": 0.0001823809523809524,
"loss": 0.9452,
"step": 85
},
{
"epoch": 1.0591715976331362,
"grad_norm": 0.47304806113243103,
"learning_rate": 0.0001811904761904762,
"loss": 0.8473,
"step": 90
},
{
"epoch": 1.1183431952662721,
"grad_norm": 0.6840282678604126,
"learning_rate": 0.00018,
"loss": 0.6189,
"step": 95
},
{
"epoch": 1.1775147928994083,
"grad_norm": 0.5173171162605286,
"learning_rate": 0.00017880952380952382,
"loss": 0.9301,
"step": 100
},
{
"epoch": 1.2366863905325443,
"grad_norm": 0.4892285466194153,
"learning_rate": 0.00017761904761904763,
"loss": 0.7659,
"step": 105
},
{
"epoch": 1.2958579881656804,
"grad_norm": 0.5754849910736084,
"learning_rate": 0.00017642857142857144,
"loss": 0.7756,
"step": 110
},
{
"epoch": 1.3550295857988166,
"grad_norm": 0.47277289628982544,
"learning_rate": 0.00017523809523809525,
"loss": 0.7844,
"step": 115
},
{
"epoch": 1.4142011834319526,
"grad_norm": 0.8198840022087097,
"learning_rate": 0.00017404761904761906,
"loss": 0.8969,
"step": 120
},
{
"epoch": 1.4733727810650887,
"grad_norm": 0.5040334463119507,
"learning_rate": 0.00017285714285714287,
"loss": 0.8727,
"step": 125
},
{
"epoch": 1.532544378698225,
"grad_norm": 0.5382494926452637,
"learning_rate": 0.00017166666666666667,
"loss": 0.7677,
"step": 130
},
{
"epoch": 1.5917159763313609,
"grad_norm": 0.631537914276123,
"learning_rate": 0.00017047619047619048,
"loss": 0.8364,
"step": 135
},
{
"epoch": 1.650887573964497,
"grad_norm": 0.5718739628791809,
"learning_rate": 0.0001692857142857143,
"loss": 0.7447,
"step": 140
},
{
"epoch": 1.7100591715976332,
"grad_norm": 0.557224452495575,
"learning_rate": 0.0001680952380952381,
"loss": 0.8828,
"step": 145
},
{
"epoch": 1.7692307692307692,
"grad_norm": 0.6206871271133423,
"learning_rate": 0.0001669047619047619,
"loss": 0.5674,
"step": 150
},
{
"epoch": 1.8284023668639053,
"grad_norm": 0.6297276616096497,
"learning_rate": 0.00016571428571428575,
"loss": 0.7956,
"step": 155
},
{
"epoch": 1.8875739644970415,
"grad_norm": 0.6178033351898193,
"learning_rate": 0.00016452380952380953,
"loss": 0.5527,
"step": 160
},
{
"epoch": 1.9467455621301775,
"grad_norm": 0.6269710063934326,
"learning_rate": 0.00016333333333333334,
"loss": 0.9635,
"step": 165
},
{
"epoch": 2.0,
"grad_norm": 0.7290076613426208,
"learning_rate": 0.00016214285714285715,
"loss": 0.8048,
"step": 170
},
{
"epoch": 2.059171597633136,
"grad_norm": 0.5376546382904053,
"learning_rate": 0.00016095238095238096,
"loss": 0.602,
"step": 175
},
{
"epoch": 2.1183431952662723,
"grad_norm": 0.720078706741333,
"learning_rate": 0.0001597619047619048,
"loss": 0.5738,
"step": 180
},
{
"epoch": 2.1775147928994083,
"grad_norm": 0.5647716522216797,
"learning_rate": 0.00015857142857142857,
"loss": 0.5445,
"step": 185
},
{
"epoch": 2.2366863905325443,
"grad_norm": 0.7397224307060242,
"learning_rate": 0.00015738095238095238,
"loss": 0.5383,
"step": 190
},
{
"epoch": 2.2958579881656807,
"grad_norm": 0.8834079504013062,
"learning_rate": 0.0001561904761904762,
"loss": 0.6575,
"step": 195
},
{
"epoch": 2.3550295857988166,
"grad_norm": 0.6497870683670044,
"learning_rate": 0.000155,
"loss": 0.6677,
"step": 200
},
{
"epoch": 2.4142011834319526,
"grad_norm": 0.686392605304718,
"learning_rate": 0.00015380952380952384,
"loss": 0.493,
"step": 205
},
{
"epoch": 2.4733727810650885,
"grad_norm": 0.719688892364502,
"learning_rate": 0.00015261904761904762,
"loss": 0.5356,
"step": 210
},
{
"epoch": 2.532544378698225,
"grad_norm": 0.6884217262268066,
"learning_rate": 0.00015142857142857143,
"loss": 0.7167,
"step": 215
},
{
"epoch": 2.591715976331361,
"grad_norm": 0.7767056822776794,
"learning_rate": 0.00015023809523809524,
"loss": 0.7346,
"step": 220
},
{
"epoch": 2.6508875739644973,
"grad_norm": 0.6508312225341797,
"learning_rate": 0.00014904761904761904,
"loss": 0.547,
"step": 225
},
{
"epoch": 2.710059171597633,
"grad_norm": 0.6159693598747253,
"learning_rate": 0.00014785714285714288,
"loss": 0.5539,
"step": 230
},
{
"epoch": 2.769230769230769,
"grad_norm": 0.7028509378433228,
"learning_rate": 0.00014666666666666666,
"loss": 0.609,
"step": 235
},
{
"epoch": 2.828402366863905,
"grad_norm": 0.6096014976501465,
"learning_rate": 0.00014547619047619047,
"loss": 0.5913,
"step": 240
},
{
"epoch": 2.8875739644970415,
"grad_norm": 0.8518397212028503,
"learning_rate": 0.00014428571428571428,
"loss": 0.6356,
"step": 245
},
{
"epoch": 2.9467455621301775,
"grad_norm": 0.6462046504020691,
"learning_rate": 0.00014309523809523812,
"loss": 0.527,
"step": 250
},
{
"epoch": 3.0,
"grad_norm": 0.8931583762168884,
"learning_rate": 0.00014190476190476193,
"loss": 0.6505,
"step": 255
},
{
"epoch": 3.059171597633136,
"grad_norm": 0.6089041233062744,
"learning_rate": 0.00014071428571428573,
"loss": 0.4721,
"step": 260
},
{
"epoch": 3.1183431952662723,
"grad_norm": 0.9845924973487854,
"learning_rate": 0.00013952380952380952,
"loss": 0.4478,
"step": 265
},
{
"epoch": 3.1775147928994083,
"grad_norm": 0.6962316036224365,
"learning_rate": 0.00013833333333333333,
"loss": 0.3862,
"step": 270
},
{
"epoch": 3.2366863905325443,
"grad_norm": 0.6963745951652527,
"learning_rate": 0.00013714285714285716,
"loss": 0.5446,
"step": 275
},
{
"epoch": 3.2958579881656807,
"grad_norm": 0.9289587736129761,
"learning_rate": 0.00013595238095238097,
"loss": 0.4985,
"step": 280
},
{
"epoch": 3.3550295857988166,
"grad_norm": 0.7913327813148499,
"learning_rate": 0.00013476190476190478,
"loss": 0.4291,
"step": 285
},
{
"epoch": 3.4142011834319526,
"grad_norm": 0.7623841166496277,
"learning_rate": 0.00013357142857142856,
"loss": 0.4198,
"step": 290
},
{
"epoch": 3.4733727810650885,
"grad_norm": 1.1334826946258545,
"learning_rate": 0.00013238095238095237,
"loss": 0.4442,
"step": 295
},
{
"epoch": 3.532544378698225,
"grad_norm": 0.8162091374397278,
"learning_rate": 0.0001311904761904762,
"loss": 0.4249,
"step": 300
},
{
"epoch": 3.591715976331361,
"grad_norm": 0.7582007646560669,
"learning_rate": 0.00013000000000000002,
"loss": 0.4166,
"step": 305
},
{
"epoch": 3.6508875739644973,
"grad_norm": 0.8337474465370178,
"learning_rate": 0.00012880952380952382,
"loss": 0.3552,
"step": 310
},
{
"epoch": 3.710059171597633,
"grad_norm": 0.7497977018356323,
"learning_rate": 0.0001276190476190476,
"loss": 0.3778,
"step": 315
},
{
"epoch": 3.769230769230769,
"grad_norm": 0.9030293226242065,
"learning_rate": 0.00012642857142857144,
"loss": 0.4048,
"step": 320
},
{
"epoch": 3.828402366863905,
"grad_norm": 0.8548532128334045,
"learning_rate": 0.00012523809523809525,
"loss": 0.5433,
"step": 325
},
{
"epoch": 3.8875739644970415,
"grad_norm": 1.1865911483764648,
"learning_rate": 0.00012404761904761906,
"loss": 0.4465,
"step": 330
},
{
"epoch": 3.9467455621301775,
"grad_norm": 0.6329714059829712,
"learning_rate": 0.00012285714285714287,
"loss": 0.499,
"step": 335
},
{
"epoch": 4.0,
"grad_norm": 0.8335389494895935,
"learning_rate": 0.00012166666666666667,
"loss": 0.3774,
"step": 340
},
{
"epoch": 4.059171597633136,
"grad_norm": 0.7739379405975342,
"learning_rate": 0.00012047619047619047,
"loss": 0.2472,
"step": 345
},
{
"epoch": 4.118343195266272,
"grad_norm": 1.0731953382492065,
"learning_rate": 0.00011928571428571428,
"loss": 0.3058,
"step": 350
},
{
"epoch": 4.177514792899408,
"grad_norm": 1.051379680633545,
"learning_rate": 0.0001180952380952381,
"loss": 0.3446,
"step": 355
},
{
"epoch": 4.236686390532545,
"grad_norm": 0.6324198842048645,
"learning_rate": 0.00011690476190476191,
"loss": 0.2351,
"step": 360
},
{
"epoch": 4.295857988165681,
"grad_norm": 0.9921632409095764,
"learning_rate": 0.00011571428571428574,
"loss": 0.2795,
"step": 365
},
{
"epoch": 4.355029585798817,
"grad_norm": 0.9360544085502625,
"learning_rate": 0.00011452380952380952,
"loss": 0.4056,
"step": 370
},
{
"epoch": 4.414201183431953,
"grad_norm": 0.956781268119812,
"learning_rate": 0.00011333333333333334,
"loss": 0.3001,
"step": 375
},
{
"epoch": 4.4733727810650885,
"grad_norm": 1.0604465007781982,
"learning_rate": 0.00011214285714285715,
"loss": 0.3972,
"step": 380
},
{
"epoch": 4.5325443786982245,
"grad_norm": 0.8613020181655884,
"learning_rate": 0.00011095238095238096,
"loss": 0.2828,
"step": 385
},
{
"epoch": 4.591715976331361,
"grad_norm": 0.666599690914154,
"learning_rate": 0.00010976190476190478,
"loss": 0.3187,
"step": 390
},
{
"epoch": 4.650887573964497,
"grad_norm": 0.7497467398643494,
"learning_rate": 0.00010857142857142856,
"loss": 0.4278,
"step": 395
},
{
"epoch": 4.710059171597633,
"grad_norm": 0.733259916305542,
"learning_rate": 0.00010738095238095239,
"loss": 0.2384,
"step": 400
},
{
"epoch": 4.769230769230769,
"grad_norm": 0.7570552229881287,
"learning_rate": 0.0001061904761904762,
"loss": 0.3938,
"step": 405
},
{
"epoch": 4.828402366863905,
"grad_norm": 0.8109162449836731,
"learning_rate": 0.000105,
"loss": 0.2729,
"step": 410
},
{
"epoch": 4.887573964497041,
"grad_norm": 0.7985107898712158,
"learning_rate": 0.00010380952380952383,
"loss": 0.3805,
"step": 415
},
{
"epoch": 4.946745562130177,
"grad_norm": 0.704366147518158,
"learning_rate": 0.00010261904761904761,
"loss": 0.2468,
"step": 420
},
{
"epoch": 5.0,
"grad_norm": 1.054413080215454,
"learning_rate": 0.00010142857142857143,
"loss": 0.2892,
"step": 425
},
{
"epoch": 5.059171597633136,
"grad_norm": 0.7603329420089722,
"learning_rate": 0.00010023809523809524,
"loss": 0.2865,
"step": 430
},
{
"epoch": 5.118343195266272,
"grad_norm": 0.9612646102905273,
"learning_rate": 9.904761904761905e-05,
"loss": 0.2215,
"step": 435
},
{
"epoch": 5.177514792899408,
"grad_norm": 0.8669071793556213,
"learning_rate": 9.785714285714286e-05,
"loss": 0.2078,
"step": 440
},
{
"epoch": 5.236686390532545,
"grad_norm": 0.7441051006317139,
"learning_rate": 9.666666666666667e-05,
"loss": 0.233,
"step": 445
},
{
"epoch": 5.295857988165681,
"grad_norm": 0.5900620818138123,
"learning_rate": 9.547619047619049e-05,
"loss": 0.2581,
"step": 450
},
{
"epoch": 5.355029585798817,
"grad_norm": 0.990178644657135,
"learning_rate": 9.428571428571429e-05,
"loss": 0.2603,
"step": 455
},
{
"epoch": 5.414201183431953,
"grad_norm": 0.7644340991973877,
"learning_rate": 9.309523809523811e-05,
"loss": 0.2021,
"step": 460
},
{
"epoch": 5.4733727810650885,
"grad_norm": 0.5087964534759521,
"learning_rate": 9.19047619047619e-05,
"loss": 0.1126,
"step": 465
},
{
"epoch": 5.5325443786982245,
"grad_norm": 0.7896738052368164,
"learning_rate": 9.071428571428571e-05,
"loss": 0.2084,
"step": 470
},
{
"epoch": 5.591715976331361,
"grad_norm": 0.71749347448349,
"learning_rate": 8.952380952380953e-05,
"loss": 0.1618,
"step": 475
},
{
"epoch": 5.650887573964497,
"grad_norm": 0.8466284871101379,
"learning_rate": 8.833333333333333e-05,
"loss": 0.2574,
"step": 480
},
{
"epoch": 5.710059171597633,
"grad_norm": 1.0023925304412842,
"learning_rate": 8.714285714285715e-05,
"loss": 0.1985,
"step": 485
},
{
"epoch": 5.769230769230769,
"grad_norm": 0.8096638321876526,
"learning_rate": 8.595238095238096e-05,
"loss": 0.285,
"step": 490
},
{
"epoch": 5.828402366863905,
"grad_norm": 0.9154897332191467,
"learning_rate": 8.476190476190477e-05,
"loss": 0.2051,
"step": 495
},
{
"epoch": 5.887573964497041,
"grad_norm": 0.9506188035011292,
"learning_rate": 8.357142857142858e-05,
"loss": 0.2664,
"step": 500
}
],
"logging_steps": 5,
"max_steps": 850,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.524257725218611e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}