classifier_raft / trainer_state.json
Madao0830's picture
Upload folder using huggingface_hub
801657d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 503,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0019880715705765406,
"grad_norm": 127.0667237907462,
"learning_rate": 5.882352941176471e-08,
"loss": 3.918,
"step": 1
},
{
"epoch": 0.019880715705765408,
"grad_norm": 115.98143785016711,
"learning_rate": 5.882352941176471e-07,
"loss": 3.7431,
"step": 10
},
{
"epoch": 0.039761431411530816,
"grad_norm": 65.61390946147996,
"learning_rate": 1.1764705882352942e-06,
"loss": 3.0734,
"step": 20
},
{
"epoch": 0.05964214711729622,
"grad_norm": 41.009392563213375,
"learning_rate": 1.7647058823529412e-06,
"loss": 2.4079,
"step": 30
},
{
"epoch": 0.07952286282306163,
"grad_norm": 25.739828109506355,
"learning_rate": 2.3529411764705885e-06,
"loss": 2.1062,
"step": 40
},
{
"epoch": 0.09940357852882704,
"grad_norm": 17.26914793098219,
"learning_rate": 2.941176470588235e-06,
"loss": 2.0659,
"step": 50
},
{
"epoch": 0.11928429423459244,
"grad_norm": 25.05959744665656,
"learning_rate": 2.9970662200387674e-06,
"loss": 2.0592,
"step": 60
},
{
"epoch": 0.13916500994035785,
"grad_norm": 15.43081495953451,
"learning_rate": 2.986939491128791e-06,
"loss": 2.0407,
"step": 70
},
{
"epoch": 0.15904572564612326,
"grad_norm": 12.58490370084457,
"learning_rate": 2.969632483038685e-06,
"loss": 2.0257,
"step": 80
},
{
"epoch": 0.17892644135188868,
"grad_norm": 20.125125391550814,
"learning_rate": 2.94522876954573e-06,
"loss": 1.9816,
"step": 90
},
{
"epoch": 0.1988071570576541,
"grad_norm": 8.501045071308724,
"learning_rate": 2.9138461936939467e-06,
"loss": 1.9523,
"step": 100
},
{
"epoch": 0.1988071570576541,
"eval_loss": 1.9134721755981445,
"eval_runtime": 63.5356,
"eval_samples_per_second": 26.662,
"eval_steps_per_second": 0.425,
"step": 100
},
{
"epoch": 0.21868787276341947,
"grad_norm": 22.082329977354508,
"learning_rate": 2.875636298742058e-06,
"loss": 1.9368,
"step": 110
},
{
"epoch": 0.23856858846918488,
"grad_norm": 15.818359711105115,
"learning_rate": 2.8307835963765403e-06,
"loss": 1.9224,
"step": 120
},
{
"epoch": 0.2584493041749503,
"grad_norm": 13.730610615135147,
"learning_rate": 2.779504675723508e-06,
"loss": 1.9146,
"step": 130
},
{
"epoch": 0.2783300198807157,
"grad_norm": 6.635422891471246,
"learning_rate": 2.722047157461906e-06,
"loss": 1.9164,
"step": 140
},
{
"epoch": 0.2982107355864811,
"grad_norm": 8.086513909738859,
"learning_rate": 2.6586884980885044e-06,
"loss": 1.9076,
"step": 150
},
{
"epoch": 0.31809145129224653,
"grad_norm": 13.60628995566901,
"learning_rate": 2.5897346501087633e-06,
"loss": 1.9047,
"step": 160
},
{
"epoch": 0.3379721669980119,
"grad_norm": 18.644035876158316,
"learning_rate": 2.5155185846233844e-06,
"loss": 1.8866,
"step": 170
},
{
"epoch": 0.35785288270377735,
"grad_norm": 17.47074603496702,
"learning_rate": 2.43639868344482e-06,
"loss": 1.8956,
"step": 180
},
{
"epoch": 0.37773359840954274,
"grad_norm": 7.178728301299398,
"learning_rate": 2.3527570085080407e-06,
"loss": 1.9043,
"step": 190
},
{
"epoch": 0.3976143141153082,
"grad_norm": 12.253857653229236,
"learning_rate": 2.264997456932413e-06,
"loss": 1.9159,
"step": 200
},
{
"epoch": 0.3976143141153082,
"eval_loss": 1.8660345077514648,
"eval_runtime": 63.4283,
"eval_samples_per_second": 26.707,
"eval_steps_per_second": 0.426,
"step": 200
},
{
"epoch": 0.41749502982107356,
"grad_norm": 11.988930445624767,
"learning_rate": 2.1735438106436967e-06,
"loss": 1.9004,
"step": 210
},
{
"epoch": 0.43737574552683894,
"grad_norm": 10.753394166683348,
"learning_rate": 2.078837689974332e-06,
"loss": 1.9172,
"step": 220
},
{
"epoch": 0.4572564612326044,
"grad_norm": 9.351072895819225,
"learning_rate": 1.981336421123892e-06,
"loss": 1.9192,
"step": 230
},
{
"epoch": 0.47713717693836977,
"grad_norm": 7.682035183703906,
"learning_rate": 1.8815108277774976e-06,
"loss": 1.8959,
"step": 240
},
{
"epoch": 0.4970178926441352,
"grad_norm": 11.950308335271014,
"learning_rate": 1.7798429575462477e-06,
"loss": 1.8733,
"step": 250
},
{
"epoch": 0.5168986083499006,
"grad_norm": 10.982955764593422,
"learning_rate": 1.6768237542084645e-06,
"loss": 1.8827,
"step": 260
},
{
"epoch": 0.536779324055666,
"grad_norm": 11.904679442862472,
"learning_rate": 1.5729506869922447e-06,
"loss": 1.8765,
"step": 270
},
{
"epoch": 0.5566600397614314,
"grad_norm": 9.827244740110832,
"learning_rate": 1.4687253483472872e-06,
"loss": 1.8841,
"step": 280
},
{
"epoch": 0.5765407554671969,
"grad_norm": 7.306293618482458,
"learning_rate": 1.3646510318060986e-06,
"loss": 1.8773,
"step": 290
},
{
"epoch": 0.5964214711729622,
"grad_norm": 9.071120593329336,
"learning_rate": 1.2612303016308466e-06,
"loss": 1.875,
"step": 300
},
{
"epoch": 0.5964214711729622,
"eval_loss": 1.8548645973205566,
"eval_runtime": 63.4556,
"eval_samples_per_second": 26.696,
"eval_steps_per_second": 0.425,
"step": 300
},
{
"epoch": 0.6163021868787276,
"grad_norm": 9.443760924559943,
"learning_rate": 1.1589625659817845e-06,
"loss": 1.8568,
"step": 310
},
{
"epoch": 0.6361829025844931,
"grad_norm": 11.674966838003883,
"learning_rate": 1.0583416653261663e-06,
"loss": 1.877,
"step": 320
},
{
"epoch": 0.6560636182902585,
"grad_norm": 8.948890555802585,
"learning_rate": 9.598534877329919e-07,
"loss": 1.8663,
"step": 330
},
{
"epoch": 0.6759443339960238,
"grad_norm": 7.926486654587874,
"learning_rate": 8.639736225690654e-07,
"loss": 1.8776,
"step": 340
},
{
"epoch": 0.6958250497017893,
"grad_norm": 6.133488811515441,
"learning_rate": 7.711650639264374e-07,
"loss": 1.8669,
"step": 350
},
{
"epoch": 0.7157057654075547,
"grad_norm": 5.995054734148766,
"learning_rate": 6.818759748711476e-07,
"loss": 1.8661,
"step": 360
},
{
"epoch": 0.73558648111332,
"grad_norm": 6.532247908566974,
"learning_rate": 5.965375233094762e-07,
"loss": 1.8429,
"step": 370
},
{
"epoch": 0.7554671968190855,
"grad_norm": 8.52788084676906,
"learning_rate": 5.155617999220938e-07,
"loss": 1.883,
"step": 380
},
{
"epoch": 0.7753479125248509,
"grad_norm": 7.412951150651719,
"learning_rate": 4.3933982822017883e-07,
"loss": 1.8518,
"step": 390
},
{
"epoch": 0.7952286282306164,
"grad_norm": 8.704060015623933,
"learning_rate": 3.6823967633276183e-07,
"loss": 1.841,
"step": 400
},
{
"epoch": 0.7952286282306164,
"eval_loss": 1.834498643875122,
"eval_runtime": 63.4011,
"eval_samples_per_second": 26.719,
"eval_steps_per_second": 0.426,
"step": 400
},
{
"epoch": 0.8151093439363817,
"grad_norm": 10.837619200928202,
"learning_rate": 3.026046796432582e-07,
"loss": 1.8274,
"step": 410
},
{
"epoch": 0.8349900596421471,
"grad_norm": 9.650000403237328,
"learning_rate": 2.4275178285790973e-07,
"loss": 1.8457,
"step": 420
},
{
"epoch": 0.8548707753479126,
"grad_norm": 8.514388424330665,
"learning_rate": 1.889700095121219e-07,
"loss": 1.8333,
"step": 430
},
{
"epoch": 0.8747514910536779,
"grad_norm": 11.640897943920702,
"learning_rate": 1.4151906630527865e-07,
"loss": 1.8412,
"step": 440
},
{
"epoch": 0.8946322067594433,
"grad_norm": 13.422663396524422,
"learning_rate": 1.00628089003575e-07,
"loss": 1.8505,
"step": 450
},
{
"epoch": 0.9145129224652088,
"grad_norm": 6.751723671685878,
"learning_rate": 6.649453596676663e-08,
"loss": 1.8411,
"step": 460
},
{
"epoch": 0.9343936381709742,
"grad_norm": 7.821816393267081,
"learning_rate": 3.928323464188621e-08,
"loss": 1.8268,
"step": 470
},
{
"epoch": 0.9542743538767395,
"grad_norm": 7.257934599669054,
"learning_rate": 1.9125585628307407e-08,
"loss": 1.8413,
"step": 480
},
{
"epoch": 0.974155069582505,
"grad_norm": 7.949497125452414,
"learning_rate": 6.118928157650749e-09,
"loss": 1.8531,
"step": 490
},
{
"epoch": 0.9940357852882704,
"grad_norm": 7.167162679479334,
"learning_rate": 3.260700525591909e-10,
"loss": 1.8309,
"step": 500
},
{
"epoch": 0.9940357852882704,
"eval_loss": 1.8293424844741821,
"eval_runtime": 63.4663,
"eval_samples_per_second": 26.691,
"eval_steps_per_second": 0.425,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 503,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 162675912867840.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}