{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 7030,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1422475106685633,
"grad_norm": 4.714789390563965,
"learning_rate": 4.9670487106017194e-05,
"loss": 2.0134,
"step": 100
},
{
"epoch": 0.2844950213371266,
"grad_norm": 6.441630840301514,
"learning_rate": 4.89541547277937e-05,
"loss": 1.5541,
"step": 200
},
{
"epoch": 0.4267425320056899,
"grad_norm": 4.592851638793945,
"learning_rate": 4.823782234957021e-05,
"loss": 1.489,
"step": 300
},
{
"epoch": 0.5689900426742532,
"grad_norm": 3.297266960144043,
"learning_rate": 4.7521489971346707e-05,
"loss": 1.4536,
"step": 400
},
{
"epoch": 0.7112375533428165,
"grad_norm": 2.3128268718719482,
"learning_rate": 4.680515759312321e-05,
"loss": 1.4203,
"step": 500
},
{
"epoch": 0.8534850640113798,
"grad_norm": 3.049949884414673,
"learning_rate": 4.608882521489972e-05,
"loss": 1.4094,
"step": 600
},
{
"epoch": 0.9957325746799431,
"grad_norm": 2.860804319381714,
"learning_rate": 4.5372492836676226e-05,
"loss": 1.3611,
"step": 700
},
{
"epoch": 1.0,
"eval_gen_len": 192.4972,
"eval_loss": 1.2695759534835815,
"eval_rouge1": 49.1217,
"eval_rouge2": 25.983,
"eval_rougeL": 34.4639,
"eval_rougeLsum": 44.2763,
"eval_runtime": 1625.5106,
"eval_samples_per_second": 0.865,
"eval_steps_per_second": 0.108,
"step": 703
},
{
"epoch": 1.1379800853485065,
"grad_norm": 5.325246334075928,
"learning_rate": 4.4656160458452725e-05,
"loss": 1.347,
"step": 800
},
{
"epoch": 1.2802275960170697,
"grad_norm": 4.638139724731445,
"learning_rate": 4.393982808022923e-05,
"loss": 1.294,
"step": 900
},
{
"epoch": 1.422475106685633,
"grad_norm": 4.863597869873047,
"learning_rate": 4.322349570200573e-05,
"loss": 1.3274,
"step": 1000
},
{
"epoch": 1.5647226173541964,
"grad_norm": 4.364685535430908,
"learning_rate": 4.250716332378224e-05,
"loss": 1.305,
"step": 1100
},
{
"epoch": 1.7069701280227596,
"grad_norm": 3.9026546478271484,
"learning_rate": 4.179083094555874e-05,
"loss": 1.2867,
"step": 1200
},
{
"epoch": 1.8492176386913228,
"grad_norm": 18.247657775878906,
"learning_rate": 4.1074498567335244e-05,
"loss": 1.2879,
"step": 1300
},
{
"epoch": 1.991465149359886,
"grad_norm": 3.737813949584961,
"learning_rate": 4.035816618911175e-05,
"loss": 1.2731,
"step": 1400
},
{
"epoch": 2.0,
"eval_gen_len": 145.7745,
"eval_loss": 1.2143858671188354,
"eval_rouge1": 54.2525,
"eval_rouge2": 29.3843,
"eval_rougeL": 37.7286,
"eval_rougeLsum": 49.2444,
"eval_runtime": 1195.305,
"eval_samples_per_second": 1.176,
"eval_steps_per_second": 0.147,
"step": 1406
},
{
"epoch": 2.1337126600284497,
"grad_norm": 3.5325093269348145,
"learning_rate": 3.964183381088825e-05,
"loss": 1.2249,
"step": 1500
},
{
"epoch": 2.275960170697013,
"grad_norm": 4.705758094787598,
"learning_rate": 3.8925501432664756e-05,
"loss": 1.228,
"step": 1600
},
{
"epoch": 2.418207681365576,
"grad_norm": 4.521617412567139,
"learning_rate": 3.820916905444126e-05,
"loss": 1.2261,
"step": 1700
},
{
"epoch": 2.5604551920341394,
"grad_norm": 2.7349600791931152,
"learning_rate": 3.749283667621777e-05,
"loss": 1.2261,
"step": 1800
},
{
"epoch": 2.7027027027027026,
"grad_norm": 3.1735689640045166,
"learning_rate": 3.677650429799427e-05,
"loss": 1.2177,
"step": 1900
},
{
"epoch": 2.844950213371266,
"grad_norm": 4.88203763961792,
"learning_rate": 3.6060171919770775e-05,
"loss": 1.2055,
"step": 2000
},
{
"epoch": 2.987197724039829,
"grad_norm": 2.383328437805176,
"learning_rate": 3.534383954154728e-05,
"loss": 1.2075,
"step": 2100
},
{
"epoch": 3.0,
"eval_gen_len": 135.436,
"eval_loss": 1.1897257566452026,
"eval_rouge1": 54.4867,
"eval_rouge2": 29.2535,
"eval_rougeL": 37.5847,
"eval_rougeLsum": 49.4878,
"eval_runtime": 1293.6847,
"eval_samples_per_second": 1.087,
"eval_steps_per_second": 0.136,
"step": 2109
},
{
"epoch": 3.1294452347083928,
"grad_norm": 2.6979541778564453,
"learning_rate": 3.462750716332379e-05,
"loss": 1.1771,
"step": 2200
},
{
"epoch": 3.271692745376956,
"grad_norm": 3.8193747997283936,
"learning_rate": 3.391117478510029e-05,
"loss": 1.1714,
"step": 2300
},
{
"epoch": 3.413940256045519,
"grad_norm": 2.840989589691162,
"learning_rate": 3.3194842406876794e-05,
"loss": 1.161,
"step": 2400
},
{
"epoch": 3.5561877667140824,
"grad_norm": 2.8911194801330566,
"learning_rate": 3.24785100286533e-05,
"loss": 1.1764,
"step": 2500
},
{
"epoch": 3.6984352773826457,
"grad_norm": 4.388571739196777,
"learning_rate": 3.1762177650429806e-05,
"loss": 1.1403,
"step": 2600
},
{
"epoch": 3.8406827880512093,
"grad_norm": 3.452425956726074,
"learning_rate": 3.1045845272206306e-05,
"loss": 1.1293,
"step": 2700
},
{
"epoch": 3.9829302987197726,
"grad_norm": 3.1834287643432617,
"learning_rate": 3.032951289398281e-05,
"loss": 1.1413,
"step": 2800
},
{
"epoch": 4.0,
"eval_gen_len": 132.9936,
"eval_loss": 1.1736302375793457,
"eval_rouge1": 54.9178,
"eval_rouge2": 29.6386,
"eval_rougeL": 37.8747,
"eval_rougeLsum": 49.9072,
"eval_runtime": 1855.2918,
"eval_samples_per_second": 0.758,
"eval_steps_per_second": 0.095,
"step": 2812
},
{
"epoch": 4.125177809388336,
"grad_norm": 5.237063407897949,
"learning_rate": 2.9613180515759315e-05,
"loss": 1.124,
"step": 2900
},
{
"epoch": 4.2674253200568995,
"grad_norm": 4.409717082977295,
"learning_rate": 2.889684813753582e-05,
"loss": 1.1012,
"step": 3000
},
{
"epoch": 4.409672830725462,
"grad_norm": 6.317205905914307,
"learning_rate": 2.818051575931232e-05,
"loss": 1.1045,
"step": 3100
},
{
"epoch": 4.551920341394026,
"grad_norm": 3.054473400115967,
"learning_rate": 2.7464183381088828e-05,
"loss": 1.0951,
"step": 3200
},
{
"epoch": 4.694167852062589,
"grad_norm": 7.386185169219971,
"learning_rate": 2.674785100286533e-05,
"loss": 1.1265,
"step": 3300
},
{
"epoch": 4.836415362731152,
"grad_norm": 2.541593074798584,
"learning_rate": 2.6031518624641837e-05,
"loss": 1.0964,
"step": 3400
},
{
"epoch": 4.978662873399715,
"grad_norm": 2.9089510440826416,
"learning_rate": 2.5322349570200578e-05,
"loss": 1.0824,
"step": 3500
},
{
"epoch": 5.0,
"eval_gen_len": 129.9488,
"eval_loss": 1.1544321775436401,
"eval_rouge1": 55.1013,
"eval_rouge2": 29.8133,
"eval_rougeL": 37.9405,
"eval_rougeLsum": 50.0896,
"eval_runtime": 1082.5396,
"eval_samples_per_second": 1.299,
"eval_steps_per_second": 0.163,
"step": 3515
},
{
"epoch": 5.120910384068279,
"grad_norm": 4.814157962799072,
"learning_rate": 2.4606017191977078e-05,
"loss": 1.0571,
"step": 3600
},
{
"epoch": 5.2631578947368425,
"grad_norm": 3.1615419387817383,
"learning_rate": 2.388968481375358e-05,
"loss": 1.0891,
"step": 3700
},
{
"epoch": 5.405405405405405,
"grad_norm": 2.753258466720581,
"learning_rate": 2.3173352435530087e-05,
"loss": 1.0621,
"step": 3800
},
{
"epoch": 5.547652916073969,
"grad_norm": 2.6968796253204346,
"learning_rate": 2.245702005730659e-05,
"loss": 1.0714,
"step": 3900
},
{
"epoch": 5.689900426742532,
"grad_norm": 4.843164920806885,
"learning_rate": 2.1740687679083096e-05,
"loss": 1.0527,
"step": 4000
},
{
"epoch": 5.832147937411095,
"grad_norm": 4.297841548919678,
"learning_rate": 2.10243553008596e-05,
"loss": 1.0665,
"step": 4100
},
{
"epoch": 5.974395448079658,
"grad_norm": 3.3593056201934814,
"learning_rate": 2.0308022922636106e-05,
"loss": 1.0649,
"step": 4200
},
{
"epoch": 6.0,
"eval_gen_len": 129.5334,
"eval_loss": 1.147682785987854,
"eval_rouge1": 55.3737,
"eval_rouge2": 30.0994,
"eval_rougeL": 38.1751,
"eval_rougeLsum": 50.2305,
"eval_runtime": 1585.0234,
"eval_samples_per_second": 0.887,
"eval_steps_per_second": 0.111,
"step": 4218
},
{
"epoch": 6.116642958748222,
"grad_norm": 2.796093225479126,
"learning_rate": 1.959169054441261e-05,
"loss": 1.0388,
"step": 4300
},
{
"epoch": 6.2588904694167855,
"grad_norm": 4.34324312210083,
"learning_rate": 1.8875358166189115e-05,
"loss": 1.0477,
"step": 4400
},
{
"epoch": 6.401137980085348,
"grad_norm": 5.817513465881348,
"learning_rate": 1.8159025787965618e-05,
"loss": 1.0484,
"step": 4500
},
{
"epoch": 6.543385490753912,
"grad_norm": 10.839672088623047,
"learning_rate": 1.744269340974212e-05,
"loss": 1.0401,
"step": 4600
},
{
"epoch": 6.685633001422475,
"grad_norm": 5.399308681488037,
"learning_rate": 1.6726361031518624e-05,
"loss": 1.0273,
"step": 4700
},
{
"epoch": 6.827880512091038,
"grad_norm": 3.037004232406616,
"learning_rate": 1.601002865329513e-05,
"loss": 1.0158,
"step": 4800
},
{
"epoch": 6.970128022759602,
"grad_norm": 4.032598972320557,
"learning_rate": 1.5293696275071634e-05,
"loss": 1.031,
"step": 4900
},
{
"epoch": 7.0,
"eval_gen_len": 128.6415,
"eval_loss": 1.1399182081222534,
"eval_rouge1": 55.4367,
"eval_rouge2": 30.2958,
"eval_rougeL": 38.402,
"eval_rougeLsum": 50.4346,
"eval_runtime": 1073.7936,
"eval_samples_per_second": 1.309,
"eval_steps_per_second": 0.164,
"step": 4921
},
{
"epoch": 7.112375533428165,
"grad_norm": 5.663075923919678,
"learning_rate": 1.4577363896848137e-05,
"loss": 1.0312,
"step": 5000
},
{
"epoch": 7.2546230440967285,
"grad_norm": 3.34541654586792,
"learning_rate": 1.3861031518624643e-05,
"loss": 1.0059,
"step": 5100
},
{
"epoch": 7.396870554765291,
"grad_norm": 7.185023784637451,
"learning_rate": 1.3144699140401146e-05,
"loss": 1.0154,
"step": 5200
},
{
"epoch": 7.539118065433855,
"grad_norm": 3.5461268424987793,
"learning_rate": 1.242836676217765e-05,
"loss": 1.0037,
"step": 5300
},
{
"epoch": 7.681365576102419,
"grad_norm": 3.1910974979400635,
"learning_rate": 1.1712034383954155e-05,
"loss": 1.0116,
"step": 5400
},
{
"epoch": 7.823613086770981,
"grad_norm": 7.539901256561279,
"learning_rate": 1.099570200573066e-05,
"loss": 1.0162,
"step": 5500
},
{
"epoch": 7.965860597439545,
"grad_norm": 2.6586227416992188,
"learning_rate": 1.0279369627507165e-05,
"loss": 1.0169,
"step": 5600
},
{
"epoch": 8.0,
"eval_gen_len": 128.6607,
"eval_loss": 1.1396645307540894,
"eval_rouge1": 55.3171,
"eval_rouge2": 30.1359,
"eval_rougeL": 38.2241,
"eval_rougeLsum": 50.2819,
"eval_runtime": 1048.6479,
"eval_samples_per_second": 1.341,
"eval_steps_per_second": 0.168,
"step": 5624
},
{
"epoch": 8.108108108108109,
"grad_norm": 13.898772239685059,
"learning_rate": 9.563037249283668e-06,
"loss": 1.009,
"step": 5700
},
{
"epoch": 8.250355618776672,
"grad_norm": 2.7014620304107666,
"learning_rate": 8.846704871060172e-06,
"loss": 0.9918,
"step": 5800
},
{
"epoch": 8.392603129445234,
"grad_norm": 3.315486192703247,
"learning_rate": 8.130372492836677e-06,
"loss": 1.0118,
"step": 5900
},
{
"epoch": 8.534850640113799,
"grad_norm": 3.5905227661132812,
"learning_rate": 7.414040114613182e-06,
"loss": 0.9963,
"step": 6000
},
{
"epoch": 8.677098150782362,
"grad_norm": 7.71924352645874,
"learning_rate": 6.6977077363896855e-06,
"loss": 0.9937,
"step": 6100
},
{
"epoch": 8.819345661450924,
"grad_norm": 2.987886905670166,
"learning_rate": 5.981375358166189e-06,
"loss": 0.983,
"step": 6200
},
{
"epoch": 8.961593172119487,
"grad_norm": 30.301179885864258,
"learning_rate": 5.265042979942693e-06,
"loss": 1.0011,
"step": 6300
},
{
"epoch": 9.0,
"eval_gen_len": 128.5121,
"eval_loss": 1.1343382596969604,
"eval_rouge1": 55.7259,
"eval_rouge2": 30.5158,
"eval_rougeL": 38.443,
"eval_rougeLsum": 50.6675,
"eval_runtime": 1061.373,
"eval_samples_per_second": 1.325,
"eval_steps_per_second": 0.166,
"step": 6327
},
{
"epoch": 9.103840682788052,
"grad_norm": 3.3131797313690186,
"learning_rate": 4.548710601719198e-06,
"loss": 0.9831,
"step": 6400
},
{
"epoch": 9.246088193456615,
"grad_norm": 2.9355273246765137,
"learning_rate": 3.839541547277937e-06,
"loss": 0.9688,
"step": 6500
},
{
"epoch": 9.388335704125177,
"grad_norm": 3.5932939052581787,
"learning_rate": 3.1232091690544415e-06,
"loss": 0.975,
"step": 6600
},
{
"epoch": 9.530583214793742,
"grad_norm": 9.377333641052246,
"learning_rate": 2.4068767908309457e-06,
"loss": 0.9926,
"step": 6700
},
{
"epoch": 9.672830725462305,
"grad_norm": 2.7796216011047363,
"learning_rate": 1.69054441260745e-06,
"loss": 1.0019,
"step": 6800
},
{
"epoch": 9.815078236130867,
"grad_norm": 3.6320714950561523,
"learning_rate": 9.742120343839543e-07,
"loss": 0.971,
"step": 6900
},
{
"epoch": 9.95732574679943,
"grad_norm": 3.324120283126831,
"learning_rate": 2.5787965616045843e-07,
"loss": 1.004,
"step": 7000
},
{
"epoch": 10.0,
"eval_gen_len": 128.5939,
"eval_loss": 1.1363346576690674,
"eval_rouge1": 55.76,
"eval_rouge2": 30.6092,
"eval_rougeL": 38.5818,
"eval_rougeLsum": 50.678,
"eval_runtime": 876.6103,
"eval_samples_per_second": 1.604,
"eval_steps_per_second": 0.201,
"step": 7030
}
],
"logging_steps": 100,
"max_steps": 7030,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6250347865505792e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}