xlm-roberta-checkpoint / trainer_state.json
batoulnn's picture
Upload folder using huggingface_hub
36f433d verified
{
"best_metric": 0.9186506026518713,
"best_model_checkpoint": "./arabert_author_model_full/checkpoint-32500",
"epoch": 3.973870440936309,
"eval_steps": 500,
"global_step": 36500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05443658138268917,
"grad_norm": 7.748462200164795,
"learning_rate": 1.0821992378878607e-05,
"loss": 3.0232,
"step": 500
},
{
"epoch": 0.05443658138268917,
"eval_accuracy": 0.13458950201884254,
"eval_f1_macro": 0.06564828888070251,
"eval_f1_micro": 0.13458950201884254,
"eval_loss": 2.790053606033325,
"eval_precision_macro": 0.06930701192581881,
"eval_precision_micro": 0.13458950201884254,
"eval_recall_macro": 0.14436342078225026,
"eval_recall_micro": 0.13458950201884254,
"eval_runtime": 1.8577,
"eval_samples_per_second": 399.963,
"eval_steps_per_second": 50.063,
"step": 500
},
{
"epoch": 0.10887316276537834,
"grad_norm": 41.86913299560547,
"learning_rate": 2.170930865541644e-05,
"loss": 2.2125,
"step": 1000
},
{
"epoch": 0.10887316276537834,
"eval_accuracy": 0.5733512786002691,
"eval_f1_macro": 0.3820533588398374,
"eval_f1_micro": 0.5733512786002691,
"eval_loss": 1.659558892250061,
"eval_precision_macro": 0.3659038715437873,
"eval_precision_micro": 0.5733512786002691,
"eval_recall_macro": 0.45853990573746223,
"eval_recall_micro": 0.5733512786002691,
"eval_runtime": 1.8553,
"eval_samples_per_second": 400.47,
"eval_steps_per_second": 50.126,
"step": 1000
},
{
"epoch": 0.1633097441480675,
"grad_norm": 14.793054580688477,
"learning_rate": 3.25748502994012e-05,
"loss": 1.6587,
"step": 1500
},
{
"epoch": 0.1633097441480675,
"eval_accuracy": 0.5989232839838493,
"eval_f1_macro": 0.4808864838162928,
"eval_f1_micro": 0.5989232839838493,
"eval_loss": 1.5170563459396362,
"eval_precision_macro": 0.6274791755154383,
"eval_precision_micro": 0.5989232839838493,
"eval_recall_macro": 0.49222247701556004,
"eval_recall_micro": 0.5989232839838493,
"eval_runtime": 1.8561,
"eval_samples_per_second": 400.305,
"eval_steps_per_second": 50.105,
"step": 1500
},
{
"epoch": 0.21774632553075668,
"grad_norm": 26.568124771118164,
"learning_rate": 4.344039194338596e-05,
"loss": 1.3366,
"step": 2000
},
{
"epoch": 0.21774632553075668,
"eval_accuracy": 0.6150740242261103,
"eval_f1_macro": 0.5392686072327233,
"eval_f1_micro": 0.6150740242261103,
"eval_loss": 1.3329455852508545,
"eval_precision_macro": 0.6295802748584037,
"eval_precision_micro": 0.6150740242261103,
"eval_recall_macro": 0.5725953292113953,
"eval_recall_micro": 0.6150740242261103,
"eval_runtime": 1.8573,
"eval_samples_per_second": 400.047,
"eval_steps_per_second": 50.073,
"step": 2000
},
{
"epoch": 0.2721829069134458,
"grad_norm": 27.614654541015625,
"learning_rate": 5.432770821992379e-05,
"loss": 1.3134,
"step": 2500
},
{
"epoch": 0.2721829069134458,
"eval_accuracy": 0.6917900403768507,
"eval_f1_macro": 0.5839516207825546,
"eval_f1_micro": 0.6917900403768507,
"eval_loss": 1.2291903495788574,
"eval_precision_macro": 0.6531006523438395,
"eval_precision_micro": 0.6917900403768507,
"eval_recall_macro": 0.6020036925958046,
"eval_recall_micro": 0.6917900403768507,
"eval_runtime": 1.8554,
"eval_samples_per_second": 400.461,
"eval_steps_per_second": 50.125,
"step": 2500
},
{
"epoch": 0.326619488296135,
"grad_norm": 26.257598876953125,
"learning_rate": 6.521502449646163e-05,
"loss": 1.2263,
"step": 3000
},
{
"epoch": 0.326619488296135,
"eval_accuracy": 0.6958277254374159,
"eval_f1_macro": 0.5987092681766552,
"eval_f1_micro": 0.6958277254374159,
"eval_loss": 1.247532606124878,
"eval_precision_macro": 0.691355222631176,
"eval_precision_micro": 0.6958277254374159,
"eval_recall_macro": 0.6326688053535838,
"eval_recall_micro": 0.6958277254374159,
"eval_runtime": 1.8557,
"eval_samples_per_second": 400.383,
"eval_steps_per_second": 50.115,
"step": 3000
},
{
"epoch": 0.38105606967882416,
"grad_norm": 20.401668548583984,
"learning_rate": 7.610234077299946e-05,
"loss": 1.2354,
"step": 3500
},
{
"epoch": 0.38105606967882416,
"eval_accuracy": 0.6648721399730821,
"eval_f1_macro": 0.5547791146400078,
"eval_f1_micro": 0.6648721399730821,
"eval_loss": 1.4817063808441162,
"eval_precision_macro": 0.609418529255712,
"eval_precision_micro": 0.6648721399730821,
"eval_recall_macro": 0.5953309879820717,
"eval_recall_micro": 0.6648721399730821,
"eval_runtime": 1.8585,
"eval_samples_per_second": 399.778,
"eval_steps_per_second": 50.039,
"step": 3500
},
{
"epoch": 0.43549265106151336,
"grad_norm": 12.415190696716309,
"learning_rate": 7.998139872043199e-05,
"loss": 1.2221,
"step": 4000
},
{
"epoch": 0.43549265106151336,
"eval_accuracy": 0.6621803499327052,
"eval_f1_macro": 0.5842490840574637,
"eval_f1_micro": 0.6621803499327052,
"eval_loss": 1.2946059703826904,
"eval_precision_macro": 0.6743659102946021,
"eval_precision_micro": 0.6621803499327052,
"eval_recall_macro": 0.6174956983289226,
"eval_recall_micro": 0.6621803499327052,
"eval_runtime": 1.8549,
"eval_samples_per_second": 400.563,
"eval_steps_per_second": 50.138,
"step": 4000
},
{
"epoch": 0.4899292324442025,
"grad_norm": 45.67084503173828,
"learning_rate": 7.987866825500063e-05,
"loss": 1.203,
"step": 4500
},
{
"epoch": 0.4899292324442025,
"eval_accuracy": 0.6850605652759085,
"eval_f1_macro": 0.5971314943399703,
"eval_f1_micro": 0.6850605652759085,
"eval_loss": 1.2994459867477417,
"eval_precision_macro": 0.683861588771308,
"eval_precision_micro": 0.6850605652759085,
"eval_recall_macro": 0.6242521591611176,
"eval_recall_micro": 0.6850605652759085,
"eval_runtime": 1.8624,
"eval_samples_per_second": 398.953,
"eval_steps_per_second": 49.936,
"step": 4500
},
{
"epoch": 0.5443658138268916,
"grad_norm": 5.911087989807129,
"learning_rate": 7.968584427636286e-05,
"loss": 1.0458,
"step": 5000
},
{
"epoch": 0.5443658138268916,
"eval_accuracy": 0.7577388963660835,
"eval_f1_macro": 0.6834938073457237,
"eval_f1_micro": 0.7577388963660835,
"eval_loss": 0.9275029897689819,
"eval_precision_macro": 0.6852672080248814,
"eval_precision_micro": 0.7577388963660835,
"eval_recall_macro": 0.738924762321289,
"eval_recall_micro": 0.7577388963660835,
"eval_runtime": 1.8549,
"eval_samples_per_second": 400.555,
"eval_steps_per_second": 50.137,
"step": 5000
},
{
"epoch": 0.5988023952095808,
"grad_norm": 48.83789825439453,
"learning_rate": 7.940413126469914e-05,
"loss": 1.1084,
"step": 5500
},
{
"epoch": 0.5988023952095808,
"eval_accuracy": 0.7133243606998654,
"eval_f1_macro": 0.6529664892159153,
"eval_f1_micro": 0.7133243606998654,
"eval_loss": 1.1383692026138306,
"eval_precision_macro": 0.711194884206057,
"eval_precision_micro": 0.7133243606998654,
"eval_recall_macro": 0.6790887107741691,
"eval_recall_micro": 0.7133243606998654,
"eval_runtime": 1.8565,
"eval_samples_per_second": 400.21,
"eval_steps_per_second": 50.094,
"step": 5500
},
{
"epoch": 0.65323897659227,
"grad_norm": 59.57648849487305,
"learning_rate": 7.903303594852863e-05,
"loss": 1.0011,
"step": 6000
},
{
"epoch": 0.65323897659227,
"eval_accuracy": 0.7644683714670256,
"eval_f1_macro": 0.7124643316894146,
"eval_f1_micro": 0.7644683714670256,
"eval_loss": 0.9443386197090149,
"eval_precision_macro": 0.743414646590002,
"eval_precision_micro": 0.7644683714670256,
"eval_recall_macro": 0.7314305522869405,
"eval_recall_micro": 0.7644683714670256,
"eval_runtime": 1.8599,
"eval_samples_per_second": 399.493,
"eval_steps_per_second": 50.004,
"step": 6000
},
{
"epoch": 0.7076755579749592,
"grad_norm": 5.128486156463623,
"learning_rate": 7.857387081654268e-05,
"loss": 1.0064,
"step": 6500
},
{
"epoch": 0.7076755579749592,
"eval_accuracy": 0.7954239569313594,
"eval_f1_macro": 0.7642500691509388,
"eval_f1_micro": 0.7954239569313594,
"eval_loss": 0.6533617377281189,
"eval_precision_macro": 0.7635597495236878,
"eval_precision_micro": 0.7954239569313594,
"eval_recall_macro": 0.793458207807885,
"eval_recall_micro": 0.7954239569313594,
"eval_runtime": 1.8647,
"eval_samples_per_second": 398.464,
"eval_steps_per_second": 49.875,
"step": 6500
},
{
"epoch": 0.7621121393576483,
"grad_norm": 5.791709899902344,
"learning_rate": 7.80276718780503e-05,
"loss": 0.9092,
"step": 7000
},
{
"epoch": 0.7621121393576483,
"eval_accuracy": 0.784656796769852,
"eval_f1_macro": 0.7376954018021566,
"eval_f1_micro": 0.784656796769852,
"eval_loss": 0.8241704702377319,
"eval_precision_macro": 0.751883868032923,
"eval_precision_micro": 0.784656796769852,
"eval_recall_macro": 0.7632956885406171,
"eval_recall_micro": 0.784656796769852,
"eval_runtime": 1.8548,
"eval_samples_per_second": 400.574,
"eval_steps_per_second": 50.139,
"step": 7000
},
{
"epoch": 0.8165487207403375,
"grad_norm": 77.25128936767578,
"learning_rate": 7.739702023046593e-05,
"loss": 0.8999,
"step": 7500
},
{
"epoch": 0.8165487207403375,
"eval_accuracy": 0.7671601615074024,
"eval_f1_macro": 0.708723750703533,
"eval_f1_micro": 0.7671601615074024,
"eval_loss": 0.9454395174980164,
"eval_precision_macro": 0.7607959054064888,
"eval_precision_micro": 0.7671601615074024,
"eval_recall_macro": 0.7088710160869984,
"eval_recall_micro": 0.7671601615074024,
"eval_runtime": 1.8564,
"eval_samples_per_second": 400.238,
"eval_steps_per_second": 50.097,
"step": 7500
},
{
"epoch": 0.8709853021230267,
"grad_norm": 15.056076049804688,
"learning_rate": 7.668081162066953e-05,
"loss": 0.9202,
"step": 8000
},
{
"epoch": 0.8709853021230267,
"eval_accuracy": 0.784656796769852,
"eval_f1_macro": 0.7368074575740363,
"eval_f1_micro": 0.784656796769852,
"eval_loss": 0.9090538620948792,
"eval_precision_macro": 0.7702538361499491,
"eval_precision_micro": 0.784656796769852,
"eval_recall_macro": 0.7483242810004438,
"eval_recall_micro": 0.784656796769852,
"eval_runtime": 1.8578,
"eval_samples_per_second": 399.93,
"eval_steps_per_second": 50.059,
"step": 8000
},
{
"epoch": 0.9254218835057159,
"grad_norm": 64.78616333007812,
"learning_rate": 7.588184049330584e-05,
"loss": 0.8391,
"step": 8500
},
{
"epoch": 0.9254218835057159,
"eval_accuracy": 0.8021534320323015,
"eval_f1_macro": 0.7623929352981407,
"eval_f1_micro": 0.8021534320323015,
"eval_loss": 0.7761164903640747,
"eval_precision_macro": 0.7946649104240797,
"eval_precision_micro": 0.8021534320323015,
"eval_recall_macro": 0.7814529031598654,
"eval_recall_micro": 0.8021534320323015,
"eval_runtime": 1.8709,
"eval_samples_per_second": 397.135,
"eval_steps_per_second": 49.709,
"step": 8500
},
{
"epoch": 0.979858464888405,
"grad_norm": 9.414610862731934,
"learning_rate": 7.500374892838819e-05,
"loss": 0.7599,
"step": 9000
},
{
"epoch": 0.979858464888405,
"eval_accuracy": 0.7900403768506057,
"eval_f1_macro": 0.7371605784112564,
"eval_f1_micro": 0.7900403768506057,
"eval_loss": 0.9420120716094971,
"eval_precision_macro": 0.7952387110277418,
"eval_precision_micro": 0.7900403768506057,
"eval_recall_macro": 0.7764252549275673,
"eval_recall_micro": 0.7900403768506057,
"eval_runtime": 1.8556,
"eval_samples_per_second": 400.41,
"eval_steps_per_second": 50.119,
"step": 9000
},
{
"epoch": 1.0342950462710943,
"grad_norm": 119.14054107666016,
"learning_rate": 7.404499941253026e-05,
"loss": 0.7595,
"step": 9500
},
{
"epoch": 1.0342950462710943,
"eval_accuracy": 0.8021534320323015,
"eval_f1_macro": 0.7648820230071746,
"eval_f1_micro": 0.8021534320323015,
"eval_loss": 0.7322187423706055,
"eval_precision_macro": 0.8020003432831997,
"eval_precision_micro": 0.8021534320323015,
"eval_recall_macro": 0.7894509757341642,
"eval_recall_micro": 0.8021534320323015,
"eval_runtime": 1.866,
"eval_samples_per_second": 398.178,
"eval_steps_per_second": 49.839,
"step": 9500
},
{
"epoch": 1.0887316276537833,
"grad_norm": 1.1056867837905884,
"learning_rate": 7.300943453361286e-05,
"loss": 0.7356,
"step": 10000
},
{
"epoch": 1.0887316276537833,
"eval_accuracy": 0.784656796769852,
"eval_f1_macro": 0.7282785512433163,
"eval_f1_micro": 0.784656796769852,
"eval_loss": 0.987204372882843,
"eval_precision_macro": 0.7431660072967824,
"eval_precision_micro": 0.784656796769852,
"eval_recall_macro": 0.759777569867129,
"eval_recall_micro": 0.784656796769852,
"eval_runtime": 1.86,
"eval_samples_per_second": 399.455,
"eval_steps_per_second": 49.999,
"step": 10000
},
{
"epoch": 1.1431682090364725,
"grad_norm": 0.7999147772789001,
"learning_rate": 7.189939082544474e-05,
"loss": 0.7319,
"step": 10500
},
{
"epoch": 1.1431682090364725,
"eval_accuracy": 0.8223418573351279,
"eval_f1_macro": 0.7710850473325811,
"eval_f1_micro": 0.8223418573351279,
"eval_loss": 0.8551661968231201,
"eval_precision_macro": 0.8068470518533863,
"eval_precision_micro": 0.8223418573351279,
"eval_recall_macro": 0.7781452760567589,
"eval_recall_micro": 0.8223418573351279,
"eval_runtime": 1.8597,
"eval_samples_per_second": 399.524,
"eval_steps_per_second": 50.008,
"step": 10500
},
{
"epoch": 1.1976047904191618,
"grad_norm": 75.80143737792969,
"learning_rate": 7.071737286760956e-05,
"loss": 0.6937,
"step": 11000
},
{
"epoch": 1.1976047904191618,
"eval_accuracy": 0.8358008075370121,
"eval_f1_macro": 0.8105729169252525,
"eval_f1_micro": 0.8358008075370121,
"eval_loss": 0.6681444644927979,
"eval_precision_macro": 0.8251281216307259,
"eval_precision_micro": 0.8358008075370121,
"eval_recall_macro": 0.8299367132530477,
"eval_recall_micro": 0.8358008075370121,
"eval_runtime": 1.858,
"eval_samples_per_second": 399.893,
"eval_steps_per_second": 50.054,
"step": 11000
},
{
"epoch": 1.2520413718018508,
"grad_norm": 170.24810791015625,
"learning_rate": 6.946604763441019e-05,
"loss": 0.6245,
"step": 11500
},
{
"epoch": 1.2520413718018508,
"eval_accuracy": 0.8304172274562585,
"eval_f1_macro": 0.8042113065648884,
"eval_f1_micro": 0.8304172274562585,
"eval_loss": 0.6982793211936951,
"eval_precision_macro": 0.8149073185463646,
"eval_precision_micro": 0.8304172274562585,
"eval_recall_macro": 0.825200491276461,
"eval_recall_micro": 0.8304172274562585,
"eval_runtime": 1.858,
"eval_samples_per_second": 399.886,
"eval_steps_per_second": 50.053,
"step": 11500
},
{
"epoch": 1.30647795318454,
"grad_norm": 4.239190101623535,
"learning_rate": 6.814823847740345e-05,
"loss": 0.7154,
"step": 12000
},
{
"epoch": 1.30647795318454,
"eval_accuracy": 0.8344549125168237,
"eval_f1_macro": 0.8175929788783902,
"eval_f1_micro": 0.8344549125168237,
"eval_loss": 0.5690922737121582,
"eval_precision_macro": 0.8293167371438036,
"eval_precision_micro": 0.8344549125168237,
"eval_recall_macro": 0.8434059311064652,
"eval_recall_micro": 0.8344549125168237,
"eval_runtime": 1.8586,
"eval_samples_per_second": 399.774,
"eval_steps_per_second": 50.039,
"step": 12000
},
{
"epoch": 1.360914534567229,
"grad_norm": 108.4642105102539,
"learning_rate": 6.676691875510264e-05,
"loss": 0.6284,
"step": 12500
},
{
"epoch": 1.360914534567229,
"eval_accuracy": 0.8492597577388964,
"eval_f1_macro": 0.8301394566864201,
"eval_f1_micro": 0.8492597577388964,
"eval_loss": 0.5634124279022217,
"eval_precision_macro": 0.8533406425457963,
"eval_precision_micro": 0.8492597577388964,
"eval_recall_macro": 0.8328288148155981,
"eval_recall_micro": 0.8492597577388964,
"eval_runtime": 1.857,
"eval_samples_per_second": 400.114,
"eval_steps_per_second": 50.082,
"step": 12500
},
{
"epoch": 1.4153511159499184,
"grad_norm": 8.67512035369873,
"learning_rate": 6.532520512422097e-05,
"loss": 0.5809,
"step": 13000
},
{
"epoch": 1.4153511159499184,
"eval_accuracy": 0.8277254374158816,
"eval_f1_macro": 0.8053122261939523,
"eval_f1_micro": 0.8277254374158816,
"eval_loss": 0.6742271780967712,
"eval_precision_macro": 0.8234783735489098,
"eval_precision_micro": 0.8277254374158816,
"eval_recall_macro": 0.8227076028577776,
"eval_recall_micro": 0.8277254374158816,
"eval_runtime": 1.8544,
"eval_samples_per_second": 400.672,
"eval_steps_per_second": 50.151,
"step": 13000
},
{
"epoch": 1.4697876973326074,
"grad_norm": 42.6486701965332,
"learning_rate": 6.382940301643648e-05,
"loss": 0.5704,
"step": 13500
},
{
"epoch": 1.4697876973326074,
"eval_accuracy": 0.8371467025572006,
"eval_f1_macro": 0.8196023040766853,
"eval_f1_micro": 0.8371467025572006,
"eval_loss": 0.5477439761161804,
"eval_precision_macro": 0.8154114142235019,
"eval_precision_micro": 0.8371467025572006,
"eval_recall_macro": 0.8396350173820816,
"eval_recall_micro": 0.8371467025572006,
"eval_runtime": 1.8546,
"eval_samples_per_second": 400.63,
"eval_steps_per_second": 50.146,
"step": 13500
},
{
"epoch": 1.5242242787152966,
"grad_norm": 1.2607247829437256,
"learning_rate": 6.227689332474957e-05,
"loss": 0.6075,
"step": 14000
},
{
"epoch": 1.5242242787152966,
"eval_accuracy": 0.7967698519515478,
"eval_f1_macro": 0.7677841123247988,
"eval_f1_micro": 0.7967698519515478,
"eval_loss": 1.0041254758834839,
"eval_precision_macro": 0.8114776737598274,
"eval_precision_micro": 0.7967698519515478,
"eval_recall_macro": 0.791907454908191,
"eval_recall_micro": 0.7967698519515478,
"eval_runtime": 1.8546,
"eval_samples_per_second": 400.624,
"eval_steps_per_second": 50.145,
"step": 14000
},
{
"epoch": 1.578660860097986,
"grad_norm": 4.931788921356201,
"learning_rate": 6.067412052017954e-05,
"loss": 0.5828,
"step": 14500
},
{
"epoch": 1.578660860097986,
"eval_accuracy": 0.847913862718708,
"eval_f1_macro": 0.8302058376279866,
"eval_f1_micro": 0.847913862718708,
"eval_loss": 0.5582941770553589,
"eval_precision_macro": 0.8601936850928309,
"eval_precision_micro": 0.847913862718708,
"eval_recall_macro": 0.8408390815357188,
"eval_recall_micro": 0.847913862718708,
"eval_runtime": 1.8669,
"eval_samples_per_second": 397.979,
"eval_steps_per_second": 49.814,
"step": 14500
},
{
"epoch": 1.633097441480675,
"grad_norm": 0.7160111665725708,
"learning_rate": 5.902470092162814e-05,
"loss": 0.547,
"step": 15000
},
{
"epoch": 1.633097441480675,
"eval_accuracy": 0.8277254374158816,
"eval_f1_macro": 0.824218625219791,
"eval_f1_micro": 0.8277254374158816,
"eval_loss": 0.5830731391906738,
"eval_precision_macro": 0.836821915854616,
"eval_precision_micro": 0.8277254374158816,
"eval_recall_macro": 0.8384285472623679,
"eval_recall_micro": 0.8277254374158816,
"eval_runtime": 1.8619,
"eval_samples_per_second": 399.049,
"eval_steps_per_second": 49.948,
"step": 15000
},
{
"epoch": 1.6875340228633642,
"grad_norm": 226.5104522705078,
"learning_rate": 5.733235609665259e-05,
"loss": 0.532,
"step": 15500
},
{
"epoch": 1.6875340228633642,
"eval_accuracy": 0.8721399730820996,
"eval_f1_macro": 0.855165540533041,
"eval_f1_micro": 0.8721399730820996,
"eval_loss": 0.5274588465690613,
"eval_precision_macro": 0.8790730850974743,
"eval_precision_micro": 0.8721399730820996,
"eval_recall_macro": 0.8560697826831901,
"eval_recall_micro": 0.8721399730820996,
"eval_runtime": 1.8558,
"eval_samples_per_second": 400.36,
"eval_steps_per_second": 50.112,
"step": 15500
},
{
"epoch": 1.7419706042460534,
"grad_norm": 1.0774835348129272,
"learning_rate": 5.560440381357954e-05,
"loss": 0.5079,
"step": 16000
},
{
"epoch": 1.7419706042460534,
"eval_accuracy": 0.8627187079407806,
"eval_f1_macro": 0.8615733124700616,
"eval_f1_micro": 0.8627187079407806,
"eval_loss": 0.4832541048526764,
"eval_precision_macro": 0.8823512344628659,
"eval_precision_micro": 0.8627187079407806,
"eval_recall_macro": 0.8731612062059807,
"eval_recall_micro": 0.8627187079407806,
"eval_runtime": 1.8662,
"eval_samples_per_second": 398.145,
"eval_steps_per_second": 49.835,
"step": 16000
},
{
"epoch": 1.7964071856287425,
"grad_norm": 21.044397354125977,
"learning_rate": 5.383781847704708e-05,
"loss": 0.5054,
"step": 16500
},
{
"epoch": 1.7964071856287425,
"eval_accuracy": 0.8627187079407806,
"eval_f1_macro": 0.8472535983855487,
"eval_f1_micro": 0.8627187079407806,
"eval_loss": 0.44899773597717285,
"eval_precision_macro": 0.8666042870317389,
"eval_precision_micro": 0.8627187079407806,
"eval_recall_macro": 0.8504937606032208,
"eval_recall_micro": 0.8627187079407806,
"eval_runtime": 1.8535,
"eval_samples_per_second": 400.859,
"eval_steps_per_second": 50.175,
"step": 16500
},
{
"epoch": 1.8508437670114317,
"grad_norm": 15.335619926452637,
"learning_rate": 5.204001102071416e-05,
"loss": 0.5458,
"step": 17000
},
{
"epoch": 1.8508437670114317,
"eval_accuracy": 0.8465679676985195,
"eval_f1_macro": 0.8361761359674401,
"eval_f1_micro": 0.8465679676985195,
"eval_loss": 0.5616449117660522,
"eval_precision_macro": 0.8476921577900376,
"eval_precision_micro": 0.8465679676985195,
"eval_recall_macro": 0.8576485467382537,
"eval_recall_micro": 0.8465679676985195,
"eval_runtime": 1.8595,
"eval_samples_per_second": 399.578,
"eval_steps_per_second": 50.015,
"step": 17000
},
{
"epoch": 1.905280348394121,
"grad_norm": 7.975017547607422,
"learning_rate": 5.02150378180527e-05,
"loss": 0.4398,
"step": 17500
},
{
"epoch": 1.905280348394121,
"eval_accuracy": 0.8546433378196501,
"eval_f1_macro": 0.8438383161438845,
"eval_f1_micro": 0.8546433378196501,
"eval_loss": 0.5604321956634521,
"eval_precision_macro": 0.8740578351654877,
"eval_precision_micro": 0.8546433378196501,
"eval_recall_macro": 0.8516149068431341,
"eval_recall_micro": 0.8546433378196501,
"eval_runtime": 1.8574,
"eval_samples_per_second": 400.02,
"eval_steps_per_second": 50.07,
"step": 17500
},
{
"epoch": 1.95971692977681,
"grad_norm": 1.2037925720214844,
"learning_rate": 4.837073281713624e-05,
"loss": 0.4396,
"step": 18000
},
{
"epoch": 1.95971692977681,
"eval_accuracy": 0.8775235531628532,
"eval_f1_macro": 0.8658710069776413,
"eval_f1_micro": 0.8775235531628532,
"eval_loss": 0.48805707693099976,
"eval_precision_macro": 0.8837020041596116,
"eval_precision_micro": 0.8775235531628532,
"eval_recall_macro": 0.8730744219457011,
"eval_recall_micro": 0.8775235531628532,
"eval_runtime": 1.8629,
"eval_samples_per_second": 398.833,
"eval_steps_per_second": 49.921,
"step": 18000
},
{
"epoch": 2.014153511159499,
"grad_norm": 12.86486530303955,
"learning_rate": 4.650386669229917e-05,
"loss": 0.4597,
"step": 18500
},
{
"epoch": 2.014153511159499,
"eval_accuracy": 0.8627187079407806,
"eval_f1_macro": 0.8452518141932259,
"eval_f1_micro": 0.8627187079407806,
"eval_loss": 0.5245405435562134,
"eval_precision_macro": 0.8537422462412639,
"eval_precision_micro": 0.8627187079407806,
"eval_recall_macro": 0.8721048137269747,
"eval_recall_micro": 0.8627187079407806,
"eval_runtime": 1.8529,
"eval_samples_per_second": 400.987,
"eval_steps_per_second": 50.191,
"step": 18500
},
{
"epoch": 2.0685900925421885,
"grad_norm": 10.016274452209473,
"learning_rate": 4.4622325963559036e-05,
"loss": 0.3677,
"step": 19000
},
{
"epoch": 2.0685900925421885,
"eval_accuracy": 0.8654104979811574,
"eval_f1_macro": 0.8583287093369075,
"eval_f1_micro": 0.8654104979811574,
"eval_loss": 0.5013412237167358,
"eval_precision_macro": 0.8746413088312892,
"eval_precision_micro": 0.8654104979811574,
"eval_recall_macro": 0.866876974721781,
"eval_recall_micro": 0.8654104979811574,
"eval_runtime": 1.8541,
"eval_samples_per_second": 400.738,
"eval_steps_per_second": 50.16,
"step": 19000
},
{
"epoch": 2.1230266739248775,
"grad_norm": 19.003820419311523,
"learning_rate": 4.273035593086245e-05,
"loss": 0.369,
"step": 19500
},
{
"epoch": 2.1230266739248775,
"eval_accuracy": 0.8882907133243607,
"eval_f1_macro": 0.8885969627149977,
"eval_f1_micro": 0.8882907133243607,
"eval_loss": 0.4140874445438385,
"eval_precision_macro": 0.8916130548225267,
"eval_precision_micro": 0.8882907133243607,
"eval_recall_macro": 0.896922330586075,
"eval_recall_micro": 0.8882907133243607,
"eval_runtime": 1.8568,
"eval_samples_per_second": 400.147,
"eval_steps_per_second": 50.086,
"step": 19500
},
{
"epoch": 2.1774632553075666,
"grad_norm": 26.37122917175293,
"learning_rate": 4.083222542568154e-05,
"loss": 0.3277,
"step": 20000
},
{
"epoch": 2.1774632553075666,
"eval_accuracy": 0.8842530282637954,
"eval_f1_macro": 0.8779604260015075,
"eval_f1_micro": 0.8842530282637954,
"eval_loss": 0.3947836756706238,
"eval_precision_macro": 0.8984211698195097,
"eval_precision_micro": 0.8842530282637954,
"eval_recall_macro": 0.8918217883558299,
"eval_recall_micro": 0.8842530282637954,
"eval_runtime": 1.8579,
"eval_samples_per_second": 399.911,
"eval_steps_per_second": 50.056,
"step": 20000
},
{
"epoch": 2.231899836690256,
"grad_norm": 2.8656764030456543,
"learning_rate": 3.8932217179295604e-05,
"loss": 0.388,
"step": 20500
},
{
"epoch": 2.231899836690256,
"eval_accuracy": 0.8882907133243607,
"eval_f1_macro": 0.8938725509467977,
"eval_f1_micro": 0.8882907133243607,
"eval_loss": 0.3332942724227905,
"eval_precision_macro": 0.9195771417290953,
"eval_precision_micro": 0.8882907133243607,
"eval_recall_macro": 0.9004462055748975,
"eval_recall_micro": 0.8882907133243607,
"eval_runtime": 1.8625,
"eval_samples_per_second": 398.92,
"eval_steps_per_second": 49.932,
"step": 20500
},
{
"epoch": 2.286336418072945,
"grad_norm": 26.717533111572266,
"learning_rate": 3.703461815971118e-05,
"loss": 0.3548,
"step": 21000
},
{
"epoch": 2.286336418072945,
"eval_accuracy": 0.8748317631224765,
"eval_f1_macro": 0.8615806030654037,
"eval_f1_micro": 0.8748317631224765,
"eval_loss": 0.47363847494125366,
"eval_precision_macro": 0.8836872736431415,
"eval_precision_micro": 0.8748317631224765,
"eval_recall_macro": 0.8741894628222147,
"eval_recall_micro": 0.8748317631224765,
"eval_runtime": 1.8583,
"eval_samples_per_second": 399.823,
"eval_steps_per_second": 50.045,
"step": 21000
},
{
"epoch": 2.340772999455634,
"grad_norm": 0.045266564935445786,
"learning_rate": 3.514370989902237e-05,
"loss": 0.324,
"step": 21500
},
{
"epoch": 2.340772999455634,
"eval_accuracy": 0.882907133243607,
"eval_f1_macro": 0.8804977813603039,
"eval_f1_micro": 0.882907133243607,
"eval_loss": 0.46873775124549866,
"eval_precision_macro": 0.9098135492410423,
"eval_precision_micro": 0.882907133243607,
"eval_recall_macro": 0.8910827794607792,
"eval_recall_micro": 0.882907133243607,
"eval_runtime": 1.8568,
"eval_samples_per_second": 400.161,
"eval_steps_per_second": 50.087,
"step": 21500
},
{
"epoch": 2.3952095808383236,
"grad_norm": 0.44797322154045105,
"learning_rate": 3.326750497548324e-05,
"loss": 0.3437,
"step": 22000
},
{
"epoch": 2.3952095808383236,
"eval_accuracy": 0.8667563930013459,
"eval_f1_macro": 0.8578652908353367,
"eval_f1_micro": 0.8667563930013459,
"eval_loss": 0.49417996406555176,
"eval_precision_macro": 0.8898066142345816,
"eval_precision_micro": 0.8667563930013459,
"eval_recall_macro": 0.8782685148482547,
"eval_recall_micro": 0.8667563930013459,
"eval_runtime": 1.8548,
"eval_samples_per_second": 400.577,
"eval_steps_per_second": 50.14,
"step": 22000
},
{
"epoch": 2.4496461622210126,
"grad_norm": 1.1798264980316162,
"learning_rate": 3.140271820756983e-05,
"loss": 0.3446,
"step": 22500
},
{
"epoch": 2.4496461622210126,
"eval_accuracy": 0.8815612382234186,
"eval_f1_macro": 0.88010849529162,
"eval_f1_micro": 0.8815612382234186,
"eval_loss": 0.33866673707962036,
"eval_precision_macro": 0.8910364680325128,
"eval_precision_micro": 0.8815612382234186,
"eval_recall_macro": 0.8881100002593636,
"eval_recall_micro": 0.8815612382234186,
"eval_runtime": 1.8562,
"eval_samples_per_second": 400.289,
"eval_steps_per_second": 50.104,
"step": 22500
},
{
"epoch": 2.5040827436037016,
"grad_norm": 0.09330004453659058,
"learning_rate": 2.955732939335316e-05,
"loss": 0.3087,
"step": 23000
},
{
"epoch": 2.5040827436037016,
"eval_accuracy": 0.8748317631224765,
"eval_f1_macro": 0.8598800603795531,
"eval_f1_micro": 0.8748317631224765,
"eval_loss": 0.43970754742622375,
"eval_precision_macro": 0.8698171750911001,
"eval_precision_micro": 0.8748317631224765,
"eval_recall_macro": 0.8839881377308216,
"eval_recall_micro": 0.8748317631224765,
"eval_runtime": 1.8812,
"eval_samples_per_second": 394.958,
"eval_steps_per_second": 49.436,
"step": 23000
},
{
"epoch": 2.558519324986391,
"grad_norm": 0.7801256775856018,
"learning_rate": 2.773550226360711e-05,
"loss": 0.2843,
"step": 23500
},
{
"epoch": 2.558519324986391,
"eval_accuracy": 0.8950201884253028,
"eval_f1_macro": 0.8940959979263783,
"eval_f1_micro": 0.8950201884253028,
"eval_loss": 0.32003289461135864,
"eval_precision_macro": 0.9068783310964942,
"eval_precision_micro": 0.8950201884253028,
"eval_recall_macro": 0.9091191353836482,
"eval_recall_micro": 0.8950201884253028,
"eval_runtime": 1.8582,
"eval_samples_per_second": 399.845,
"eval_steps_per_second": 50.048,
"step": 23500
},
{
"epoch": 2.61295590636908,
"grad_norm": 0.025174345821142197,
"learning_rate": 2.5941347387132282e-05,
"loss": 0.2754,
"step": 24000
},
{
"epoch": 2.61295590636908,
"eval_accuracy": 0.8909825033647375,
"eval_f1_macro": 0.8836633698575453,
"eval_f1_micro": 0.8909825033647375,
"eval_loss": 0.34719252586364746,
"eval_precision_macro": 0.9204848261037936,
"eval_precision_micro": 0.8909825033647375,
"eval_recall_macro": 0.8991668767364871,
"eval_recall_micro": 0.8909825033647375,
"eval_runtime": 1.8603,
"eval_samples_per_second": 399.388,
"eval_steps_per_second": 49.991,
"step": 24000
},
{
"epoch": 2.667392487751769,
"grad_norm": 167.9337158203125,
"learning_rate": 2.417891289612432e-05,
"loss": 0.2513,
"step": 24500
},
{
"epoch": 2.667392487751769,
"eval_accuracy": 0.882907133243607,
"eval_f1_macro": 0.8739179478771129,
"eval_f1_micro": 0.882907133243607,
"eval_loss": 0.4664686620235443,
"eval_precision_macro": 0.8920238552855676,
"eval_precision_micro": 0.882907133243607,
"eval_recall_macro": 0.880791254873124,
"eval_recall_micro": 0.882907133243607,
"eval_runtime": 1.8556,
"eval_samples_per_second": 400.418,
"eval_steps_per_second": 50.12,
"step": 24500
},
{
"epoch": 2.721829069134458,
"grad_norm": 74.27070617675781,
"learning_rate": 2.2452175352417002e-05,
"loss": 0.2604,
"step": 25000
},
{
"epoch": 2.721829069134458,
"eval_accuracy": 0.901749663526245,
"eval_f1_macro": 0.9011896656482543,
"eval_f1_micro": 0.901749663526245,
"eval_loss": 0.28674057126045227,
"eval_precision_macro": 0.9178731490342212,
"eval_precision_micro": 0.901749663526245,
"eval_recall_macro": 0.9081905054221786,
"eval_recall_micro": 0.901749663526245,
"eval_runtime": 1.8585,
"eval_samples_per_second": 399.783,
"eval_steps_per_second": 50.04,
"step": 25000
},
{
"epoch": 2.7762656505171477,
"grad_norm": 0.4500181972980499,
"learning_rate": 2.0765030775208945e-05,
"loss": 0.2841,
"step": 25500
},
{
"epoch": 2.7762656505171477,
"eval_accuracy": 0.8882907133243607,
"eval_f1_macro": 0.885935664958505,
"eval_f1_micro": 0.8882907133243607,
"eval_loss": 0.3305457532405853,
"eval_precision_macro": 0.9096755197882269,
"eval_precision_micro": 0.8882907133243607,
"eval_recall_macro": 0.8987215294675002,
"eval_recall_micro": 0.8882907133243607,
"eval_runtime": 1.8563,
"eval_samples_per_second": 400.268,
"eval_steps_per_second": 50.101,
"step": 25500
},
{
"epoch": 2.8307022318998367,
"grad_norm": 0.9561833143234253,
"learning_rate": 1.9124527545270138e-05,
"loss": 0.2722,
"step": 26000
},
{
"epoch": 2.8307022318998367,
"eval_accuracy": 0.8882907133243607,
"eval_f1_macro": 0.8906563450031565,
"eval_f1_micro": 0.8882907133243607,
"eval_loss": 0.38373151421546936,
"eval_precision_macro": 0.9167678769615956,
"eval_precision_micro": 0.8882907133243607,
"eval_recall_macro": 0.9029594325171784,
"eval_recall_micro": 0.8882907133243607,
"eval_runtime": 1.8579,
"eval_samples_per_second": 399.917,
"eval_steps_per_second": 50.057,
"step": 26000
},
{
"epoch": 2.8851388132825257,
"grad_norm": 1.9382160902023315,
"learning_rate": 1.7527793188046586e-05,
"loss": 0.2342,
"step": 26500
},
{
"epoch": 2.8851388132825257,
"eval_accuracy": 0.8936742934051144,
"eval_f1_macro": 0.8938891552134242,
"eval_f1_micro": 0.8936742934051144,
"eval_loss": 0.3442441523075104,
"eval_precision_macro": 0.9054172318747689,
"eval_precision_micro": 0.8936742934051144,
"eval_recall_macro": 0.9022807262990895,
"eval_recall_micro": 0.8936742934051144,
"eval_runtime": 1.8553,
"eval_samples_per_second": 400.483,
"eval_steps_per_second": 50.128,
"step": 26500
},
{
"epoch": 2.939575394665215,
"grad_norm": 38.32719802856445,
"learning_rate": 1.5981762627409745e-05,
"loss": 0.2352,
"step": 27000
},
{
"epoch": 2.939575394665215,
"eval_accuracy": 0.9071332436069987,
"eval_f1_macro": 0.9052958318083129,
"eval_f1_micro": 0.9071332436069987,
"eval_loss": 0.29468008875846863,
"eval_precision_macro": 0.9313363962062918,
"eval_precision_micro": 0.9071332436069987,
"eval_recall_macro": 0.9120686908810327,
"eval_recall_micro": 0.9071332436069987,
"eval_runtime": 1.8593,
"eval_samples_per_second": 399.621,
"eval_steps_per_second": 50.02,
"step": 27000
},
{
"epoch": 2.9940119760479043,
"grad_norm": 19.57600212097168,
"learning_rate": 1.4489924155351557e-05,
"loss": 0.208,
"step": 27500
},
{
"epoch": 2.9940119760479043,
"eval_accuracy": 0.892328398384926,
"eval_f1_macro": 0.8896964859840816,
"eval_f1_micro": 0.892328398384926,
"eval_loss": 0.3449091613292694,
"eval_precision_macro": 0.9079271995760917,
"eval_precision_micro": 0.892328398384926,
"eval_recall_macro": 0.897032931021147,
"eval_recall_micro": 0.892328398384926,
"eval_runtime": 1.8601,
"eval_samples_per_second": 399.439,
"eval_steps_per_second": 49.997,
"step": 27500
},
{
"epoch": 3.0484485574305933,
"grad_norm": 11.636988639831543,
"learning_rate": 1.3055643790842023e-05,
"loss": 0.1509,
"step": 28000
},
{
"epoch": 3.0484485574305933,
"eval_accuracy": 0.8936742934051144,
"eval_f1_macro": 0.8961635552963756,
"eval_f1_micro": 0.8936742934051144,
"eval_loss": 0.3678928017616272,
"eval_precision_macro": 0.9294481598993097,
"eval_precision_micro": 0.8936742934051144,
"eval_recall_macro": 0.9003474444262722,
"eval_recall_micro": 0.8936742934051144,
"eval_runtime": 1.8558,
"eval_samples_per_second": 400.371,
"eval_steps_per_second": 50.114,
"step": 28000
},
{
"epoch": 3.1028851388132823,
"grad_norm": 1.134954571723938,
"learning_rate": 1.1682157685117184e-05,
"loss": 0.1225,
"step": 28500
},
{
"epoch": 3.1028851388132823,
"eval_accuracy": 0.901749663526245,
"eval_f1_macro": 0.9055763390711317,
"eval_f1_micro": 0.901749663526245,
"eval_loss": 0.3573816120624542,
"eval_precision_macro": 0.9179910366820196,
"eval_precision_micro": 0.901749663526245,
"eval_recall_macro": 0.9057635732164224,
"eval_recall_micro": 0.901749663526245,
"eval_runtime": 1.8615,
"eval_samples_per_second": 399.14,
"eval_steps_per_second": 49.96,
"step": 28500
},
{
"epoch": 3.157321720195972,
"grad_norm": 1.2862955331802368,
"learning_rate": 1.0372564819986089e-05,
"loss": 0.2055,
"step": 29000
},
{
"epoch": 3.157321720195972,
"eval_accuracy": 0.9084791386271871,
"eval_f1_macro": 0.9056021054670421,
"eval_f1_micro": 0.9084791386271871,
"eval_loss": 0.2638719081878662,
"eval_precision_macro": 0.920340264797567,
"eval_precision_micro": 0.9084791386271871,
"eval_recall_macro": 0.9112520674648165,
"eval_recall_micro": 0.9084791386271871,
"eval_runtime": 1.8612,
"eval_samples_per_second": 399.215,
"eval_steps_per_second": 49.969,
"step": 29000
},
{
"epoch": 3.211758301578661,
"grad_norm": 0.8544738292694092,
"learning_rate": 9.132236901396202e-06,
"loss": 0.1473,
"step": 29500
},
{
"epoch": 3.211758301578661,
"eval_accuracy": 0.9125168236877523,
"eval_f1_macro": 0.9146584946117865,
"eval_f1_micro": 0.9125168236877523,
"eval_loss": 0.289911687374115,
"eval_precision_macro": 0.9314970028408138,
"eval_precision_micro": 0.9125168236877523,
"eval_recall_macro": 0.9216769513362958,
"eval_recall_micro": 0.9125168236877523,
"eval_runtime": 1.8605,
"eval_samples_per_second": 399.346,
"eval_steps_per_second": 49.985,
"step": 29500
},
{
"epoch": 3.26619488296135,
"grad_norm": 0.008034386672079563,
"learning_rate": 7.959002150395973e-06,
"loss": 0.1462,
"step": 30000
},
{
"epoch": 3.26619488296135,
"eval_accuracy": 0.8936742934051144,
"eval_f1_macro": 0.8925380101110163,
"eval_f1_micro": 0.8936742934051144,
"eval_loss": 0.3166608214378357,
"eval_precision_macro": 0.9160115249258518,
"eval_precision_micro": 0.8936742934051144,
"eval_recall_macro": 0.9081516293872548,
"eval_recall_micro": 0.8936742934051144,
"eval_runtime": 1.8602,
"eval_samples_per_second": 399.413,
"eval_steps_per_second": 49.994,
"step": 30000
},
{
"epoch": 3.3206314643440393,
"grad_norm": 35.23937225341797,
"learning_rate": 6.858061155456108e-06,
"loss": 0.1595,
"step": 30500
},
{
"epoch": 3.3206314643440393,
"eval_accuracy": 0.8963660834454913,
"eval_f1_macro": 0.8987254233226418,
"eval_f1_micro": 0.8963660834454913,
"eval_loss": 0.27786874771118164,
"eval_precision_macro": 0.9217076655624952,
"eval_precision_micro": 0.8963660834454913,
"eval_recall_macro": 0.9094003908764269,
"eval_recall_micro": 0.8963660834454913,
"eval_runtime": 1.8584,
"eval_samples_per_second": 399.805,
"eval_steps_per_second": 50.043,
"step": 30500
},
{
"epoch": 3.3750680457267284,
"grad_norm": 0.8242081999778748,
"learning_rate": 5.831897957807116e-06,
"loss": 0.1305,
"step": 31000
},
{
"epoch": 3.3750680457267284,
"eval_accuracy": 0.9057873485868102,
"eval_f1_macro": 0.9093215110275342,
"eval_f1_micro": 0.9057873485868102,
"eval_loss": 0.2698396146297455,
"eval_precision_macro": 0.9278207951585322,
"eval_precision_micro": 0.9057873485868102,
"eval_recall_macro": 0.9239273588962285,
"eval_recall_micro": 0.9057873485868102,
"eval_runtime": 1.8582,
"eval_samples_per_second": 399.849,
"eval_steps_per_second": 50.048,
"step": 31000
},
{
"epoch": 3.4295046271094174,
"grad_norm": 0.0026783442590385675,
"learning_rate": 4.882827878346214e-06,
"loss": 0.136,
"step": 31500
},
{
"epoch": 3.4295046271094174,
"eval_accuracy": 0.9084791386271871,
"eval_f1_macro": 0.917688655658605,
"eval_f1_micro": 0.9084791386271871,
"eval_loss": 0.246324360370636,
"eval_precision_macro": 0.9175595026972325,
"eval_precision_micro": 0.9084791386271871,
"eval_recall_macro": 0.9186921936508842,
"eval_recall_micro": 0.9084791386271871,
"eval_runtime": 1.8567,
"eval_samples_per_second": 400.174,
"eval_steps_per_second": 50.089,
"step": 31500
},
{
"epoch": 3.483941208492107,
"grad_norm": 16.742229461669922,
"learning_rate": 4.012992293603897e-06,
"loss": 0.143,
"step": 32000
},
{
"epoch": 3.483941208492107,
"eval_accuracy": 0.9044414535666218,
"eval_f1_macro": 0.9082864223964277,
"eval_f1_micro": 0.9044414535666218,
"eval_loss": 0.2518066465854645,
"eval_precision_macro": 0.9316502423702806,
"eval_precision_micro": 0.9044414535666218,
"eval_recall_macro": 0.9226607536872533,
"eval_recall_micro": 0.9044414535666218,
"eval_runtime": 1.8555,
"eval_samples_per_second": 400.439,
"eval_steps_per_second": 50.122,
"step": 32000
},
{
"epoch": 3.538377789874796,
"grad_norm": 0.739364504814148,
"learning_rate": 3.224353804179279e-06,
"loss": 0.1098,
"step": 32500
},
{
"epoch": 3.538377789874796,
"eval_accuracy": 0.9084791386271871,
"eval_f1_macro": 0.9186506026518713,
"eval_f1_micro": 0.9084791386271871,
"eval_loss": 0.22374330461025238,
"eval_precision_macro": 0.9389000055745785,
"eval_precision_micro": 0.9084791386271871,
"eval_recall_macro": 0.9288568809947483,
"eval_recall_micro": 0.9084791386271871,
"eval_runtime": 1.8609,
"eval_samples_per_second": 399.265,
"eval_steps_per_second": 49.975,
"step": 32500
},
{
"epoch": 3.592814371257485,
"grad_norm": 3.698509693145752,
"learning_rate": 2.518691806545661e-06,
"loss": 0.1372,
"step": 33000
},
{
"epoch": 3.592814371257485,
"eval_accuracy": 0.9138627187079408,
"eval_f1_macro": 0.9127719353333948,
"eval_f1_micro": 0.9138627187079408,
"eval_loss": 0.2564922869205475,
"eval_precision_macro": 0.9313759089420434,
"eval_precision_micro": 0.9138627187079408,
"eval_recall_macro": 0.9216642726414609,
"eval_recall_micro": 0.9138627187079408,
"eval_runtime": 1.868,
"eval_samples_per_second": 397.756,
"eval_steps_per_second": 49.786,
"step": 33000
},
{
"epoch": 3.6472509526401744,
"grad_norm": 1.8072404861450195,
"learning_rate": 1.897598478217515e-06,
"loss": 0.1578,
"step": 33500
},
{
"epoch": 3.6472509526401744,
"eval_accuracy": 0.9152086137281292,
"eval_f1_macro": 0.9164100071127091,
"eval_f1_micro": 0.9152086137281292,
"eval_loss": 0.23528015613555908,
"eval_precision_macro": 0.9262783920839164,
"eval_precision_micro": 0.9152086137281292,
"eval_recall_macro": 0.9252885945686548,
"eval_recall_micro": 0.9152086137281292,
"eval_runtime": 1.8737,
"eval_samples_per_second": 396.537,
"eval_steps_per_second": 49.634,
"step": 33500
},
{
"epoch": 3.7016875340228634,
"grad_norm": 1.3797998428344727,
"learning_rate": 1.3624751853376838e-06,
"loss": 0.1181,
"step": 34000
},
{
"epoch": 3.7016875340228634,
"eval_accuracy": 0.9071332436069987,
"eval_f1_macro": 0.9105156456724851,
"eval_f1_micro": 0.9071332436069987,
"eval_loss": 0.24153241515159607,
"eval_precision_macro": 0.9329517679306777,
"eval_precision_micro": 0.9071332436069987,
"eval_recall_macro": 0.9218709083111013,
"eval_recall_micro": 0.9071332436069987,
"eval_runtime": 1.8682,
"eval_samples_per_second": 397.7,
"eval_steps_per_second": 49.779,
"step": 34000
},
{
"epoch": 3.7561241154055525,
"grad_norm": 2.415684223175049,
"learning_rate": 9.153375046762813e-07,
"loss": 0.1255,
"step": 34500
},
{
"epoch": 3.7561241154055525,
"eval_accuracy": 0.9044414535666218,
"eval_f1_macro": 0.9081620884976839,
"eval_f1_micro": 0.9044414535666218,
"eval_loss": 0.22819305956363678,
"eval_precision_macro": 0.9305656056604823,
"eval_precision_micro": 0.9044414535666218,
"eval_recall_macro": 0.920745058359063,
"eval_recall_micro": 0.9044414535666218,
"eval_runtime": 1.866,
"eval_samples_per_second": 398.172,
"eval_steps_per_second": 49.839,
"step": 34500
},
{
"epoch": 3.810560696788242,
"grad_norm": 1.141237735748291,
"learning_rate": 5.560337978008257e-07,
"loss": 0.1248,
"step": 35000
},
{
"epoch": 3.810560696788242,
"eval_accuracy": 0.9057873485868102,
"eval_f1_macro": 0.9094758744091115,
"eval_f1_micro": 0.9057873485868102,
"eval_loss": 0.2376009076833725,
"eval_precision_macro": 0.9331901460056251,
"eval_precision_micro": 0.9057873485868102,
"eval_recall_macro": 0.9209645994598771,
"eval_recall_micro": 0.9057873485868102,
"eval_runtime": 1.8789,
"eval_samples_per_second": 395.437,
"eval_steps_per_second": 49.496,
"step": 35000
},
{
"epoch": 3.864997278170931,
"grad_norm": 3.701718330383301,
"learning_rate": 2.849185475191707e-07,
"loss": 0.122,
"step": 35500
},
{
"epoch": 3.864997278170931,
"eval_accuracy": 0.9071332436069987,
"eval_f1_macro": 0.9142001714018019,
"eval_f1_micro": 0.9071332436069987,
"eval_loss": 0.23746204376220703,
"eval_precision_macro": 0.9387460280320955,
"eval_precision_micro": 0.9071332436069987,
"eval_recall_macro": 0.9249589603621328,
"eval_recall_micro": 0.9071332436069987,
"eval_runtime": 1.8692,
"eval_samples_per_second": 397.504,
"eval_steps_per_second": 49.755,
"step": 35500
},
{
"epoch": 3.91943385955362,
"grad_norm": 14.987931251525879,
"learning_rate": 1.0341200492882675e-07,
"loss": 0.1295,
"step": 36000
},
{
"epoch": 3.91943385955362,
"eval_accuracy": 0.9071332436069987,
"eval_f1_macro": 0.9128662206220384,
"eval_f1_micro": 0.9071332436069987,
"eval_loss": 0.2367352843284607,
"eval_precision_macro": 0.9370515623670415,
"eval_precision_micro": 0.9071332436069987,
"eval_recall_macro": 0.9239407899360675,
"eval_recall_micro": 0.9071332436069987,
"eval_runtime": 1.8706,
"eval_samples_per_second": 397.195,
"eval_steps_per_second": 49.716,
"step": 36000
},
{
"epoch": 3.973870440936309,
"grad_norm": 2.1749684810638428,
"learning_rate": 1.1923701273950372e-08,
"loss": 0.136,
"step": 36500
},
{
"epoch": 3.973870440936309,
"eval_accuracy": 0.9084791386271871,
"eval_f1_macro": 0.9156857889588481,
"eval_f1_micro": 0.9084791386271871,
"eval_loss": 0.23688668012619019,
"eval_precision_macro": 0.9402261655416446,
"eval_precision_micro": 0.9084791386271871,
"eval_recall_macro": 0.926447055600228,
"eval_recall_micro": 0.9084791386271871,
"eval_runtime": 1.8706,
"eval_samples_per_second": 397.207,
"eval_steps_per_second": 49.718,
"step": 36500
}
],
"logging_steps": 500,
"max_steps": 36740,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.684074511496294e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}