| { |
| "best_metric": 0.9186506026518713, |
| "best_model_checkpoint": "./arabert_author_model_full/checkpoint-32500", |
| "epoch": 3.973870440936309, |
| "eval_steps": 500, |
| "global_step": 36500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05443658138268917, |
| "grad_norm": 7.748462200164795, |
| "learning_rate": 1.0821992378878607e-05, |
| "loss": 3.0232, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05443658138268917, |
| "eval_accuracy": 0.13458950201884254, |
| "eval_f1_macro": 0.06564828888070251, |
| "eval_f1_micro": 0.13458950201884254, |
| "eval_loss": 2.790053606033325, |
| "eval_precision_macro": 0.06930701192581881, |
| "eval_precision_micro": 0.13458950201884254, |
| "eval_recall_macro": 0.14436342078225026, |
| "eval_recall_micro": 0.13458950201884254, |
| "eval_runtime": 1.8577, |
| "eval_samples_per_second": 399.963, |
| "eval_steps_per_second": 50.063, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.10887316276537834, |
| "grad_norm": 41.86913299560547, |
| "learning_rate": 2.170930865541644e-05, |
| "loss": 2.2125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.10887316276537834, |
| "eval_accuracy": 0.5733512786002691, |
| "eval_f1_macro": 0.3820533588398374, |
| "eval_f1_micro": 0.5733512786002691, |
| "eval_loss": 1.659558892250061, |
| "eval_precision_macro": 0.3659038715437873, |
| "eval_precision_micro": 0.5733512786002691, |
| "eval_recall_macro": 0.45853990573746223, |
| "eval_recall_micro": 0.5733512786002691, |
| "eval_runtime": 1.8553, |
| "eval_samples_per_second": 400.47, |
| "eval_steps_per_second": 50.126, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1633097441480675, |
| "grad_norm": 14.793054580688477, |
| "learning_rate": 3.25748502994012e-05, |
| "loss": 1.6587, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1633097441480675, |
| "eval_accuracy": 0.5989232839838493, |
| "eval_f1_macro": 0.4808864838162928, |
| "eval_f1_micro": 0.5989232839838493, |
| "eval_loss": 1.5170563459396362, |
| "eval_precision_macro": 0.6274791755154383, |
| "eval_precision_micro": 0.5989232839838493, |
| "eval_recall_macro": 0.49222247701556004, |
| "eval_recall_micro": 0.5989232839838493, |
| "eval_runtime": 1.8561, |
| "eval_samples_per_second": 400.305, |
| "eval_steps_per_second": 50.105, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.21774632553075668, |
| "grad_norm": 26.568124771118164, |
| "learning_rate": 4.344039194338596e-05, |
| "loss": 1.3366, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.21774632553075668, |
| "eval_accuracy": 0.6150740242261103, |
| "eval_f1_macro": 0.5392686072327233, |
| "eval_f1_micro": 0.6150740242261103, |
| "eval_loss": 1.3329455852508545, |
| "eval_precision_macro": 0.6295802748584037, |
| "eval_precision_micro": 0.6150740242261103, |
| "eval_recall_macro": 0.5725953292113953, |
| "eval_recall_micro": 0.6150740242261103, |
| "eval_runtime": 1.8573, |
| "eval_samples_per_second": 400.047, |
| "eval_steps_per_second": 50.073, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2721829069134458, |
| "grad_norm": 27.614654541015625, |
| "learning_rate": 5.432770821992379e-05, |
| "loss": 1.3134, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2721829069134458, |
| "eval_accuracy": 0.6917900403768507, |
| "eval_f1_macro": 0.5839516207825546, |
| "eval_f1_micro": 0.6917900403768507, |
| "eval_loss": 1.2291903495788574, |
| "eval_precision_macro": 0.6531006523438395, |
| "eval_precision_micro": 0.6917900403768507, |
| "eval_recall_macro": 0.6020036925958046, |
| "eval_recall_micro": 0.6917900403768507, |
| "eval_runtime": 1.8554, |
| "eval_samples_per_second": 400.461, |
| "eval_steps_per_second": 50.125, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.326619488296135, |
| "grad_norm": 26.257598876953125, |
| "learning_rate": 6.521502449646163e-05, |
| "loss": 1.2263, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.326619488296135, |
| "eval_accuracy": 0.6958277254374159, |
| "eval_f1_macro": 0.5987092681766552, |
| "eval_f1_micro": 0.6958277254374159, |
| "eval_loss": 1.247532606124878, |
| "eval_precision_macro": 0.691355222631176, |
| "eval_precision_micro": 0.6958277254374159, |
| "eval_recall_macro": 0.6326688053535838, |
| "eval_recall_micro": 0.6958277254374159, |
| "eval_runtime": 1.8557, |
| "eval_samples_per_second": 400.383, |
| "eval_steps_per_second": 50.115, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.38105606967882416, |
| "grad_norm": 20.401668548583984, |
| "learning_rate": 7.610234077299946e-05, |
| "loss": 1.2354, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.38105606967882416, |
| "eval_accuracy": 0.6648721399730821, |
| "eval_f1_macro": 0.5547791146400078, |
| "eval_f1_micro": 0.6648721399730821, |
| "eval_loss": 1.4817063808441162, |
| "eval_precision_macro": 0.609418529255712, |
| "eval_precision_micro": 0.6648721399730821, |
| "eval_recall_macro": 0.5953309879820717, |
| "eval_recall_micro": 0.6648721399730821, |
| "eval_runtime": 1.8585, |
| "eval_samples_per_second": 399.778, |
| "eval_steps_per_second": 50.039, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.43549265106151336, |
| "grad_norm": 12.415190696716309, |
| "learning_rate": 7.998139872043199e-05, |
| "loss": 1.2221, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.43549265106151336, |
| "eval_accuracy": 0.6621803499327052, |
| "eval_f1_macro": 0.5842490840574637, |
| "eval_f1_micro": 0.6621803499327052, |
| "eval_loss": 1.2946059703826904, |
| "eval_precision_macro": 0.6743659102946021, |
| "eval_precision_micro": 0.6621803499327052, |
| "eval_recall_macro": 0.6174956983289226, |
| "eval_recall_micro": 0.6621803499327052, |
| "eval_runtime": 1.8549, |
| "eval_samples_per_second": 400.563, |
| "eval_steps_per_second": 50.138, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.4899292324442025, |
| "grad_norm": 45.67084503173828, |
| "learning_rate": 7.987866825500063e-05, |
| "loss": 1.203, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.4899292324442025, |
| "eval_accuracy": 0.6850605652759085, |
| "eval_f1_macro": 0.5971314943399703, |
| "eval_f1_micro": 0.6850605652759085, |
| "eval_loss": 1.2994459867477417, |
| "eval_precision_macro": 0.683861588771308, |
| "eval_precision_micro": 0.6850605652759085, |
| "eval_recall_macro": 0.6242521591611176, |
| "eval_recall_micro": 0.6850605652759085, |
| "eval_runtime": 1.8624, |
| "eval_samples_per_second": 398.953, |
| "eval_steps_per_second": 49.936, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5443658138268916, |
| "grad_norm": 5.911087989807129, |
| "learning_rate": 7.968584427636286e-05, |
| "loss": 1.0458, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5443658138268916, |
| "eval_accuracy": 0.7577388963660835, |
| "eval_f1_macro": 0.6834938073457237, |
| "eval_f1_micro": 0.7577388963660835, |
| "eval_loss": 0.9275029897689819, |
| "eval_precision_macro": 0.6852672080248814, |
| "eval_precision_micro": 0.7577388963660835, |
| "eval_recall_macro": 0.738924762321289, |
| "eval_recall_micro": 0.7577388963660835, |
| "eval_runtime": 1.8549, |
| "eval_samples_per_second": 400.555, |
| "eval_steps_per_second": 50.137, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5988023952095808, |
| "grad_norm": 48.83789825439453, |
| "learning_rate": 7.940413126469914e-05, |
| "loss": 1.1084, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.5988023952095808, |
| "eval_accuracy": 0.7133243606998654, |
| "eval_f1_macro": 0.6529664892159153, |
| "eval_f1_micro": 0.7133243606998654, |
| "eval_loss": 1.1383692026138306, |
| "eval_precision_macro": 0.711194884206057, |
| "eval_precision_micro": 0.7133243606998654, |
| "eval_recall_macro": 0.6790887107741691, |
| "eval_recall_micro": 0.7133243606998654, |
| "eval_runtime": 1.8565, |
| "eval_samples_per_second": 400.21, |
| "eval_steps_per_second": 50.094, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.65323897659227, |
| "grad_norm": 59.57648849487305, |
| "learning_rate": 7.903303594852863e-05, |
| "loss": 1.0011, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.65323897659227, |
| "eval_accuracy": 0.7644683714670256, |
| "eval_f1_macro": 0.7124643316894146, |
| "eval_f1_micro": 0.7644683714670256, |
| "eval_loss": 0.9443386197090149, |
| "eval_precision_macro": 0.743414646590002, |
| "eval_precision_micro": 0.7644683714670256, |
| "eval_recall_macro": 0.7314305522869405, |
| "eval_recall_micro": 0.7644683714670256, |
| "eval_runtime": 1.8599, |
| "eval_samples_per_second": 399.493, |
| "eval_steps_per_second": 50.004, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7076755579749592, |
| "grad_norm": 5.128486156463623, |
| "learning_rate": 7.857387081654268e-05, |
| "loss": 1.0064, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7076755579749592, |
| "eval_accuracy": 0.7954239569313594, |
| "eval_f1_macro": 0.7642500691509388, |
| "eval_f1_micro": 0.7954239569313594, |
| "eval_loss": 0.6533617377281189, |
| "eval_precision_macro": 0.7635597495236878, |
| "eval_precision_micro": 0.7954239569313594, |
| "eval_recall_macro": 0.793458207807885, |
| "eval_recall_micro": 0.7954239569313594, |
| "eval_runtime": 1.8647, |
| "eval_samples_per_second": 398.464, |
| "eval_steps_per_second": 49.875, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7621121393576483, |
| "grad_norm": 5.791709899902344, |
| "learning_rate": 7.80276718780503e-05, |
| "loss": 0.9092, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.7621121393576483, |
| "eval_accuracy": 0.784656796769852, |
| "eval_f1_macro": 0.7376954018021566, |
| "eval_f1_micro": 0.784656796769852, |
| "eval_loss": 0.8241704702377319, |
| "eval_precision_macro": 0.751883868032923, |
| "eval_precision_micro": 0.784656796769852, |
| "eval_recall_macro": 0.7632956885406171, |
| "eval_recall_micro": 0.784656796769852, |
| "eval_runtime": 1.8548, |
| "eval_samples_per_second": 400.574, |
| "eval_steps_per_second": 50.139, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.8165487207403375, |
| "grad_norm": 77.25128936767578, |
| "learning_rate": 7.739702023046593e-05, |
| "loss": 0.8999, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8165487207403375, |
| "eval_accuracy": 0.7671601615074024, |
| "eval_f1_macro": 0.708723750703533, |
| "eval_f1_micro": 0.7671601615074024, |
| "eval_loss": 0.9454395174980164, |
| "eval_precision_macro": 0.7607959054064888, |
| "eval_precision_micro": 0.7671601615074024, |
| "eval_recall_macro": 0.7088710160869984, |
| "eval_recall_micro": 0.7671601615074024, |
| "eval_runtime": 1.8564, |
| "eval_samples_per_second": 400.238, |
| "eval_steps_per_second": 50.097, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8709853021230267, |
| "grad_norm": 15.056076049804688, |
| "learning_rate": 7.668081162066953e-05, |
| "loss": 0.9202, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.8709853021230267, |
| "eval_accuracy": 0.784656796769852, |
| "eval_f1_macro": 0.7368074575740363, |
| "eval_f1_micro": 0.784656796769852, |
| "eval_loss": 0.9090538620948792, |
| "eval_precision_macro": 0.7702538361499491, |
| "eval_precision_micro": 0.784656796769852, |
| "eval_recall_macro": 0.7483242810004438, |
| "eval_recall_micro": 0.784656796769852, |
| "eval_runtime": 1.8578, |
| "eval_samples_per_second": 399.93, |
| "eval_steps_per_second": 50.059, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.9254218835057159, |
| "grad_norm": 64.78616333007812, |
| "learning_rate": 7.588184049330584e-05, |
| "loss": 0.8391, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.9254218835057159, |
| "eval_accuracy": 0.8021534320323015, |
| "eval_f1_macro": 0.7623929352981407, |
| "eval_f1_micro": 0.8021534320323015, |
| "eval_loss": 0.7761164903640747, |
| "eval_precision_macro": 0.7946649104240797, |
| "eval_precision_micro": 0.8021534320323015, |
| "eval_recall_macro": 0.7814529031598654, |
| "eval_recall_micro": 0.8021534320323015, |
| "eval_runtime": 1.8709, |
| "eval_samples_per_second": 397.135, |
| "eval_steps_per_second": 49.709, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.979858464888405, |
| "grad_norm": 9.414610862731934, |
| "learning_rate": 7.500374892838819e-05, |
| "loss": 0.7599, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.979858464888405, |
| "eval_accuracy": 0.7900403768506057, |
| "eval_f1_macro": 0.7371605784112564, |
| "eval_f1_micro": 0.7900403768506057, |
| "eval_loss": 0.9420120716094971, |
| "eval_precision_macro": 0.7952387110277418, |
| "eval_precision_micro": 0.7900403768506057, |
| "eval_recall_macro": 0.7764252549275673, |
| "eval_recall_micro": 0.7900403768506057, |
| "eval_runtime": 1.8556, |
| "eval_samples_per_second": 400.41, |
| "eval_steps_per_second": 50.119, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.0342950462710943, |
| "grad_norm": 119.14054107666016, |
| "learning_rate": 7.404499941253026e-05, |
| "loss": 0.7595, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.0342950462710943, |
| "eval_accuracy": 0.8021534320323015, |
| "eval_f1_macro": 0.7648820230071746, |
| "eval_f1_micro": 0.8021534320323015, |
| "eval_loss": 0.7322187423706055, |
| "eval_precision_macro": 0.8020003432831997, |
| "eval_precision_micro": 0.8021534320323015, |
| "eval_recall_macro": 0.7894509757341642, |
| "eval_recall_micro": 0.8021534320323015, |
| "eval_runtime": 1.866, |
| "eval_samples_per_second": 398.178, |
| "eval_steps_per_second": 49.839, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.0887316276537833, |
| "grad_norm": 1.1056867837905884, |
| "learning_rate": 7.300943453361286e-05, |
| "loss": 0.7356, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.0887316276537833, |
| "eval_accuracy": 0.784656796769852, |
| "eval_f1_macro": 0.7282785512433163, |
| "eval_f1_micro": 0.784656796769852, |
| "eval_loss": 0.987204372882843, |
| "eval_precision_macro": 0.7431660072967824, |
| "eval_precision_micro": 0.784656796769852, |
| "eval_recall_macro": 0.759777569867129, |
| "eval_recall_micro": 0.784656796769852, |
| "eval_runtime": 1.86, |
| "eval_samples_per_second": 399.455, |
| "eval_steps_per_second": 49.999, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.1431682090364725, |
| "grad_norm": 0.7999147772789001, |
| "learning_rate": 7.189939082544474e-05, |
| "loss": 0.7319, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1431682090364725, |
| "eval_accuracy": 0.8223418573351279, |
| "eval_f1_macro": 0.7710850473325811, |
| "eval_f1_micro": 0.8223418573351279, |
| "eval_loss": 0.8551661968231201, |
| "eval_precision_macro": 0.8068470518533863, |
| "eval_precision_micro": 0.8223418573351279, |
| "eval_recall_macro": 0.7781452760567589, |
| "eval_recall_micro": 0.8223418573351279, |
| "eval_runtime": 1.8597, |
| "eval_samples_per_second": 399.524, |
| "eval_steps_per_second": 50.008, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1976047904191618, |
| "grad_norm": 75.80143737792969, |
| "learning_rate": 7.071737286760956e-05, |
| "loss": 0.6937, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.1976047904191618, |
| "eval_accuracy": 0.8358008075370121, |
| "eval_f1_macro": 0.8105729169252525, |
| "eval_f1_micro": 0.8358008075370121, |
| "eval_loss": 0.6681444644927979, |
| "eval_precision_macro": 0.8251281216307259, |
| "eval_precision_micro": 0.8358008075370121, |
| "eval_recall_macro": 0.8299367132530477, |
| "eval_recall_micro": 0.8358008075370121, |
| "eval_runtime": 1.858, |
| "eval_samples_per_second": 399.893, |
| "eval_steps_per_second": 50.054, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.2520413718018508, |
| "grad_norm": 170.24810791015625, |
| "learning_rate": 6.946604763441019e-05, |
| "loss": 0.6245, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.2520413718018508, |
| "eval_accuracy": 0.8304172274562585, |
| "eval_f1_macro": 0.8042113065648884, |
| "eval_f1_micro": 0.8304172274562585, |
| "eval_loss": 0.6982793211936951, |
| "eval_precision_macro": 0.8149073185463646, |
| "eval_precision_micro": 0.8304172274562585, |
| "eval_recall_macro": 0.825200491276461, |
| "eval_recall_micro": 0.8304172274562585, |
| "eval_runtime": 1.858, |
| "eval_samples_per_second": 399.886, |
| "eval_steps_per_second": 50.053, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.30647795318454, |
| "grad_norm": 4.239190101623535, |
| "learning_rate": 6.814823847740345e-05, |
| "loss": 0.7154, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.30647795318454, |
| "eval_accuracy": 0.8344549125168237, |
| "eval_f1_macro": 0.8175929788783902, |
| "eval_f1_micro": 0.8344549125168237, |
| "eval_loss": 0.5690922737121582, |
| "eval_precision_macro": 0.8293167371438036, |
| "eval_precision_micro": 0.8344549125168237, |
| "eval_recall_macro": 0.8434059311064652, |
| "eval_recall_micro": 0.8344549125168237, |
| "eval_runtime": 1.8586, |
| "eval_samples_per_second": 399.774, |
| "eval_steps_per_second": 50.039, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.360914534567229, |
| "grad_norm": 108.4642105102539, |
| "learning_rate": 6.676691875510264e-05, |
| "loss": 0.6284, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.360914534567229, |
| "eval_accuracy": 0.8492597577388964, |
| "eval_f1_macro": 0.8301394566864201, |
| "eval_f1_micro": 0.8492597577388964, |
| "eval_loss": 0.5634124279022217, |
| "eval_precision_macro": 0.8533406425457963, |
| "eval_precision_micro": 0.8492597577388964, |
| "eval_recall_macro": 0.8328288148155981, |
| "eval_recall_micro": 0.8492597577388964, |
| "eval_runtime": 1.857, |
| "eval_samples_per_second": 400.114, |
| "eval_steps_per_second": 50.082, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.4153511159499184, |
| "grad_norm": 8.67512035369873, |
| "learning_rate": 6.532520512422097e-05, |
| "loss": 0.5809, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.4153511159499184, |
| "eval_accuracy": 0.8277254374158816, |
| "eval_f1_macro": 0.8053122261939523, |
| "eval_f1_micro": 0.8277254374158816, |
| "eval_loss": 0.6742271780967712, |
| "eval_precision_macro": 0.8234783735489098, |
| "eval_precision_micro": 0.8277254374158816, |
| "eval_recall_macro": 0.8227076028577776, |
| "eval_recall_micro": 0.8277254374158816, |
| "eval_runtime": 1.8544, |
| "eval_samples_per_second": 400.672, |
| "eval_steps_per_second": 50.151, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.4697876973326074, |
| "grad_norm": 42.6486701965332, |
| "learning_rate": 6.382940301643648e-05, |
| "loss": 0.5704, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.4697876973326074, |
| "eval_accuracy": 0.8371467025572006, |
| "eval_f1_macro": 0.8196023040766853, |
| "eval_f1_micro": 0.8371467025572006, |
| "eval_loss": 0.5477439761161804, |
| "eval_precision_macro": 0.8154114142235019, |
| "eval_precision_micro": 0.8371467025572006, |
| "eval_recall_macro": 0.8396350173820816, |
| "eval_recall_micro": 0.8371467025572006, |
| "eval_runtime": 1.8546, |
| "eval_samples_per_second": 400.63, |
| "eval_steps_per_second": 50.146, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.5242242787152966, |
| "grad_norm": 1.2607247829437256, |
| "learning_rate": 6.227689332474957e-05, |
| "loss": 0.6075, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.5242242787152966, |
| "eval_accuracy": 0.7967698519515478, |
| "eval_f1_macro": 0.7677841123247988, |
| "eval_f1_micro": 0.7967698519515478, |
| "eval_loss": 1.0041254758834839, |
| "eval_precision_macro": 0.8114776737598274, |
| "eval_precision_micro": 0.7967698519515478, |
| "eval_recall_macro": 0.791907454908191, |
| "eval_recall_micro": 0.7967698519515478, |
| "eval_runtime": 1.8546, |
| "eval_samples_per_second": 400.624, |
| "eval_steps_per_second": 50.145, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.578660860097986, |
| "grad_norm": 4.931788921356201, |
| "learning_rate": 6.067412052017954e-05, |
| "loss": 0.5828, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.578660860097986, |
| "eval_accuracy": 0.847913862718708, |
| "eval_f1_macro": 0.8302058376279866, |
| "eval_f1_micro": 0.847913862718708, |
| "eval_loss": 0.5582941770553589, |
| "eval_precision_macro": 0.8601936850928309, |
| "eval_precision_micro": 0.847913862718708, |
| "eval_recall_macro": 0.8408390815357188, |
| "eval_recall_micro": 0.847913862718708, |
| "eval_runtime": 1.8669, |
| "eval_samples_per_second": 397.979, |
| "eval_steps_per_second": 49.814, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.633097441480675, |
| "grad_norm": 0.7160111665725708, |
| "learning_rate": 5.902470092162814e-05, |
| "loss": 0.547, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.633097441480675, |
| "eval_accuracy": 0.8277254374158816, |
| "eval_f1_macro": 0.824218625219791, |
| "eval_f1_micro": 0.8277254374158816, |
| "eval_loss": 0.5830731391906738, |
| "eval_precision_macro": 0.836821915854616, |
| "eval_precision_micro": 0.8277254374158816, |
| "eval_recall_macro": 0.8384285472623679, |
| "eval_recall_micro": 0.8277254374158816, |
| "eval_runtime": 1.8619, |
| "eval_samples_per_second": 399.049, |
| "eval_steps_per_second": 49.948, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.6875340228633642, |
| "grad_norm": 226.5104522705078, |
| "learning_rate": 5.733235609665259e-05, |
| "loss": 0.532, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.6875340228633642, |
| "eval_accuracy": 0.8721399730820996, |
| "eval_f1_macro": 0.855165540533041, |
| "eval_f1_micro": 0.8721399730820996, |
| "eval_loss": 0.5274588465690613, |
| "eval_precision_macro": 0.8790730850974743, |
| "eval_precision_micro": 0.8721399730820996, |
| "eval_recall_macro": 0.8560697826831901, |
| "eval_recall_micro": 0.8721399730820996, |
| "eval_runtime": 1.8558, |
| "eval_samples_per_second": 400.36, |
| "eval_steps_per_second": 50.112, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.7419706042460534, |
| "grad_norm": 1.0774835348129272, |
| "learning_rate": 5.560440381357954e-05, |
| "loss": 0.5079, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.7419706042460534, |
| "eval_accuracy": 0.8627187079407806, |
| "eval_f1_macro": 0.8615733124700616, |
| "eval_f1_micro": 0.8627187079407806, |
| "eval_loss": 0.4832541048526764, |
| "eval_precision_macro": 0.8823512344628659, |
| "eval_precision_micro": 0.8627187079407806, |
| "eval_recall_macro": 0.8731612062059807, |
| "eval_recall_micro": 0.8627187079407806, |
| "eval_runtime": 1.8662, |
| "eval_samples_per_second": 398.145, |
| "eval_steps_per_second": 49.835, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.7964071856287425, |
| "grad_norm": 21.044397354125977, |
| "learning_rate": 5.383781847704708e-05, |
| "loss": 0.5054, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.7964071856287425, |
| "eval_accuracy": 0.8627187079407806, |
| "eval_f1_macro": 0.8472535983855487, |
| "eval_f1_micro": 0.8627187079407806, |
| "eval_loss": 0.44899773597717285, |
| "eval_precision_macro": 0.8666042870317389, |
| "eval_precision_micro": 0.8627187079407806, |
| "eval_recall_macro": 0.8504937606032208, |
| "eval_recall_micro": 0.8627187079407806, |
| "eval_runtime": 1.8535, |
| "eval_samples_per_second": 400.859, |
| "eval_steps_per_second": 50.175, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.8508437670114317, |
| "grad_norm": 15.335619926452637, |
| "learning_rate": 5.204001102071416e-05, |
| "loss": 0.5458, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.8508437670114317, |
| "eval_accuracy": 0.8465679676985195, |
| "eval_f1_macro": 0.8361761359674401, |
| "eval_f1_micro": 0.8465679676985195, |
| "eval_loss": 0.5616449117660522, |
| "eval_precision_macro": 0.8476921577900376, |
| "eval_precision_micro": 0.8465679676985195, |
| "eval_recall_macro": 0.8576485467382537, |
| "eval_recall_micro": 0.8465679676985195, |
| "eval_runtime": 1.8595, |
| "eval_samples_per_second": 399.578, |
| "eval_steps_per_second": 50.015, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.905280348394121, |
| "grad_norm": 7.975017547607422, |
| "learning_rate": 5.02150378180527e-05, |
| "loss": 0.4398, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.905280348394121, |
| "eval_accuracy": 0.8546433378196501, |
| "eval_f1_macro": 0.8438383161438845, |
| "eval_f1_micro": 0.8546433378196501, |
| "eval_loss": 0.5604321956634521, |
| "eval_precision_macro": 0.8740578351654877, |
| "eval_precision_micro": 0.8546433378196501, |
| "eval_recall_macro": 0.8516149068431341, |
| "eval_recall_micro": 0.8546433378196501, |
| "eval_runtime": 1.8574, |
| "eval_samples_per_second": 400.02, |
| "eval_steps_per_second": 50.07, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.95971692977681, |
| "grad_norm": 1.2037925720214844, |
| "learning_rate": 4.837073281713624e-05, |
| "loss": 0.4396, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.95971692977681, |
| "eval_accuracy": 0.8775235531628532, |
| "eval_f1_macro": 0.8658710069776413, |
| "eval_f1_micro": 0.8775235531628532, |
| "eval_loss": 0.48805707693099976, |
| "eval_precision_macro": 0.8837020041596116, |
| "eval_precision_micro": 0.8775235531628532, |
| "eval_recall_macro": 0.8730744219457011, |
| "eval_recall_micro": 0.8775235531628532, |
| "eval_runtime": 1.8629, |
| "eval_samples_per_second": 398.833, |
| "eval_steps_per_second": 49.921, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.014153511159499, |
| "grad_norm": 12.86486530303955, |
| "learning_rate": 4.650386669229917e-05, |
| "loss": 0.4597, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.014153511159499, |
| "eval_accuracy": 0.8627187079407806, |
| "eval_f1_macro": 0.8452518141932259, |
| "eval_f1_micro": 0.8627187079407806, |
| "eval_loss": 0.5245405435562134, |
| "eval_precision_macro": 0.8537422462412639, |
| "eval_precision_micro": 0.8627187079407806, |
| "eval_recall_macro": 0.8721048137269747, |
| "eval_recall_micro": 0.8627187079407806, |
| "eval_runtime": 1.8529, |
| "eval_samples_per_second": 400.987, |
| "eval_steps_per_second": 50.191, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.0685900925421885, |
| "grad_norm": 10.016274452209473, |
| "learning_rate": 4.4622325963559036e-05, |
| "loss": 0.3677, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.0685900925421885, |
| "eval_accuracy": 0.8654104979811574, |
| "eval_f1_macro": 0.8583287093369075, |
| "eval_f1_micro": 0.8654104979811574, |
| "eval_loss": 0.5013412237167358, |
| "eval_precision_macro": 0.8746413088312892, |
| "eval_precision_micro": 0.8654104979811574, |
| "eval_recall_macro": 0.866876974721781, |
| "eval_recall_micro": 0.8654104979811574, |
| "eval_runtime": 1.8541, |
| "eval_samples_per_second": 400.738, |
| "eval_steps_per_second": 50.16, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.1230266739248775, |
| "grad_norm": 19.003820419311523, |
| "learning_rate": 4.273035593086245e-05, |
| "loss": 0.369, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.1230266739248775, |
| "eval_accuracy": 0.8882907133243607, |
| "eval_f1_macro": 0.8885969627149977, |
| "eval_f1_micro": 0.8882907133243607, |
| "eval_loss": 0.4140874445438385, |
| "eval_precision_macro": 0.8916130548225267, |
| "eval_precision_micro": 0.8882907133243607, |
| "eval_recall_macro": 0.896922330586075, |
| "eval_recall_micro": 0.8882907133243607, |
| "eval_runtime": 1.8568, |
| "eval_samples_per_second": 400.147, |
| "eval_steps_per_second": 50.086, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.1774632553075666, |
| "grad_norm": 26.37122917175293, |
| "learning_rate": 4.083222542568154e-05, |
| "loss": 0.3277, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.1774632553075666, |
| "eval_accuracy": 0.8842530282637954, |
| "eval_f1_macro": 0.8779604260015075, |
| "eval_f1_micro": 0.8842530282637954, |
| "eval_loss": 0.3947836756706238, |
| "eval_precision_macro": 0.8984211698195097, |
| "eval_precision_micro": 0.8842530282637954, |
| "eval_recall_macro": 0.8918217883558299, |
| "eval_recall_micro": 0.8842530282637954, |
| "eval_runtime": 1.8579, |
| "eval_samples_per_second": 399.911, |
| "eval_steps_per_second": 50.056, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.231899836690256, |
| "grad_norm": 2.8656764030456543, |
| "learning_rate": 3.8932217179295604e-05, |
| "loss": 0.388, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.231899836690256, |
| "eval_accuracy": 0.8882907133243607, |
| "eval_f1_macro": 0.8938725509467977, |
| "eval_f1_micro": 0.8882907133243607, |
| "eval_loss": 0.3332942724227905, |
| "eval_precision_macro": 0.9195771417290953, |
| "eval_precision_micro": 0.8882907133243607, |
| "eval_recall_macro": 0.9004462055748975, |
| "eval_recall_micro": 0.8882907133243607, |
| "eval_runtime": 1.8625, |
| "eval_samples_per_second": 398.92, |
| "eval_steps_per_second": 49.932, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.286336418072945, |
| "grad_norm": 26.717533111572266, |
| "learning_rate": 3.703461815971118e-05, |
| "loss": 0.3548, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.286336418072945, |
| "eval_accuracy": 0.8748317631224765, |
| "eval_f1_macro": 0.8615806030654037, |
| "eval_f1_micro": 0.8748317631224765, |
| "eval_loss": 0.47363847494125366, |
| "eval_precision_macro": 0.8836872736431415, |
| "eval_precision_micro": 0.8748317631224765, |
| "eval_recall_macro": 0.8741894628222147, |
| "eval_recall_micro": 0.8748317631224765, |
| "eval_runtime": 1.8583, |
| "eval_samples_per_second": 399.823, |
| "eval_steps_per_second": 50.045, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.340772999455634, |
| "grad_norm": 0.045266564935445786, |
| "learning_rate": 3.514370989902237e-05, |
| "loss": 0.324, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.340772999455634, |
| "eval_accuracy": 0.882907133243607, |
| "eval_f1_macro": 0.8804977813603039, |
| "eval_f1_micro": 0.882907133243607, |
| "eval_loss": 0.46873775124549866, |
| "eval_precision_macro": 0.9098135492410423, |
| "eval_precision_micro": 0.882907133243607, |
| "eval_recall_macro": 0.8910827794607792, |
| "eval_recall_micro": 0.882907133243607, |
| "eval_runtime": 1.8568, |
| "eval_samples_per_second": 400.161, |
| "eval_steps_per_second": 50.087, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.3952095808383236, |
| "grad_norm": 0.44797322154045105, |
| "learning_rate": 3.326750497548324e-05, |
| "loss": 0.3437, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.3952095808383236, |
| "eval_accuracy": 0.8667563930013459, |
| "eval_f1_macro": 0.8578652908353367, |
| "eval_f1_micro": 0.8667563930013459, |
| "eval_loss": 0.49417996406555176, |
| "eval_precision_macro": 0.8898066142345816, |
| "eval_precision_micro": 0.8667563930013459, |
| "eval_recall_macro": 0.8782685148482547, |
| "eval_recall_micro": 0.8667563930013459, |
| "eval_runtime": 1.8548, |
| "eval_samples_per_second": 400.577, |
| "eval_steps_per_second": 50.14, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.4496461622210126, |
| "grad_norm": 1.1798264980316162, |
| "learning_rate": 3.140271820756983e-05, |
| "loss": 0.3446, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.4496461622210126, |
| "eval_accuracy": 0.8815612382234186, |
| "eval_f1_macro": 0.88010849529162, |
| "eval_f1_micro": 0.8815612382234186, |
| "eval_loss": 0.33866673707962036, |
| "eval_precision_macro": 0.8910364680325128, |
| "eval_precision_micro": 0.8815612382234186, |
| "eval_recall_macro": 0.8881100002593636, |
| "eval_recall_micro": 0.8815612382234186, |
| "eval_runtime": 1.8562, |
| "eval_samples_per_second": 400.289, |
| "eval_steps_per_second": 50.104, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.5040827436037016, |
| "grad_norm": 0.09330004453659058, |
| "learning_rate": 2.955732939335316e-05, |
| "loss": 0.3087, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.5040827436037016, |
| "eval_accuracy": 0.8748317631224765, |
| "eval_f1_macro": 0.8598800603795531, |
| "eval_f1_micro": 0.8748317631224765, |
| "eval_loss": 0.43970754742622375, |
| "eval_precision_macro": 0.8698171750911001, |
| "eval_precision_micro": 0.8748317631224765, |
| "eval_recall_macro": 0.8839881377308216, |
| "eval_recall_micro": 0.8748317631224765, |
| "eval_runtime": 1.8812, |
| "eval_samples_per_second": 394.958, |
| "eval_steps_per_second": 49.436, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.558519324986391, |
| "grad_norm": 0.7801256775856018, |
| "learning_rate": 2.773550226360711e-05, |
| "loss": 0.2843, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.558519324986391, |
| "eval_accuracy": 0.8950201884253028, |
| "eval_f1_macro": 0.8940959979263783, |
| "eval_f1_micro": 0.8950201884253028, |
| "eval_loss": 0.32003289461135864, |
| "eval_precision_macro": 0.9068783310964942, |
| "eval_precision_micro": 0.8950201884253028, |
| "eval_recall_macro": 0.9091191353836482, |
| "eval_recall_micro": 0.8950201884253028, |
| "eval_runtime": 1.8582, |
| "eval_samples_per_second": 399.845, |
| "eval_steps_per_second": 50.048, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.61295590636908, |
| "grad_norm": 0.025174345821142197, |
| "learning_rate": 2.5941347387132282e-05, |
| "loss": 0.2754, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.61295590636908, |
| "eval_accuracy": 0.8909825033647375, |
| "eval_f1_macro": 0.8836633698575453, |
| "eval_f1_micro": 0.8909825033647375, |
| "eval_loss": 0.34719252586364746, |
| "eval_precision_macro": 0.9204848261037936, |
| "eval_precision_micro": 0.8909825033647375, |
| "eval_recall_macro": 0.8991668767364871, |
| "eval_recall_micro": 0.8909825033647375, |
| "eval_runtime": 1.8603, |
| "eval_samples_per_second": 399.388, |
| "eval_steps_per_second": 49.991, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.667392487751769, |
| "grad_norm": 167.9337158203125, |
| "learning_rate": 2.417891289612432e-05, |
| "loss": 0.2513, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.667392487751769, |
| "eval_accuracy": 0.882907133243607, |
| "eval_f1_macro": 0.8739179478771129, |
| "eval_f1_micro": 0.882907133243607, |
| "eval_loss": 0.4664686620235443, |
| "eval_precision_macro": 0.8920238552855676, |
| "eval_precision_micro": 0.882907133243607, |
| "eval_recall_macro": 0.880791254873124, |
| "eval_recall_micro": 0.882907133243607, |
| "eval_runtime": 1.8556, |
| "eval_samples_per_second": 400.418, |
| "eval_steps_per_second": 50.12, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.721829069134458, |
| "grad_norm": 74.27070617675781, |
| "learning_rate": 2.2452175352417002e-05, |
| "loss": 0.2604, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.721829069134458, |
| "eval_accuracy": 0.901749663526245, |
| "eval_f1_macro": 0.9011896656482543, |
| "eval_f1_micro": 0.901749663526245, |
| "eval_loss": 0.28674057126045227, |
| "eval_precision_macro": 0.9178731490342212, |
| "eval_precision_micro": 0.901749663526245, |
| "eval_recall_macro": 0.9081905054221786, |
| "eval_recall_micro": 0.901749663526245, |
| "eval_runtime": 1.8585, |
| "eval_samples_per_second": 399.783, |
| "eval_steps_per_second": 50.04, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.7762656505171477, |
| "grad_norm": 0.4500181972980499, |
| "learning_rate": 2.0765030775208945e-05, |
| "loss": 0.2841, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.7762656505171477, |
| "eval_accuracy": 0.8882907133243607, |
| "eval_f1_macro": 0.885935664958505, |
| "eval_f1_micro": 0.8882907133243607, |
| "eval_loss": 0.3305457532405853, |
| "eval_precision_macro": 0.9096755197882269, |
| "eval_precision_micro": 0.8882907133243607, |
| "eval_recall_macro": 0.8987215294675002, |
| "eval_recall_micro": 0.8882907133243607, |
| "eval_runtime": 1.8563, |
| "eval_samples_per_second": 400.268, |
| "eval_steps_per_second": 50.101, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.8307022318998367, |
| "grad_norm": 0.9561833143234253, |
| "learning_rate": 1.9124527545270138e-05, |
| "loss": 0.2722, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.8307022318998367, |
| "eval_accuracy": 0.8882907133243607, |
| "eval_f1_macro": 0.8906563450031565, |
| "eval_f1_micro": 0.8882907133243607, |
| "eval_loss": 0.38373151421546936, |
| "eval_precision_macro": 0.9167678769615956, |
| "eval_precision_micro": 0.8882907133243607, |
| "eval_recall_macro": 0.9029594325171784, |
| "eval_recall_micro": 0.8882907133243607, |
| "eval_runtime": 1.8579, |
| "eval_samples_per_second": 399.917, |
| "eval_steps_per_second": 50.057, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.8851388132825257, |
| "grad_norm": 1.9382160902023315, |
| "learning_rate": 1.7527793188046586e-05, |
| "loss": 0.2342, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.8851388132825257, |
| "eval_accuracy": 0.8936742934051144, |
| "eval_f1_macro": 0.8938891552134242, |
| "eval_f1_micro": 0.8936742934051144, |
| "eval_loss": 0.3442441523075104, |
| "eval_precision_macro": 0.9054172318747689, |
| "eval_precision_micro": 0.8936742934051144, |
| "eval_recall_macro": 0.9022807262990895, |
| "eval_recall_micro": 0.8936742934051144, |
| "eval_runtime": 1.8553, |
| "eval_samples_per_second": 400.483, |
| "eval_steps_per_second": 50.128, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.939575394665215, |
| "grad_norm": 38.32719802856445, |
| "learning_rate": 1.5981762627409745e-05, |
| "loss": 0.2352, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.939575394665215, |
| "eval_accuracy": 0.9071332436069987, |
| "eval_f1_macro": 0.9052958318083129, |
| "eval_f1_micro": 0.9071332436069987, |
| "eval_loss": 0.29468008875846863, |
| "eval_precision_macro": 0.9313363962062918, |
| "eval_precision_micro": 0.9071332436069987, |
| "eval_recall_macro": 0.9120686908810327, |
| "eval_recall_micro": 0.9071332436069987, |
| "eval_runtime": 1.8593, |
| "eval_samples_per_second": 399.621, |
| "eval_steps_per_second": 50.02, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.9940119760479043, |
| "grad_norm": 19.57600212097168, |
| "learning_rate": 1.4489924155351557e-05, |
| "loss": 0.208, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.9940119760479043, |
| "eval_accuracy": 0.892328398384926, |
| "eval_f1_macro": 0.8896964859840816, |
| "eval_f1_micro": 0.892328398384926, |
| "eval_loss": 0.3449091613292694, |
| "eval_precision_macro": 0.9079271995760917, |
| "eval_precision_micro": 0.892328398384926, |
| "eval_recall_macro": 0.897032931021147, |
| "eval_recall_micro": 0.892328398384926, |
| "eval_runtime": 1.8601, |
| "eval_samples_per_second": 399.439, |
| "eval_steps_per_second": 49.997, |
| "step": 27500 |
| }, |
| { |
| "epoch": 3.0484485574305933, |
| "grad_norm": 11.636988639831543, |
| "learning_rate": 1.3055643790842023e-05, |
| "loss": 0.1509, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.0484485574305933, |
| "eval_accuracy": 0.8936742934051144, |
| "eval_f1_macro": 0.8961635552963756, |
| "eval_f1_micro": 0.8936742934051144, |
| "eval_loss": 0.3678928017616272, |
| "eval_precision_macro": 0.9294481598993097, |
| "eval_precision_micro": 0.8936742934051144, |
| "eval_recall_macro": 0.9003474444262722, |
| "eval_recall_micro": 0.8936742934051144, |
| "eval_runtime": 1.8558, |
| "eval_samples_per_second": 400.371, |
| "eval_steps_per_second": 50.114, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.1028851388132823, |
| "grad_norm": 1.134954571723938, |
| "learning_rate": 1.1682157685117184e-05, |
| "loss": 0.1225, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.1028851388132823, |
| "eval_accuracy": 0.901749663526245, |
| "eval_f1_macro": 0.9055763390711317, |
| "eval_f1_micro": 0.901749663526245, |
| "eval_loss": 0.3573816120624542, |
| "eval_precision_macro": 0.9179910366820196, |
| "eval_precision_micro": 0.901749663526245, |
| "eval_recall_macro": 0.9057635732164224, |
| "eval_recall_micro": 0.901749663526245, |
| "eval_runtime": 1.8615, |
| "eval_samples_per_second": 399.14, |
| "eval_steps_per_second": 49.96, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.157321720195972, |
| "grad_norm": 1.2862955331802368, |
| "learning_rate": 1.0372564819986089e-05, |
| "loss": 0.2055, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.157321720195972, |
| "eval_accuracy": 0.9084791386271871, |
| "eval_f1_macro": 0.9056021054670421, |
| "eval_f1_micro": 0.9084791386271871, |
| "eval_loss": 0.2638719081878662, |
| "eval_precision_macro": 0.920340264797567, |
| "eval_precision_micro": 0.9084791386271871, |
| "eval_recall_macro": 0.9112520674648165, |
| "eval_recall_micro": 0.9084791386271871, |
| "eval_runtime": 1.8612, |
| "eval_samples_per_second": 399.215, |
| "eval_steps_per_second": 49.969, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.211758301578661, |
| "grad_norm": 0.8544738292694092, |
| "learning_rate": 9.132236901396202e-06, |
| "loss": 0.1473, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.211758301578661, |
| "eval_accuracy": 0.9125168236877523, |
| "eval_f1_macro": 0.9146584946117865, |
| "eval_f1_micro": 0.9125168236877523, |
| "eval_loss": 0.289911687374115, |
| "eval_precision_macro": 0.9314970028408138, |
| "eval_precision_micro": 0.9125168236877523, |
| "eval_recall_macro": 0.9216769513362958, |
| "eval_recall_micro": 0.9125168236877523, |
| "eval_runtime": 1.8605, |
| "eval_samples_per_second": 399.346, |
| "eval_steps_per_second": 49.985, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.26619488296135, |
| "grad_norm": 0.008034386672079563, |
| "learning_rate": 7.959002150395973e-06, |
| "loss": 0.1462, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.26619488296135, |
| "eval_accuracy": 0.8936742934051144, |
| "eval_f1_macro": 0.8925380101110163, |
| "eval_f1_micro": 0.8936742934051144, |
| "eval_loss": 0.3166608214378357, |
| "eval_precision_macro": 0.9160115249258518, |
| "eval_precision_micro": 0.8936742934051144, |
| "eval_recall_macro": 0.9081516293872548, |
| "eval_recall_micro": 0.8936742934051144, |
| "eval_runtime": 1.8602, |
| "eval_samples_per_second": 399.413, |
| "eval_steps_per_second": 49.994, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.3206314643440393, |
| "grad_norm": 35.23937225341797, |
| "learning_rate": 6.858061155456108e-06, |
| "loss": 0.1595, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.3206314643440393, |
| "eval_accuracy": 0.8963660834454913, |
| "eval_f1_macro": 0.8987254233226418, |
| "eval_f1_micro": 0.8963660834454913, |
| "eval_loss": 0.27786874771118164, |
| "eval_precision_macro": 0.9217076655624952, |
| "eval_precision_micro": 0.8963660834454913, |
| "eval_recall_macro": 0.9094003908764269, |
| "eval_recall_micro": 0.8963660834454913, |
| "eval_runtime": 1.8584, |
| "eval_samples_per_second": 399.805, |
| "eval_steps_per_second": 50.043, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.3750680457267284, |
| "grad_norm": 0.8242081999778748, |
| "learning_rate": 5.831897957807116e-06, |
| "loss": 0.1305, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.3750680457267284, |
| "eval_accuracy": 0.9057873485868102, |
| "eval_f1_macro": 0.9093215110275342, |
| "eval_f1_micro": 0.9057873485868102, |
| "eval_loss": 0.2698396146297455, |
| "eval_precision_macro": 0.9278207951585322, |
| "eval_precision_micro": 0.9057873485868102, |
| "eval_recall_macro": 0.9239273588962285, |
| "eval_recall_micro": 0.9057873485868102, |
| "eval_runtime": 1.8582, |
| "eval_samples_per_second": 399.849, |
| "eval_steps_per_second": 50.048, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.4295046271094174, |
| "grad_norm": 0.0026783442590385675, |
| "learning_rate": 4.882827878346214e-06, |
| "loss": 0.136, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.4295046271094174, |
| "eval_accuracy": 0.9084791386271871, |
| "eval_f1_macro": 0.917688655658605, |
| "eval_f1_micro": 0.9084791386271871, |
| "eval_loss": 0.246324360370636, |
| "eval_precision_macro": 0.9175595026972325, |
| "eval_precision_micro": 0.9084791386271871, |
| "eval_recall_macro": 0.9186921936508842, |
| "eval_recall_micro": 0.9084791386271871, |
| "eval_runtime": 1.8567, |
| "eval_samples_per_second": 400.174, |
| "eval_steps_per_second": 50.089, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.483941208492107, |
| "grad_norm": 16.742229461669922, |
| "learning_rate": 4.012992293603897e-06, |
| "loss": 0.143, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.483941208492107, |
| "eval_accuracy": 0.9044414535666218, |
| "eval_f1_macro": 0.9082864223964277, |
| "eval_f1_micro": 0.9044414535666218, |
| "eval_loss": 0.2518066465854645, |
| "eval_precision_macro": 0.9316502423702806, |
| "eval_precision_micro": 0.9044414535666218, |
| "eval_recall_macro": 0.9226607536872533, |
| "eval_recall_micro": 0.9044414535666218, |
| "eval_runtime": 1.8555, |
| "eval_samples_per_second": 400.439, |
| "eval_steps_per_second": 50.122, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.538377789874796, |
| "grad_norm": 0.739364504814148, |
| "learning_rate": 3.224353804179279e-06, |
| "loss": 0.1098, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.538377789874796, |
| "eval_accuracy": 0.9084791386271871, |
| "eval_f1_macro": 0.9186506026518713, |
| "eval_f1_micro": 0.9084791386271871, |
| "eval_loss": 0.22374330461025238, |
| "eval_precision_macro": 0.9389000055745785, |
| "eval_precision_micro": 0.9084791386271871, |
| "eval_recall_macro": 0.9288568809947483, |
| "eval_recall_micro": 0.9084791386271871, |
| "eval_runtime": 1.8609, |
| "eval_samples_per_second": 399.265, |
| "eval_steps_per_second": 49.975, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.592814371257485, |
| "grad_norm": 3.698509693145752, |
| "learning_rate": 2.518691806545661e-06, |
| "loss": 0.1372, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.592814371257485, |
| "eval_accuracy": 0.9138627187079408, |
| "eval_f1_macro": 0.9127719353333948, |
| "eval_f1_micro": 0.9138627187079408, |
| "eval_loss": 0.2564922869205475, |
| "eval_precision_macro": 0.9313759089420434, |
| "eval_precision_micro": 0.9138627187079408, |
| "eval_recall_macro": 0.9216642726414609, |
| "eval_recall_micro": 0.9138627187079408, |
| "eval_runtime": 1.868, |
| "eval_samples_per_second": 397.756, |
| "eval_steps_per_second": 49.786, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.6472509526401744, |
| "grad_norm": 1.8072404861450195, |
| "learning_rate": 1.897598478217515e-06, |
| "loss": 0.1578, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.6472509526401744, |
| "eval_accuracy": 0.9152086137281292, |
| "eval_f1_macro": 0.9164100071127091, |
| "eval_f1_micro": 0.9152086137281292, |
| "eval_loss": 0.23528015613555908, |
| "eval_precision_macro": 0.9262783920839164, |
| "eval_precision_micro": 0.9152086137281292, |
| "eval_recall_macro": 0.9252885945686548, |
| "eval_recall_micro": 0.9152086137281292, |
| "eval_runtime": 1.8737, |
| "eval_samples_per_second": 396.537, |
| "eval_steps_per_second": 49.634, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.7016875340228634, |
| "grad_norm": 1.3797998428344727, |
| "learning_rate": 1.3624751853376838e-06, |
| "loss": 0.1181, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.7016875340228634, |
| "eval_accuracy": 0.9071332436069987, |
| "eval_f1_macro": 0.9105156456724851, |
| "eval_f1_micro": 0.9071332436069987, |
| "eval_loss": 0.24153241515159607, |
| "eval_precision_macro": 0.9329517679306777, |
| "eval_precision_micro": 0.9071332436069987, |
| "eval_recall_macro": 0.9218709083111013, |
| "eval_recall_micro": 0.9071332436069987, |
| "eval_runtime": 1.8682, |
| "eval_samples_per_second": 397.7, |
| "eval_steps_per_second": 49.779, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.7561241154055525, |
| "grad_norm": 2.415684223175049, |
| "learning_rate": 9.153375046762813e-07, |
| "loss": 0.1255, |
| "step": 34500 |
| }, |
| { |
| "epoch": 3.7561241154055525, |
| "eval_accuracy": 0.9044414535666218, |
| "eval_f1_macro": 0.9081620884976839, |
| "eval_f1_micro": 0.9044414535666218, |
| "eval_loss": 0.22819305956363678, |
| "eval_precision_macro": 0.9305656056604823, |
| "eval_precision_micro": 0.9044414535666218, |
| "eval_recall_macro": 0.920745058359063, |
| "eval_recall_micro": 0.9044414535666218, |
| "eval_runtime": 1.866, |
| "eval_samples_per_second": 398.172, |
| "eval_steps_per_second": 49.839, |
| "step": 34500 |
| }, |
| { |
| "epoch": 3.810560696788242, |
| "grad_norm": 1.141237735748291, |
| "learning_rate": 5.560337978008257e-07, |
| "loss": 0.1248, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.810560696788242, |
| "eval_accuracy": 0.9057873485868102, |
| "eval_f1_macro": 0.9094758744091115, |
| "eval_f1_micro": 0.9057873485868102, |
| "eval_loss": 0.2376009076833725, |
| "eval_precision_macro": 0.9331901460056251, |
| "eval_precision_micro": 0.9057873485868102, |
| "eval_recall_macro": 0.9209645994598771, |
| "eval_recall_micro": 0.9057873485868102, |
| "eval_runtime": 1.8789, |
| "eval_samples_per_second": 395.437, |
| "eval_steps_per_second": 49.496, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.864997278170931, |
| "grad_norm": 3.701718330383301, |
| "learning_rate": 2.849185475191707e-07, |
| "loss": 0.122, |
| "step": 35500 |
| }, |
| { |
| "epoch": 3.864997278170931, |
| "eval_accuracy": 0.9071332436069987, |
| "eval_f1_macro": 0.9142001714018019, |
| "eval_f1_micro": 0.9071332436069987, |
| "eval_loss": 0.23746204376220703, |
| "eval_precision_macro": 0.9387460280320955, |
| "eval_precision_micro": 0.9071332436069987, |
| "eval_recall_macro": 0.9249589603621328, |
| "eval_recall_micro": 0.9071332436069987, |
| "eval_runtime": 1.8692, |
| "eval_samples_per_second": 397.504, |
| "eval_steps_per_second": 49.755, |
| "step": 35500 |
| }, |
| { |
| "epoch": 3.91943385955362, |
| "grad_norm": 14.987931251525879, |
| "learning_rate": 1.0341200492882675e-07, |
| "loss": 0.1295, |
| "step": 36000 |
| }, |
| { |
| "epoch": 3.91943385955362, |
| "eval_accuracy": 0.9071332436069987, |
| "eval_f1_macro": 0.9128662206220384, |
| "eval_f1_micro": 0.9071332436069987, |
| "eval_loss": 0.2367352843284607, |
| "eval_precision_macro": 0.9370515623670415, |
| "eval_precision_micro": 0.9071332436069987, |
| "eval_recall_macro": 0.9239407899360675, |
| "eval_recall_micro": 0.9071332436069987, |
| "eval_runtime": 1.8706, |
| "eval_samples_per_second": 397.195, |
| "eval_steps_per_second": 49.716, |
| "step": 36000 |
| }, |
| { |
| "epoch": 3.973870440936309, |
| "grad_norm": 2.1749684810638428, |
| "learning_rate": 1.1923701273950372e-08, |
| "loss": 0.136, |
| "step": 36500 |
| }, |
| { |
| "epoch": 3.973870440936309, |
| "eval_accuracy": 0.9084791386271871, |
| "eval_f1_macro": 0.9156857889588481, |
| "eval_f1_micro": 0.9084791386271871, |
| "eval_loss": 0.23688668012619019, |
| "eval_precision_macro": 0.9402261655416446, |
| "eval_precision_micro": 0.9084791386271871, |
| "eval_recall_macro": 0.926447055600228, |
| "eval_recall_micro": 0.9084791386271871, |
| "eval_runtime": 1.8706, |
| "eval_samples_per_second": 397.207, |
| "eval_steps_per_second": 49.718, |
| "step": 36500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 36740, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.684074511496294e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|