{ "best_global_step": 26604, "best_metric": 0.9163582001286115, "best_model_checkpoint": "output/indra_stmt_classifier/checkpoint-26604", "epoch": 10.0, "eval_steps": 500, "global_step": 29560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 84.59678649902344, "learning_rate": 2.7001014884979704e-05, "loss": 2.2672, "step": 2956 }, { "epoch": 1.0, "eval_gate1_accuracy": 0.8585125680228038, "eval_gate1_f1": 0.8505829142307659, "eval_gate1_precision": 0.8473952560532426, "eval_gate1_recall": 0.854657238060827, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.7587457890645245, "eval_gate2_f1": 0.5592993246296689, "eval_gate2_precision": 0.6163758861756914, "eval_gate2_recall": 0.5543459088227541, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8548596112311015, "eval_gate3_f1": 0.6971239789155547, "eval_gate3_precision": 0.700932514781479, "eval_gate3_recall": 0.7071816387030568, "eval_gate3_support": 6945, "eval_gate4_span_FN": 230, "eval_gate4_span_FP": 347, "eval_gate4_span_TP": 777, "eval_gate4_span_f1": 0.7292350958998283, "eval_gate4_span_precision": 0.6912811387838854, "eval_gate4_span_recall": 0.7715988083339463, "eval_loss": NaN, "eval_runtime": 10.4486, "eval_samples_per_second": 1131.439, "eval_steps_per_second": 35.411, "step": 2956 }, { "epoch": 2.0, "grad_norm": 88.20085144042969, "learning_rate": 2.4001014884979702e-05, "loss": 1.2241, "step": 5912 }, { "epoch": 2.0, "eval_gate1_accuracy": 0.8929774552993003, "eval_gate1_f1": 0.8820620404399862, "eval_gate1_precision": 0.8988718775181306, "eval_gate1_recall": 0.871612670119234, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.808240476807463, "eval_gate2_f1": 0.6881412582252169, "eval_gate2_precision": 0.6832538631102347, "eval_gate2_recall": 0.6996653263704344, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8666666666666667, "eval_gate3_f1": 0.744109590199225, "eval_gate3_precision": 0.7653765174358127, "eval_gate3_recall": 0.7614317320508905, "eval_gate3_support": 6945, "eval_gate4_span_FN": 221, "eval_gate4_span_FP": 295, "eval_gate4_span_TP": 786, "eval_gate4_span_f1": 0.7528735582174595, "eval_gate4_span_precision": 0.7271045328332368, "eval_gate4_span_recall": 0.7805362462683164, "eval_loss": NaN, "eval_runtime": 10.4006, "eval_samples_per_second": 1136.67, "eval_steps_per_second": 35.575, "step": 5912 }, { "epoch": 3.0, "grad_norm": 67.10189056396484, "learning_rate": 2.10010148849797e-05, "loss": 0.8652, "step": 8868 }, { "epoch": 3.0, "eval_gate1_accuracy": 0.9059341798393367, "eval_gate1_f1": 0.8997563137620789, "eval_gate1_precision": 0.899055894281647, "eval_gate1_recall": 0.9004810366878294, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8333765224151335, "eval_gate2_f1": 0.7799911701221585, "eval_gate2_precision": 0.8104194561625689, "eval_gate2_recall": 0.7600982965627961, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8732901367890569, "eval_gate3_f1": 0.7904223821154268, "eval_gate3_precision": 0.8444284314763446, "eval_gate3_recall": 0.7593577482285429, "eval_gate3_support": 6945, "eval_gate4_span_FN": 236, "eval_gate4_span_FP": 228, "eval_gate4_span_TP": 771, "eval_gate4_span_f1": 0.7686939132376799, "eval_gate4_span_precision": 0.7717717717640463, "eval_gate4_span_recall": 0.7656405163776997, "eval_loss": NaN, "eval_runtime": 10.3684, "eval_samples_per_second": 1140.196, "eval_steps_per_second": 35.685, "step": 8868 }, { "epoch": 4.0, "grad_norm": 173.31228637695312, "learning_rate": 1.8001014884979702e-05, "loss": 0.6376, "step": 11824 }, { "epoch": 4.0, "eval_gate1_accuracy": 0.9077481212749416, "eval_gate1_f1": 0.9007457126723182, "eval_gate1_precision": 0.9041686699243279, "eval_gate1_recall": 0.8977536805400228, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8419279606115574, "eval_gate2_f1": 0.7689803309009853, "eval_gate2_precision": 0.7719738299874924, "eval_gate2_recall": 0.7762247755795892, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8780417566594673, "eval_gate3_f1": 0.8038529609303686, "eval_gate3_precision": 0.8238117797554888, "eval_gate3_recall": 0.7978850610103051, "eval_gate3_support": 6945, "eval_gate4_span_FN": 218, "eval_gate4_span_FP": 226, "eval_gate4_span_TP": 789, "eval_gate4_span_f1": 0.7804154252594212, "eval_gate4_span_precision": 0.777339901470174, "eval_gate4_span_recall": 0.7835153922464397, "eval_loss": NaN, "eval_runtime": 10.3445, "eval_samples_per_second": 1142.825, "eval_steps_per_second": 35.768, "step": 11824 }, { "epoch": 5.0, "grad_norm": 22.405181884765625, "learning_rate": 1.5001014884979702e-05, "loss": 0.4713, "step": 14780 }, { "epoch": 5.0, "eval_gate1_accuracy": 0.9124125421093547, "eval_gate1_f1": 0.905395262879988, "eval_gate1_precision": 0.9108288424356044, "eval_gate1_recall": 0.9009235044132069, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8608447784400104, "eval_gate2_f1": 0.7846052784615595, "eval_gate2_precision": 0.7779462341191327, "eval_gate2_recall": 0.8035688815922226, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8822174226061915, "eval_gate3_f1": 0.799720850034007, "eval_gate3_precision": 0.7922933108551248, "eval_gate3_recall": 0.8164975006789633, "eval_gate3_support": 6945, "eval_gate4_span_FN": 217, "eval_gate4_span_FP": 204, "eval_gate4_span_TP": 790, "eval_gate4_span_f1": 0.7896051923936183, "eval_gate4_span_precision": 0.7947686116620245, "eval_gate4_span_recall": 0.7845084409058142, "eval_loss": NaN, "eval_runtime": 10.3367, "eval_samples_per_second": 1143.691, "eval_steps_per_second": 35.795, "step": 14780 }, { "epoch": 6.0, "grad_norm": 150.47354125976562, "learning_rate": 1.2001014884979702e-05, "loss": 0.3573, "step": 17736 }, { "epoch": 6.0, "eval_gate1_accuracy": 0.9168178284529671, "eval_gate1_f1": 0.91133041633164, "eval_gate1_precision": 0.9107173896884344, "eval_gate1_recall": 0.9119611098684928, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8647317958020212, "eval_gate2_f1": 0.7980336380648058, "eval_gate2_precision": 0.8117843067907142, "eval_gate2_recall": 0.7909589217708924, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8846652267818574, "eval_gate3_f1": 0.7968635418767978, "eval_gate3_precision": 0.8025916830061807, "eval_gate3_recall": 0.7966439892081336, "eval_gate3_support": 6945, "eval_gate4_span_FN": 198, "eval_gate4_span_FP": 242, "eval_gate4_span_TP": 809, "eval_gate4_span_f1": 0.786200189358105, "eval_gate4_span_precision": 0.7697431018004782, "eval_gate4_span_recall": 0.8033763654339288, "eval_loss": NaN, "eval_runtime": 10.3401, "eval_samples_per_second": 1143.318, "eval_steps_per_second": 35.783, "step": 17736 }, { "epoch": 7.0, "grad_norm": 270.212646484375, "learning_rate": 9.001014884979702e-06, "loss": 0.2711, "step": 20692 }, { "epoch": 7.0, "eval_gate1_accuracy": 0.9191500388701737, "eval_gate1_f1": 0.9137570612727655, "eval_gate1_precision": 0.9134164984755443, "eval_gate1_recall": 0.9141029060064348, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8693962166364343, "eval_gate2_f1": 0.8124767328435765, "eval_gate2_precision": 0.8061838445118481, "eval_gate2_recall": 0.8279748920673979, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8862491000719942, "eval_gate3_f1": 0.8027532601826777, "eval_gate3_precision": 0.7866745156811082, "eval_gate3_recall": 0.8226565082669126, "eval_gate3_support": 6945, "eval_gate4_span_FN": 251, "eval_gate4_span_FP": 213, "eval_gate4_span_TP": 756, "eval_gate4_span_f1": 0.7651821812289223, "eval_gate4_span_precision": 0.7801857585058805, "eval_gate4_span_recall": 0.750744786487083, "eval_loss": NaN, "eval_runtime": 10.4102, "eval_samples_per_second": 1135.613, "eval_steps_per_second": 35.542, "step": 20692 }, { "epoch": 8.0, "grad_norm": 30.083831787109375, "learning_rate": 6.001014884979703e-06, "loss": 0.2072, "step": 23648 }, { "epoch": 8.0, "eval_gate1_accuracy": 0.9194091733609744, "eval_gate1_f1": 0.9136575067455691, "eval_gate1_precision": 0.9151861735925785, "eval_gate1_recall": 0.9122216295888463, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8683596786732314, "eval_gate2_f1": 0.7953213528224212, "eval_gate2_precision": 0.7941072643208182, "eval_gate2_recall": 0.8039876906998746, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8859611231101512, "eval_gate3_f1": 0.8067851499228712, "eval_gate3_precision": 0.8044692044507203, "eval_gate3_recall": 0.8120360484679332, "eval_gate3_support": 6945, "eval_gate4_span_FN": 211, "eval_gate4_span_FP": 223, "eval_gate4_span_TP": 796, "eval_gate4_span_f1": 0.7857847926232181, "eval_gate4_span_precision": 0.7811579980296255, "eval_gate4_span_recall": 0.7904667328620609, "eval_loss": NaN, "eval_runtime": 10.415, "eval_samples_per_second": 1135.098, "eval_steps_per_second": 35.526, "step": 23648 }, { "epoch": 9.0, "grad_norm": 303.0519104003906, "learning_rate": 3.0010148849797024e-06, "loss": 0.1509, "step": 26604 }, { "epoch": 9.0, "eval_gate1_accuracy": 0.9217413837781808, "eval_gate1_f1": 0.9163582001286115, "eval_gate1_precision": 0.9168310136033854, "eval_gate1_recall": 0.915894857280501, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8717284270536408, "eval_gate2_f1": 0.8542694094275488, "eval_gate2_precision": 0.8495135391288109, "eval_gate2_recall": 0.8711254994444528, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8866810655147588, "eval_gate3_f1": 0.8118642371342876, "eval_gate3_precision": 0.8115033608543574, "eval_gate3_recall": 0.8169535217733972, "eval_gate3_support": 6945, "eval_gate4_span_FN": 216, "eval_gate4_span_FP": 233, "eval_gate4_span_TP": 791, "eval_gate4_span_f1": 0.7789266321172492, "eval_gate4_span_precision": 0.7724609374924565, "eval_gate4_span_recall": 0.7855014895651886, "eval_loss": NaN, "eval_runtime": 10.45, "eval_samples_per_second": 1131.292, "eval_steps_per_second": 35.407, "step": 26604 }, { "epoch": 10.0, "grad_norm": 4.396928310394287, "learning_rate": 1.0148849797023006e-09, "loss": 0.122, "step": 29560 }, { "epoch": 10.0, "eval_gate1_accuracy": 0.9212231147965795, "eval_gate1_f1": 0.9159218285491496, "eval_gate1_precision": 0.915806583633259, "eval_gate1_recall": 0.9160376628069029, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8722466960352423, "eval_gate2_f1": 0.856680976809179, "eval_gate2_precision": 0.8659863944374855, "eval_gate2_recall": 0.8537404898111944, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8875449964002879, "eval_gate3_f1": 0.8118433314020443, "eval_gate3_precision": 0.8153146247248837, "eval_gate3_recall": 0.8127560834123785, "eval_gate3_support": 6945, "eval_gate4_span_FN": 213, "eval_gate4_span_FP": 233, "eval_gate4_span_TP": 794, "eval_gate4_span_f1": 0.7807276252779591, "eval_gate4_span_precision": 0.7731256085611187, "eval_gate4_span_recall": 0.788480635543312, "eval_loss": NaN, "eval_runtime": 10.3467, "eval_samples_per_second": 1142.581, "eval_steps_per_second": 35.76, "step": 29560 }, { "epoch": 10.0, "step": 29560, "total_flos": 5.801469413818084e+16, "train_loss": 0.6573912851542678, "train_runtime": 2140.4043, "train_samples_per_second": 441.828, "train_steps_per_second": 13.81 }, { "epoch": 10.0, "eval_gate1_accuracy": 0.9217413837781808, "eval_gate1_f1": 0.9163582001286115, "eval_gate1_precision": 0.9168310136033854, "eval_gate1_recall": 0.915894857280501, "eval_gate1_support": 3859, "eval_gate2_accuracy": 0.8717284270536408, "eval_gate2_f1": 0.8542694094275488, "eval_gate2_precision": 0.8495135391288109, "eval_gate2_recall": 0.8711254994444528, "eval_gate2_support": 3859, "eval_gate3_accuracy": 0.8866810655147588, "eval_gate3_f1": 0.8118642371342876, "eval_gate3_precision": 0.8115033608543574, "eval_gate3_recall": 0.8169535217733972, "eval_gate3_support": 6945, "eval_gate4_span_FN": 216, "eval_gate4_span_FP": 233, "eval_gate4_span_TP": 791, "eval_gate4_span_f1": 0.7789266321172492, "eval_gate4_span_precision": 0.7724609374924565, "eval_gate4_span_recall": 0.7855014895651886, "eval_loss": NaN, "eval_runtime": 10.4531, "eval_samples_per_second": 1130.954, "eval_steps_per_second": 35.396, "step": 29560 }, { "epoch": 10.0, "eval_gate1_accuracy": 0.9251295336787565, "eval_gate1_f1": 0.9199371922438249, "eval_gate1_precision": 0.920966070966071, "eval_gate1_recall": 0.9189513092953367, "eval_gate1_support": 3860, "eval_gate2_accuracy": 0.8709844559585492, "eval_gate2_f1": 0.8487797208181732, "eval_gate2_precision": 0.8500529357712848, "eval_gate2_recall": 0.8532136788618205, "eval_gate2_support": 3860, "eval_gate3_accuracy": 0.8920086393088553, "eval_gate3_f1": 0.6992827001481418, "eval_gate3_precision": 0.715591871957983, "eval_gate3_recall": 0.7063340262769819, "eval_gate3_support": 6945, "eval_gate4_span_FN": 196, "eval_gate4_span_FP": 204, "eval_gate4_span_TP": 815, "eval_gate4_span_f1": 0.8029556600167974, "eval_gate4_span_precision": 0.7998037291383728, "eval_gate4_span_recall": 0.8061325420296129, "eval_loss": NaN, "eval_runtime": 10.4274, "eval_samples_per_second": 1133.841, "eval_steps_per_second": 35.483, "step": 29560 } ], "logging_steps": 500, "max_steps": 29560, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.801469413818084e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }