Invalid JSON: Unexpected token 'N', ..."al_loss": NaN,
"... is not valid JSON
| { | |
| "best_global_step": 26604, | |
| "best_metric": 0.9163582001286115, | |
| "best_model_checkpoint": "output/indra_stmt_classifier/checkpoint-26604", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 29560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 84.59678649902344, | |
| "learning_rate": 2.7001014884979704e-05, | |
| "loss": 2.2672, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gate1_accuracy": 0.8585125680228038, | |
| "eval_gate1_f1": 0.8505829142307659, | |
| "eval_gate1_precision": 0.8473952560532426, | |
| "eval_gate1_recall": 0.854657238060827, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.7587457890645245, | |
| "eval_gate2_f1": 0.5592993246296689, | |
| "eval_gate2_precision": 0.6163758861756914, | |
| "eval_gate2_recall": 0.5543459088227541, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8548596112311015, | |
| "eval_gate3_f1": 0.6971239789155547, | |
| "eval_gate3_precision": 0.700932514781479, | |
| "eval_gate3_recall": 0.7071816387030568, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 230, | |
| "eval_gate4_span_FP": 347, | |
| "eval_gate4_span_TP": 777, | |
| "eval_gate4_span_f1": 0.7292350958998283, | |
| "eval_gate4_span_precision": 0.6912811387838854, | |
| "eval_gate4_span_recall": 0.7715988083339463, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.4486, | |
| "eval_samples_per_second": 1131.439, | |
| "eval_steps_per_second": 35.411, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 88.20085144042969, | |
| "learning_rate": 2.4001014884979702e-05, | |
| "loss": 1.2241, | |
| "step": 5912 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gate1_accuracy": 0.8929774552993003, | |
| "eval_gate1_f1": 0.8820620404399862, | |
| "eval_gate1_precision": 0.8988718775181306, | |
| "eval_gate1_recall": 0.871612670119234, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.808240476807463, | |
| "eval_gate2_f1": 0.6881412582252169, | |
| "eval_gate2_precision": 0.6832538631102347, | |
| "eval_gate2_recall": 0.6996653263704344, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8666666666666667, | |
| "eval_gate3_f1": 0.744109590199225, | |
| "eval_gate3_precision": 0.7653765174358127, | |
| "eval_gate3_recall": 0.7614317320508905, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 221, | |
| "eval_gate4_span_FP": 295, | |
| "eval_gate4_span_TP": 786, | |
| "eval_gate4_span_f1": 0.7528735582174595, | |
| "eval_gate4_span_precision": 0.7271045328332368, | |
| "eval_gate4_span_recall": 0.7805362462683164, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.4006, | |
| "eval_samples_per_second": 1136.67, | |
| "eval_steps_per_second": 35.575, | |
| "step": 5912 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 67.10189056396484, | |
| "learning_rate": 2.10010148849797e-05, | |
| "loss": 0.8652, | |
| "step": 8868 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gate1_accuracy": 0.9059341798393367, | |
| "eval_gate1_f1": 0.8997563137620789, | |
| "eval_gate1_precision": 0.899055894281647, | |
| "eval_gate1_recall": 0.9004810366878294, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8333765224151335, | |
| "eval_gate2_f1": 0.7799911701221585, | |
| "eval_gate2_precision": 0.8104194561625689, | |
| "eval_gate2_recall": 0.7600982965627961, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8732901367890569, | |
| "eval_gate3_f1": 0.7904223821154268, | |
| "eval_gate3_precision": 0.8444284314763446, | |
| "eval_gate3_recall": 0.7593577482285429, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 236, | |
| "eval_gate4_span_FP": 228, | |
| "eval_gate4_span_TP": 771, | |
| "eval_gate4_span_f1": 0.7686939132376799, | |
| "eval_gate4_span_precision": 0.7717717717640463, | |
| "eval_gate4_span_recall": 0.7656405163776997, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.3684, | |
| "eval_samples_per_second": 1140.196, | |
| "eval_steps_per_second": 35.685, | |
| "step": 8868 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 173.31228637695312, | |
| "learning_rate": 1.8001014884979702e-05, | |
| "loss": 0.6376, | |
| "step": 11824 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gate1_accuracy": 0.9077481212749416, | |
| "eval_gate1_f1": 0.9007457126723182, | |
| "eval_gate1_precision": 0.9041686699243279, | |
| "eval_gate1_recall": 0.8977536805400228, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8419279606115574, | |
| "eval_gate2_f1": 0.7689803309009853, | |
| "eval_gate2_precision": 0.7719738299874924, | |
| "eval_gate2_recall": 0.7762247755795892, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8780417566594673, | |
| "eval_gate3_f1": 0.8038529609303686, | |
| "eval_gate3_precision": 0.8238117797554888, | |
| "eval_gate3_recall": 0.7978850610103051, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 218, | |
| "eval_gate4_span_FP": 226, | |
| "eval_gate4_span_TP": 789, | |
| "eval_gate4_span_f1": 0.7804154252594212, | |
| "eval_gate4_span_precision": 0.777339901470174, | |
| "eval_gate4_span_recall": 0.7835153922464397, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.3445, | |
| "eval_samples_per_second": 1142.825, | |
| "eval_steps_per_second": 35.768, | |
| "step": 11824 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 22.405181884765625, | |
| "learning_rate": 1.5001014884979702e-05, | |
| "loss": 0.4713, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gate1_accuracy": 0.9124125421093547, | |
| "eval_gate1_f1": 0.905395262879988, | |
| "eval_gate1_precision": 0.9108288424356044, | |
| "eval_gate1_recall": 0.9009235044132069, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8608447784400104, | |
| "eval_gate2_f1": 0.7846052784615595, | |
| "eval_gate2_precision": 0.7779462341191327, | |
| "eval_gate2_recall": 0.8035688815922226, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8822174226061915, | |
| "eval_gate3_f1": 0.799720850034007, | |
| "eval_gate3_precision": 0.7922933108551248, | |
| "eval_gate3_recall": 0.8164975006789633, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 217, | |
| "eval_gate4_span_FP": 204, | |
| "eval_gate4_span_TP": 790, | |
| "eval_gate4_span_f1": 0.7896051923936183, | |
| "eval_gate4_span_precision": 0.7947686116620245, | |
| "eval_gate4_span_recall": 0.7845084409058142, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.3367, | |
| "eval_samples_per_second": 1143.691, | |
| "eval_steps_per_second": 35.795, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 150.47354125976562, | |
| "learning_rate": 1.2001014884979702e-05, | |
| "loss": 0.3573, | |
| "step": 17736 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gate1_accuracy": 0.9168178284529671, | |
| "eval_gate1_f1": 0.91133041633164, | |
| "eval_gate1_precision": 0.9107173896884344, | |
| "eval_gate1_recall": 0.9119611098684928, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8647317958020212, | |
| "eval_gate2_f1": 0.7980336380648058, | |
| "eval_gate2_precision": 0.8117843067907142, | |
| "eval_gate2_recall": 0.7909589217708924, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8846652267818574, | |
| "eval_gate3_f1": 0.7968635418767978, | |
| "eval_gate3_precision": 0.8025916830061807, | |
| "eval_gate3_recall": 0.7966439892081336, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 198, | |
| "eval_gate4_span_FP": 242, | |
| "eval_gate4_span_TP": 809, | |
| "eval_gate4_span_f1": 0.786200189358105, | |
| "eval_gate4_span_precision": 0.7697431018004782, | |
| "eval_gate4_span_recall": 0.8033763654339288, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.3401, | |
| "eval_samples_per_second": 1143.318, | |
| "eval_steps_per_second": 35.783, | |
| "step": 17736 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 270.212646484375, | |
| "learning_rate": 9.001014884979702e-06, | |
| "loss": 0.2711, | |
| "step": 20692 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gate1_accuracy": 0.9191500388701737, | |
| "eval_gate1_f1": 0.9137570612727655, | |
| "eval_gate1_precision": 0.9134164984755443, | |
| "eval_gate1_recall": 0.9141029060064348, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8693962166364343, | |
| "eval_gate2_f1": 0.8124767328435765, | |
| "eval_gate2_precision": 0.8061838445118481, | |
| "eval_gate2_recall": 0.8279748920673979, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8862491000719942, | |
| "eval_gate3_f1": 0.8027532601826777, | |
| "eval_gate3_precision": 0.7866745156811082, | |
| "eval_gate3_recall": 0.8226565082669126, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 251, | |
| "eval_gate4_span_FP": 213, | |
| "eval_gate4_span_TP": 756, | |
| "eval_gate4_span_f1": 0.7651821812289223, | |
| "eval_gate4_span_precision": 0.7801857585058805, | |
| "eval_gate4_span_recall": 0.750744786487083, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.4102, | |
| "eval_samples_per_second": 1135.613, | |
| "eval_steps_per_second": 35.542, | |
| "step": 20692 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 30.083831787109375, | |
| "learning_rate": 6.001014884979703e-06, | |
| "loss": 0.2072, | |
| "step": 23648 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gate1_accuracy": 0.9194091733609744, | |
| "eval_gate1_f1": 0.9136575067455691, | |
| "eval_gate1_precision": 0.9151861735925785, | |
| "eval_gate1_recall": 0.9122216295888463, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8683596786732314, | |
| "eval_gate2_f1": 0.7953213528224212, | |
| "eval_gate2_precision": 0.7941072643208182, | |
| "eval_gate2_recall": 0.8039876906998746, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8859611231101512, | |
| "eval_gate3_f1": 0.8067851499228712, | |
| "eval_gate3_precision": 0.8044692044507203, | |
| "eval_gate3_recall": 0.8120360484679332, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 211, | |
| "eval_gate4_span_FP": 223, | |
| "eval_gate4_span_TP": 796, | |
| "eval_gate4_span_f1": 0.7857847926232181, | |
| "eval_gate4_span_precision": 0.7811579980296255, | |
| "eval_gate4_span_recall": 0.7904667328620609, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.415, | |
| "eval_samples_per_second": 1135.098, | |
| "eval_steps_per_second": 35.526, | |
| "step": 23648 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 303.0519104003906, | |
| "learning_rate": 3.0010148849797024e-06, | |
| "loss": 0.1509, | |
| "step": 26604 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gate1_accuracy": 0.9217413837781808, | |
| "eval_gate1_f1": 0.9163582001286115, | |
| "eval_gate1_precision": 0.9168310136033854, | |
| "eval_gate1_recall": 0.915894857280501, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8717284270536408, | |
| "eval_gate2_f1": 0.8542694094275488, | |
| "eval_gate2_precision": 0.8495135391288109, | |
| "eval_gate2_recall": 0.8711254994444528, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8866810655147588, | |
| "eval_gate3_f1": 0.8118642371342876, | |
| "eval_gate3_precision": 0.8115033608543574, | |
| "eval_gate3_recall": 0.8169535217733972, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 216, | |
| "eval_gate4_span_FP": 233, | |
| "eval_gate4_span_TP": 791, | |
| "eval_gate4_span_f1": 0.7789266321172492, | |
| "eval_gate4_span_precision": 0.7724609374924565, | |
| "eval_gate4_span_recall": 0.7855014895651886, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.45, | |
| "eval_samples_per_second": 1131.292, | |
| "eval_steps_per_second": 35.407, | |
| "step": 26604 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 4.396928310394287, | |
| "learning_rate": 1.0148849797023006e-09, | |
| "loss": 0.122, | |
| "step": 29560 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gate1_accuracy": 0.9212231147965795, | |
| "eval_gate1_f1": 0.9159218285491496, | |
| "eval_gate1_precision": 0.915806583633259, | |
| "eval_gate1_recall": 0.9160376628069029, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8722466960352423, | |
| "eval_gate2_f1": 0.856680976809179, | |
| "eval_gate2_precision": 0.8659863944374855, | |
| "eval_gate2_recall": 0.8537404898111944, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8875449964002879, | |
| "eval_gate3_f1": 0.8118433314020443, | |
| "eval_gate3_precision": 0.8153146247248837, | |
| "eval_gate3_recall": 0.8127560834123785, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 213, | |
| "eval_gate4_span_FP": 233, | |
| "eval_gate4_span_TP": 794, | |
| "eval_gate4_span_f1": 0.7807276252779591, | |
| "eval_gate4_span_precision": 0.7731256085611187, | |
| "eval_gate4_span_recall": 0.788480635543312, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.3467, | |
| "eval_samples_per_second": 1142.581, | |
| "eval_steps_per_second": 35.76, | |
| "step": 29560 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 29560, | |
| "total_flos": 5.801469413818084e+16, | |
| "train_loss": 0.6573912851542678, | |
| "train_runtime": 2140.4043, | |
| "train_samples_per_second": 441.828, | |
| "train_steps_per_second": 13.81 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gate1_accuracy": 0.9217413837781808, | |
| "eval_gate1_f1": 0.9163582001286115, | |
| "eval_gate1_precision": 0.9168310136033854, | |
| "eval_gate1_recall": 0.915894857280501, | |
| "eval_gate1_support": 3859, | |
| "eval_gate2_accuracy": 0.8717284270536408, | |
| "eval_gate2_f1": 0.8542694094275488, | |
| "eval_gate2_precision": 0.8495135391288109, | |
| "eval_gate2_recall": 0.8711254994444528, | |
| "eval_gate2_support": 3859, | |
| "eval_gate3_accuracy": 0.8866810655147588, | |
| "eval_gate3_f1": 0.8118642371342876, | |
| "eval_gate3_precision": 0.8115033608543574, | |
| "eval_gate3_recall": 0.8169535217733972, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 216, | |
| "eval_gate4_span_FP": 233, | |
| "eval_gate4_span_TP": 791, | |
| "eval_gate4_span_f1": 0.7789266321172492, | |
| "eval_gate4_span_precision": 0.7724609374924565, | |
| "eval_gate4_span_recall": 0.7855014895651886, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.4531, | |
| "eval_samples_per_second": 1130.954, | |
| "eval_steps_per_second": 35.396, | |
| "step": 29560 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gate1_accuracy": 0.9251295336787565, | |
| "eval_gate1_f1": 0.9199371922438249, | |
| "eval_gate1_precision": 0.920966070966071, | |
| "eval_gate1_recall": 0.9189513092953367, | |
| "eval_gate1_support": 3860, | |
| "eval_gate2_accuracy": 0.8709844559585492, | |
| "eval_gate2_f1": 0.8487797208181732, | |
| "eval_gate2_precision": 0.8500529357712848, | |
| "eval_gate2_recall": 0.8532136788618205, | |
| "eval_gate2_support": 3860, | |
| "eval_gate3_accuracy": 0.8920086393088553, | |
| "eval_gate3_f1": 0.6992827001481418, | |
| "eval_gate3_precision": 0.715591871957983, | |
| "eval_gate3_recall": 0.7063340262769819, | |
| "eval_gate3_support": 6945, | |
| "eval_gate4_span_FN": 196, | |
| "eval_gate4_span_FP": 204, | |
| "eval_gate4_span_TP": 815, | |
| "eval_gate4_span_f1": 0.8029556600167974, | |
| "eval_gate4_span_precision": 0.7998037291383728, | |
| "eval_gate4_span_recall": 0.8061325420296129, | |
| "eval_loss": NaN, | |
| "eval_runtime": 10.4274, | |
| "eval_samples_per_second": 1133.841, | |
| "eval_steps_per_second": 35.483, | |
| "step": 29560 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 29560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.801469413818084e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |