Jennny's picture
upload checkpoint-2200 to repo root
413bcaa verified
{
"best_metric": 0.9359891558099255,
"best_model_checkpoint": "./my_unified_model_classification/checkpoint-1974",
"epoch": 2.0,
"eval_steps": 329,
"global_step": 2200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.045454545454545456,
"grad_norm": 72.5,
"learning_rate": 2.2831050228310503e-06,
"loss": 1.0312,
"step": 50
},
{
"epoch": 0.09090909090909091,
"grad_norm": 64.5,
"learning_rate": 4.566210045662101e-06,
"loss": 0.6885,
"step": 100
},
{
"epoch": 0.13636363636363635,
"grad_norm": 65.0,
"learning_rate": 6.849315068493151e-06,
"loss": 0.6293,
"step": 150
},
{
"epoch": 0.18181818181818182,
"grad_norm": 28.75,
"learning_rate": 9.132420091324201e-06,
"loss": 0.6143,
"step": 200
},
{
"epoch": 0.22727272727272727,
"grad_norm": 11.125,
"learning_rate": 9.843513377082282e-06,
"loss": 0.5555,
"step": 250
},
{
"epoch": 0.2727272727272727,
"grad_norm": 12.75,
"learning_rate": 9.591115598182736e-06,
"loss": 0.5559,
"step": 300
},
{
"epoch": 0.2990909090909091,
"eval_accuracy": 0.7053038485589221,
"eval_auc": 0.6867176541550628,
"eval_f1": 0.7980049875311721,
"eval_false_negatives": 2798,
"eval_false_positives": 2386,
"eval_loss": 0.5816290378570557,
"eval_precision": 0.8110248693172818,
"eval_recall": 0.7853965332106151,
"eval_runtime": 619.0057,
"eval_samples_per_second": 28.418,
"eval_specificity": 0.47594992312760814,
"eval_steps_per_second": 0.223,
"eval_true_negatives": 2167,
"eval_true_positives": 10240,
"step": 329
},
{
"epoch": 0.2990909090909091,
"step": 329,
"train_accuracy": 0.716,
"train_auc": 0.7171233056981181,
"train_f1": 0.8075880758807588,
"train_false_negatives": 149,
"train_false_positives": 135,
"train_loss": 0.5534917116165161,
"train_precision": 0.8153214774281806,
"train_recall": 0.8,
"train_runtime": 35.2538,
"train_samples_per_second": 28.366,
"train_specificity": 0.47058823529411764,
"train_steps_per_second": 0.227,
"train_true_negatives": 120,
"train_true_positives": 596
},
{
"epoch": 0.3181818181818182,
"grad_norm": 40.75,
"learning_rate": 9.338717819283191e-06,
"loss": 0.5417,
"step": 350
},
{
"epoch": 0.36363636363636365,
"grad_norm": 33.5,
"learning_rate": 9.086320040383645e-06,
"loss": 0.5218,
"step": 400
},
{
"epoch": 0.4090909090909091,
"grad_norm": 28.625,
"learning_rate": 8.8339222614841e-06,
"loss": 0.4994,
"step": 450
},
{
"epoch": 0.45454545454545453,
"grad_norm": 24.125,
"learning_rate": 8.581524482584555e-06,
"loss": 0.4921,
"step": 500
},
{
"epoch": 0.5,
"grad_norm": 74.5,
"learning_rate": 8.329126703685009e-06,
"loss": 0.4891,
"step": 550
},
{
"epoch": 0.5454545454545454,
"grad_norm": 33.25,
"learning_rate": 8.076728924785463e-06,
"loss": 0.4794,
"step": 600
},
{
"epoch": 0.5909090909090909,
"grad_norm": 30.625,
"learning_rate": 7.824331145885916e-06,
"loss": 0.4549,
"step": 650
},
{
"epoch": 0.5981818181818181,
"eval_accuracy": 0.7972827013813882,
"eval_auc": 0.8168990273813824,
"eval_f1": 0.864852573334344,
"eval_false_negatives": 1628,
"eval_false_positives": 1938,
"eval_loss": 0.4511999785900116,
"eval_precision": 0.8548097093197483,
"eval_recall": 0.8751342230403436,
"eval_runtime": 619.1094,
"eval_samples_per_second": 28.413,
"eval_specificity": 0.5743465846694488,
"eval_steps_per_second": 0.223,
"eval_true_negatives": 2615,
"eval_true_positives": 11410,
"step": 658
},
{
"epoch": 0.5981818181818181,
"step": 658,
"train_accuracy": 0.799,
"train_auc": 0.8304007002911323,
"train_f1": 0.8620452985586822,
"train_false_negatives": 88,
"train_false_positives": 113,
"train_loss": 0.44616425037384033,
"train_precision": 0.8475033738191633,
"train_recall": 0.8770949720670391,
"train_runtime": 35.294,
"train_samples_per_second": 28.333,
"train_specificity": 0.602112676056338,
"train_steps_per_second": 0.227,
"train_true_negatives": 171,
"train_true_positives": 628
},
{
"epoch": 0.6363636363636364,
"grad_norm": 17.375,
"learning_rate": 7.571933366986371e-06,
"loss": 0.4336,
"step": 700
},
{
"epoch": 0.6818181818181818,
"grad_norm": 23.25,
"learning_rate": 7.319535588086826e-06,
"loss": 0.4089,
"step": 750
},
{
"epoch": 0.7272727272727273,
"grad_norm": 26.25,
"learning_rate": 7.06713780918728e-06,
"loss": 0.3864,
"step": 800
},
{
"epoch": 0.7727272727272727,
"grad_norm": 22.5,
"learning_rate": 6.8147400302877344e-06,
"loss": 0.37,
"step": 850
},
{
"epoch": 0.8181818181818182,
"grad_norm": 37.0,
"learning_rate": 6.562342251388188e-06,
"loss": 0.3612,
"step": 900
},
{
"epoch": 0.8636363636363636,
"grad_norm": 20.5,
"learning_rate": 6.309944472488643e-06,
"loss": 0.3456,
"step": 950
},
{
"epoch": 0.8972727272727272,
"eval_accuracy": 0.8711841282473992,
"eval_auc": 0.9114312226670747,
"eval_f1": 0.9161113579150008,
"eval_false_negatives": 665,
"eval_false_positives": 1601,
"eval_loss": 0.3193175494670868,
"eval_precision": 0.8854300844425361,
"eval_recall": 0.9489952446694279,
"eval_runtime": 618.9502,
"eval_samples_per_second": 28.421,
"eval_specificity": 0.6483637162310565,
"eval_steps_per_second": 0.223,
"eval_true_negatives": 2952,
"eval_true_positives": 12373,
"step": 987
},
{
"epoch": 0.8972727272727272,
"step": 987,
"train_accuracy": 0.893,
"train_auc": 0.945685678230828,
"train_f1": 0.92909211398277,
"train_false_negatives": 37,
"train_false_positives": 70,
"train_loss": 0.256592333316803,
"train_precision": 0.9092088197146563,
"train_recall": 0.9498644986449865,
"train_runtime": 35.2309,
"train_samples_per_second": 28.384,
"train_specificity": 0.732824427480916,
"train_steps_per_second": 0.227,
"train_true_negatives": 192,
"train_true_positives": 701
},
{
"epoch": 0.9090909090909091,
"grad_norm": 28.75,
"learning_rate": 6.057546693589097e-06,
"loss": 0.3191,
"step": 1000
},
{
"epoch": 0.9545454545454546,
"grad_norm": 30.125,
"learning_rate": 5.805148914689552e-06,
"loss": 0.3099,
"step": 1050
},
{
"epoch": 1.0,
"grad_norm": 47.75,
"learning_rate": 5.5527511357900055e-06,
"loss": 0.2908,
"step": 1100
},
{
"epoch": 1.0454545454545454,
"grad_norm": 29.875,
"learning_rate": 5.300353356890459e-06,
"loss": 0.2196,
"step": 1150
},
{
"epoch": 1.0909090909090908,
"grad_norm": 24.875,
"learning_rate": 5.047955577990914e-06,
"loss": 0.2147,
"step": 1200
},
{
"epoch": 1.1363636363636362,
"grad_norm": 19.875,
"learning_rate": 4.795557799091368e-06,
"loss": 0.2107,
"step": 1250
},
{
"epoch": 1.1818181818181819,
"grad_norm": 20.5,
"learning_rate": 4.543160020191823e-06,
"loss": 0.212,
"step": 1300
},
{
"epoch": 1.1963636363636363,
"eval_accuracy": 0.8913649025069638,
"eval_auc": 0.9333465842314582,
"eval_f1": 0.9275834628064724,
"eval_false_negatives": 799,
"eval_false_positives": 1112,
"eval_loss": 0.28013044595718384,
"eval_precision": 0.9167103587746236,
"eval_recall": 0.938717594723117,
"eval_runtime": 618.7723,
"eval_samples_per_second": 28.429,
"eval_specificity": 0.7557654293872172,
"eval_steps_per_second": 0.223,
"eval_true_negatives": 3441,
"eval_true_positives": 12239,
"step": 1316
},
{
"epoch": 1.1963636363636363,
"step": 1316,
"train_accuracy": 0.924,
"train_auc": 0.966124817361522,
"train_f1": 0.9491978609625669,
"train_false_negatives": 33,
"train_false_positives": 43,
"train_loss": 0.1971082091331482,
"train_precision": 0.9428950863213812,
"train_recall": 0.955585464333782,
"train_runtime": 35.2125,
"train_samples_per_second": 28.399,
"train_specificity": 0.8326848249027238,
"train_steps_per_second": 0.227,
"train_true_negatives": 214,
"train_true_positives": 710
},
{
"epoch": 1.2272727272727273,
"grad_norm": 21.5,
"learning_rate": 4.290762241292277e-06,
"loss": 0.2041,
"step": 1350
},
{
"epoch": 1.2727272727272727,
"grad_norm": 24.875,
"learning_rate": 4.038364462392731e-06,
"loss": 0.2047,
"step": 1400
},
{
"epoch": 1.3181818181818181,
"grad_norm": 14.1875,
"learning_rate": 3.7859666834931856e-06,
"loss": 0.1941,
"step": 1450
},
{
"epoch": 1.3636363636363638,
"grad_norm": 21.0,
"learning_rate": 3.53356890459364e-06,
"loss": 0.1948,
"step": 1500
},
{
"epoch": 1.4090909090909092,
"grad_norm": 24.625,
"learning_rate": 3.281171125694094e-06,
"loss": 0.1905,
"step": 1550
},
{
"epoch": 1.4545454545454546,
"grad_norm": 24.125,
"learning_rate": 3.0287733467945485e-06,
"loss": 0.1943,
"step": 1600
},
{
"epoch": 1.4954545454545456,
"eval_accuracy": 0.9007446989938036,
"eval_auc": 0.9430535156708126,
"eval_f1": 0.9344643795510847,
"eval_false_negatives": 590,
"eval_false_positives": 1156,
"eval_loss": 0.2589464485645294,
"eval_precision": 0.9150249926492208,
"eval_recall": 0.9547476606841541,
"eval_runtime": 619.3355,
"eval_samples_per_second": 28.403,
"eval_specificity": 0.7461014715572151,
"eval_steps_per_second": 0.223,
"eval_true_negatives": 3397,
"eval_true_positives": 12448,
"step": 1645
},
{
"epoch": 1.4954545454545456,
"step": 1645,
"train_accuracy": 0.953,
"train_auc": 0.9871008686077808,
"train_f1": 0.9681787406905891,
"train_false_negatives": 17,
"train_false_positives": 30,
"train_loss": 0.13334013521671295,
"train_precision": 0.959731543624161,
"train_recall": 0.976775956284153,
"train_runtime": 35.2541,
"train_samples_per_second": 28.365,
"train_specificity": 0.8880597014925373,
"train_steps_per_second": 0.227,
"train_true_negatives": 238,
"train_true_positives": 715
},
{
"epoch": 1.5,
"grad_norm": 23.75,
"learning_rate": 2.7763755678950027e-06,
"loss": 0.191,
"step": 1650
},
{
"epoch": 1.5454545454545454,
"grad_norm": 20.625,
"learning_rate": 2.523977788995457e-06,
"loss": 0.1878,
"step": 1700
},
{
"epoch": 1.5909090909090908,
"grad_norm": 19.75,
"learning_rate": 2.2715800100959113e-06,
"loss": 0.1813,
"step": 1750
},
{
"epoch": 1.6363636363636362,
"grad_norm": 17.75,
"learning_rate": 2.0191822311963656e-06,
"loss": 0.1894,
"step": 1800
},
{
"epoch": 1.6818181818181817,
"grad_norm": 23.75,
"learning_rate": 1.76678445229682e-06,
"loss": 0.1794,
"step": 1850
},
{
"epoch": 1.7272727272727273,
"grad_norm": 24.125,
"learning_rate": 1.5143866733972742e-06,
"loss": 0.1862,
"step": 1900
},
{
"epoch": 1.7727272727272727,
"grad_norm": 18.375,
"learning_rate": 1.2619888944977285e-06,
"loss": 0.1736,
"step": 1950
},
{
"epoch": 1.7945454545454544,
"eval_accuracy": 0.9033596725598317,
"eval_auc": 0.9469119511342725,
"eval_f1": 0.9359891558099255,
"eval_false_negatives": 609,
"eval_false_positives": 1091,
"eval_loss": 0.2513716220855713,
"eval_precision": 0.9193047337278106,
"eval_recall": 0.9532903819604234,
"eval_runtime": 618.7166,
"eval_samples_per_second": 28.431,
"eval_specificity": 0.760377772896991,
"eval_steps_per_second": 0.223,
"eval_true_negatives": 3462,
"eval_true_positives": 12429,
"step": 1974
},
{
"epoch": 1.7945454545454544,
"step": 1974,
"train_accuracy": 0.959,
"train_auc": 0.9890839364523575,
"train_f1": 0.9726848767488341,
"train_false_negatives": 11,
"train_false_positives": 30,
"train_loss": 0.11946262419223785,
"train_precision": 0.9605263157894737,
"train_recall": 0.9851551956815114,
"train_runtime": 35.2738,
"train_samples_per_second": 28.35,
"train_specificity": 0.8841698841698842,
"train_steps_per_second": 0.227,
"train_true_negatives": 229,
"train_true_positives": 730
},
{
"epoch": 1.8181818181818183,
"grad_norm": 33.75,
"learning_rate": 1.0095911155981828e-06,
"loss": 0.1764,
"step": 2000
},
{
"epoch": 1.8636363636363638,
"grad_norm": 24.25,
"learning_rate": 7.571933366986371e-07,
"loss": 0.1744,
"step": 2050
},
{
"epoch": 1.9090909090909092,
"grad_norm": 31.875,
"learning_rate": 5.047955577990914e-07,
"loss": 0.1796,
"step": 2100
},
{
"epoch": 1.9545454545454546,
"grad_norm": 23.25,
"learning_rate": 2.523977788995457e-07,
"loss": 0.1867,
"step": 2150
},
{
"epoch": 2.0,
"grad_norm": 36.0,
"learning_rate": 0.0,
"loss": 0.179,
"step": 2200
}
],
"logging_steps": 50,
"max_steps": 2200,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 329,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2065282585130959e+19,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}