Thibaut's picture
End of training
db248f3 verified
{
"best_metric": 0.41509433962264153,
"best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost9_outputs/checkpoint-1600",
"epoch": 25.0,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.625,
"grad_norm": 1.5967129468917847,
"learning_rate": 1.5e-05,
"loss": 1.3942,
"step": 50
},
{
"epoch": 1.0,
"eval_accuracy": 0.33490566037735847,
"eval_loss": 1.3565607070922852,
"eval_runtime": 1.5583,
"eval_samples_per_second": 136.048,
"eval_steps_per_second": 17.327,
"step": 80
},
{
"epoch": 1.25,
"grad_norm": 1.937333583831787,
"learning_rate": 3e-05,
"loss": 1.3639,
"step": 100
},
{
"epoch": 1.875,
"grad_norm": 1.9313533306121826,
"learning_rate": 2.994876739510005e-05,
"loss": 1.3192,
"step": 150
},
{
"epoch": 2.0,
"eval_accuracy": 0.3584905660377358,
"eval_loss": 1.3104463815689087,
"eval_runtime": 1.5299,
"eval_samples_per_second": 138.573,
"eval_steps_per_second": 17.648,
"step": 160
},
{
"epoch": 2.5,
"grad_norm": 1.8240773677825928,
"learning_rate": 2.9795419551040836e-05,
"loss": 1.2795,
"step": 200
},
{
"epoch": 3.0,
"eval_accuracy": 0.37264150943396224,
"eval_loss": 1.2999355792999268,
"eval_runtime": 1.5285,
"eval_samples_per_second": 138.698,
"eval_steps_per_second": 17.664,
"step": 240
},
{
"epoch": 3.125,
"grad_norm": 1.688889980316162,
"learning_rate": 2.9541003989089956e-05,
"loss": 1.2794,
"step": 250
},
{
"epoch": 3.75,
"grad_norm": 2.2275140285491943,
"learning_rate": 2.9187258625509518e-05,
"loss": 1.2419,
"step": 300
},
{
"epoch": 4.0,
"eval_accuracy": 0.37264150943396224,
"eval_loss": 1.2860321998596191,
"eval_runtime": 1.5373,
"eval_samples_per_second": 137.908,
"eval_steps_per_second": 17.564,
"step": 320
},
{
"epoch": 4.375,
"grad_norm": 1.8031086921691895,
"learning_rate": 2.873659989982586e-05,
"loss": 1.2749,
"step": 350
},
{
"epoch": 5.0,
"grad_norm": 2.7317395210266113,
"learning_rate": 2.8192106268097336e-05,
"loss": 1.2213,
"step": 400
},
{
"epoch": 5.0,
"eval_accuracy": 0.36792452830188677,
"eval_loss": 1.2893822193145752,
"eval_runtime": 1.538,
"eval_samples_per_second": 137.84,
"eval_steps_per_second": 17.555,
"step": 400
},
{
"epoch": 5.625,
"grad_norm": 1.6971545219421387,
"learning_rate": 2.7557497173937928e-05,
"loss": 1.2287,
"step": 450
},
{
"epoch": 6.0,
"eval_accuracy": 0.3632075471698113,
"eval_loss": 1.2862772941589355,
"eval_runtime": 1.5343,
"eval_samples_per_second": 138.176,
"eval_steps_per_second": 17.598,
"step": 480
},
{
"epoch": 6.25,
"grad_norm": 1.7909101247787476,
"learning_rate": 2.6837107640945904e-05,
"loss": 1.2138,
"step": 500
},
{
"epoch": 6.875,
"grad_norm": 2.154249668121338,
"learning_rate": 2.6035858660096975e-05,
"loss": 1.2123,
"step": 550
},
{
"epoch": 7.0,
"eval_accuracy": 0.3915094339622642,
"eval_loss": 1.287874460220337,
"eval_runtime": 1.5329,
"eval_samples_per_second": 138.296,
"eval_steps_per_second": 17.613,
"step": 560
},
{
"epoch": 7.5,
"grad_norm": 1.533173680305481,
"learning_rate": 2.5159223574386117e-05,
"loss": 1.2124,
"step": 600
},
{
"epoch": 8.0,
"eval_accuracy": 0.3867924528301887,
"eval_loss": 1.2767480611801147,
"eval_runtime": 1.527,
"eval_samples_per_second": 138.831,
"eval_steps_per_second": 17.681,
"step": 640
},
{
"epoch": 8.125,
"grad_norm": 1.6685694456100464,
"learning_rate": 2.4213190690345018e-05,
"loss": 1.2018,
"step": 650
},
{
"epoch": 8.75,
"grad_norm": 2.190777540206909,
"learning_rate": 2.320422237183641e-05,
"loss": 1.2144,
"step": 700
},
{
"epoch": 9.0,
"eval_accuracy": 0.37264150943396224,
"eval_loss": 1.2851072549819946,
"eval_runtime": 1.5312,
"eval_samples_per_second": 138.456,
"eval_steps_per_second": 17.634,
"step": 720
},
{
"epoch": 9.375,
"grad_norm": 2.0433597564697266,
"learning_rate": 2.2139210895556104e-05,
"loss": 1.1531,
"step": 750
},
{
"epoch": 10.0,
"grad_norm": 2.4729323387145996,
"learning_rate": 2.1025431369794546e-05,
"loss": 1.2202,
"step": 800
},
{
"epoch": 10.0,
"eval_accuracy": 0.39622641509433965,
"eval_loss": 1.2682827711105347,
"eval_runtime": 1.5206,
"eval_samples_per_second": 139.414,
"eval_steps_per_second": 17.756,
"step": 800
},
{
"epoch": 10.625,
"grad_norm": 1.8216651678085327,
"learning_rate": 1.9870492038070255e-05,
"loss": 1.1804,
"step": 850
},
{
"epoch": 11.0,
"eval_accuracy": 0.4009433962264151,
"eval_loss": 1.2658637762069702,
"eval_runtime": 1.5239,
"eval_samples_per_second": 139.115,
"eval_steps_per_second": 17.718,
"step": 880
},
{
"epoch": 11.25,
"grad_norm": 2.511573553085327,
"learning_rate": 1.8682282307111988e-05,
"loss": 1.1541,
"step": 900
},
{
"epoch": 11.875,
"grad_norm": 2.372868299484253,
"learning_rate": 1.746891885421101e-05,
"loss": 1.2031,
"step": 950
},
{
"epoch": 12.0,
"eval_accuracy": 0.39622641509433965,
"eval_loss": 1.265770435333252,
"eval_runtime": 1.5336,
"eval_samples_per_second": 138.236,
"eval_steps_per_second": 17.606,
"step": 960
},
{
"epoch": 12.5,
"grad_norm": 1.9403022527694702,
"learning_rate": 1.623869018208499e-05,
"loss": 1.1428,
"step": 1000
},
{
"epoch": 13.0,
"eval_accuracy": 0.4056603773584906,
"eval_loss": 1.262069582939148,
"eval_runtime": 1.5275,
"eval_samples_per_second": 138.792,
"eval_steps_per_second": 17.676,
"step": 1040
},
{
"epoch": 13.125,
"grad_norm": 2.0001461505889893,
"learning_rate": 1.5e-05,
"loss": 1.1744,
"step": 1050
},
{
"epoch": 13.75,
"grad_norm": 1.5777283906936646,
"learning_rate": 1.3761309817915017e-05,
"loss": 1.1224,
"step": 1100
},
{
"epoch": 14.0,
"eval_accuracy": 0.41037735849056606,
"eval_loss": 1.2655014991760254,
"eval_runtime": 1.5235,
"eval_samples_per_second": 139.151,
"eval_steps_per_second": 17.722,
"step": 1120
},
{
"epoch": 14.375,
"grad_norm": 1.9835065603256226,
"learning_rate": 1.2531081145788989e-05,
"loss": 1.1765,
"step": 1150
},
{
"epoch": 15.0,
"grad_norm": 3.021399974822998,
"learning_rate": 1.1317717692888014e-05,
"loss": 1.1486,
"step": 1200
},
{
"epoch": 15.0,
"eval_accuracy": 0.39622641509433965,
"eval_loss": 1.2606432437896729,
"eval_runtime": 1.5215,
"eval_samples_per_second": 139.34,
"eval_steps_per_second": 17.746,
"step": 1200
},
{
"epoch": 15.625,
"grad_norm": 1.9539563655853271,
"learning_rate": 1.0129507961929749e-05,
"loss": 1.1451,
"step": 1250
},
{
"epoch": 16.0,
"eval_accuracy": 0.4056603773584906,
"eval_loss": 1.2635830640792847,
"eval_runtime": 1.5268,
"eval_samples_per_second": 138.851,
"eval_steps_per_second": 17.684,
"step": 1280
},
{
"epoch": 16.25,
"grad_norm": 2.3783926963806152,
"learning_rate": 8.974568630205462e-06,
"loss": 1.1363,
"step": 1300
},
{
"epoch": 16.875,
"grad_norm": 2.221468448638916,
"learning_rate": 7.860789104443897e-06,
"loss": 1.1717,
"step": 1350
},
{
"epoch": 17.0,
"eval_accuracy": 0.4056603773584906,
"eval_loss": 1.2595568895339966,
"eval_runtime": 1.5272,
"eval_samples_per_second": 138.816,
"eval_steps_per_second": 17.679,
"step": 1360
},
{
"epoch": 17.5,
"grad_norm": 1.5900912284851074,
"learning_rate": 6.795777628163599e-06,
"loss": 1.1231,
"step": 1400
},
{
"epoch": 18.0,
"eval_accuracy": 0.4056603773584906,
"eval_loss": 1.26264488697052,
"eval_runtime": 1.5254,
"eval_samples_per_second": 138.976,
"eval_steps_per_second": 17.7,
"step": 1440
},
{
"epoch": 18.125,
"grad_norm": 1.9134879112243652,
"learning_rate": 5.786809309654983e-06,
"loss": 1.1455,
"step": 1450
},
{
"epoch": 18.75,
"grad_norm": 1.3620388507843018,
"learning_rate": 4.840776425613887e-06,
"loss": 1.1468,
"step": 1500
},
{
"epoch": 19.0,
"eval_accuracy": 0.39622641509433965,
"eval_loss": 1.2616825103759766,
"eval_runtime": 1.5238,
"eval_samples_per_second": 139.125,
"eval_steps_per_second": 17.719,
"step": 1520
},
{
"epoch": 19.375,
"grad_norm": 1.9459707736968994,
"learning_rate": 3.964141339903026e-06,
"loss": 1.167,
"step": 1550
},
{
"epoch": 20.0,
"grad_norm": 2.5087108612060547,
"learning_rate": 3.162892359054098e-06,
"loss": 1.0958,
"step": 1600
},
{
"epoch": 20.0,
"eval_accuracy": 0.41509433962264153,
"eval_loss": 1.2586045265197754,
"eval_runtime": 1.5245,
"eval_samples_per_second": 139.065,
"eval_steps_per_second": 17.711,
"step": 1600
},
{
"epoch": 20.625,
"grad_norm": 1.9791457653045654,
"learning_rate": 2.442502826062072e-06,
"loss": 1.1456,
"step": 1650
},
{
"epoch": 21.0,
"eval_accuracy": 0.41037735849056606,
"eval_loss": 1.258667230606079,
"eval_runtime": 1.5176,
"eval_samples_per_second": 139.693,
"eval_steps_per_second": 17.791,
"step": 1680
},
{
"epoch": 21.25,
"grad_norm": 1.4690279960632324,
"learning_rate": 1.8078937319026655e-06,
"loss": 1.1492,
"step": 1700
},
{
"epoch": 21.875,
"grad_norm": 2.0290393829345703,
"learning_rate": 1.2634001001741375e-06,
"loss": 1.127,
"step": 1750
},
{
"epoch": 22.0,
"eval_accuracy": 0.41509433962264153,
"eval_loss": 1.258967399597168,
"eval_runtime": 1.5264,
"eval_samples_per_second": 138.887,
"eval_steps_per_second": 17.688,
"step": 1760
},
{
"epoch": 22.5,
"grad_norm": 1.6253653764724731,
"learning_rate": 8.127413744904805e-07,
"loss": 1.1308,
"step": 1800
},
{
"epoch": 23.0,
"eval_accuracy": 0.41509433962264153,
"eval_loss": 1.2586345672607422,
"eval_runtime": 1.5342,
"eval_samples_per_second": 138.183,
"eval_steps_per_second": 17.599,
"step": 1840
},
{
"epoch": 23.125,
"grad_norm": 1.7840685844421387,
"learning_rate": 4.589960109100444e-07,
"loss": 1.1989,
"step": 1850
},
{
"epoch": 23.75,
"grad_norm": 1.6836535930633545,
"learning_rate": 2.0458044895916516e-07,
"loss": 1.1433,
"step": 1900
},
{
"epoch": 24.0,
"eval_accuracy": 0.41509433962264153,
"eval_loss": 1.2584929466247559,
"eval_runtime": 1.5229,
"eval_samples_per_second": 139.206,
"eval_steps_per_second": 17.729,
"step": 1920
},
{
"epoch": 24.375,
"grad_norm": 2.0122158527374268,
"learning_rate": 5.1232604899952296e-08,
"loss": 1.1303,
"step": 1950
},
{
"epoch": 25.0,
"grad_norm": 2.9466681480407715,
"learning_rate": 0.0,
"loss": 1.1492,
"step": 2000
},
{
"epoch": 25.0,
"eval_accuracy": 0.41509433962264153,
"eval_loss": 1.2584505081176758,
"eval_runtime": 2.8679,
"eval_samples_per_second": 73.922,
"eval_steps_per_second": 9.415,
"step": 2000
},
{
"epoch": 25.0,
"step": 2000,
"total_flos": 3.739939937176781e+18,
"train_loss": 1.1909779739379882,
"train_runtime": 509.0926,
"train_samples_per_second": 31.232,
"train_steps_per_second": 3.929
}
],
"logging_steps": 50,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.739939937176781e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}