{ "best_metric": 0.41509433962264153, "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost9_outputs/checkpoint-1600", "epoch": 25.0, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 1.5967129468917847, "learning_rate": 1.5e-05, "loss": 1.3942, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.33490566037735847, "eval_loss": 1.3565607070922852, "eval_runtime": 1.5583, "eval_samples_per_second": 136.048, "eval_steps_per_second": 17.327, "step": 80 }, { "epoch": 1.25, "grad_norm": 1.937333583831787, "learning_rate": 3e-05, "loss": 1.3639, "step": 100 }, { "epoch": 1.875, "grad_norm": 1.9313533306121826, "learning_rate": 2.994876739510005e-05, "loss": 1.3192, "step": 150 }, { "epoch": 2.0, "eval_accuracy": 0.3584905660377358, "eval_loss": 1.3104463815689087, "eval_runtime": 1.5299, "eval_samples_per_second": 138.573, "eval_steps_per_second": 17.648, "step": 160 }, { "epoch": 2.5, "grad_norm": 1.8240773677825928, "learning_rate": 2.9795419551040836e-05, "loss": 1.2795, "step": 200 }, { "epoch": 3.0, "eval_accuracy": 0.37264150943396224, "eval_loss": 1.2999355792999268, "eval_runtime": 1.5285, "eval_samples_per_second": 138.698, "eval_steps_per_second": 17.664, "step": 240 }, { "epoch": 3.125, "grad_norm": 1.688889980316162, "learning_rate": 2.9541003989089956e-05, "loss": 1.2794, "step": 250 }, { "epoch": 3.75, "grad_norm": 2.2275140285491943, "learning_rate": 2.9187258625509518e-05, "loss": 1.2419, "step": 300 }, { "epoch": 4.0, "eval_accuracy": 0.37264150943396224, "eval_loss": 1.2860321998596191, "eval_runtime": 1.5373, "eval_samples_per_second": 137.908, "eval_steps_per_second": 17.564, "step": 320 }, { "epoch": 4.375, "grad_norm": 1.8031086921691895, "learning_rate": 2.873659989982586e-05, "loss": 1.2749, "step": 350 }, { "epoch": 5.0, "grad_norm": 2.7317395210266113, "learning_rate": 2.8192106268097336e-05, "loss": 1.2213, "step": 400 }, { "epoch": 5.0, "eval_accuracy": 0.36792452830188677, "eval_loss": 1.2893822193145752, "eval_runtime": 1.538, "eval_samples_per_second": 137.84, "eval_steps_per_second": 17.555, "step": 400 }, { "epoch": 5.625, "grad_norm": 1.6971545219421387, "learning_rate": 2.7557497173937928e-05, "loss": 1.2287, "step": 450 }, { "epoch": 6.0, "eval_accuracy": 0.3632075471698113, "eval_loss": 1.2862772941589355, "eval_runtime": 1.5343, "eval_samples_per_second": 138.176, "eval_steps_per_second": 17.598, "step": 480 }, { "epoch": 6.25, "grad_norm": 1.7909101247787476, "learning_rate": 2.6837107640945904e-05, "loss": 1.2138, "step": 500 }, { "epoch": 6.875, "grad_norm": 2.154249668121338, "learning_rate": 2.6035858660096975e-05, "loss": 1.2123, "step": 550 }, { "epoch": 7.0, "eval_accuracy": 0.3915094339622642, "eval_loss": 1.287874460220337, "eval_runtime": 1.5329, "eval_samples_per_second": 138.296, "eval_steps_per_second": 17.613, "step": 560 }, { "epoch": 7.5, "grad_norm": 1.533173680305481, "learning_rate": 2.5159223574386117e-05, "loss": 1.2124, "step": 600 }, { "epoch": 8.0, "eval_accuracy": 0.3867924528301887, "eval_loss": 1.2767480611801147, "eval_runtime": 1.527, "eval_samples_per_second": 138.831, "eval_steps_per_second": 17.681, "step": 640 }, { "epoch": 8.125, "grad_norm": 1.6685694456100464, "learning_rate": 2.4213190690345018e-05, "loss": 1.2018, "step": 650 }, { "epoch": 8.75, "grad_norm": 2.190777540206909, "learning_rate": 2.320422237183641e-05, "loss": 1.2144, "step": 700 }, { "epoch": 9.0, "eval_accuracy": 0.37264150943396224, "eval_loss": 1.2851072549819946, "eval_runtime": 1.5312, "eval_samples_per_second": 138.456, "eval_steps_per_second": 17.634, "step": 720 }, { "epoch": 9.375, "grad_norm": 2.0433597564697266, "learning_rate": 2.2139210895556104e-05, "loss": 1.1531, "step": 750 }, { "epoch": 10.0, "grad_norm": 2.4729323387145996, "learning_rate": 2.1025431369794546e-05, "loss": 1.2202, "step": 800 }, { "epoch": 10.0, "eval_accuracy": 0.39622641509433965, "eval_loss": 1.2682827711105347, "eval_runtime": 1.5206, "eval_samples_per_second": 139.414, "eval_steps_per_second": 17.756, "step": 800 }, { "epoch": 10.625, "grad_norm": 1.8216651678085327, "learning_rate": 1.9870492038070255e-05, "loss": 1.1804, "step": 850 }, { "epoch": 11.0, "eval_accuracy": 0.4009433962264151, "eval_loss": 1.2658637762069702, "eval_runtime": 1.5239, "eval_samples_per_second": 139.115, "eval_steps_per_second": 17.718, "step": 880 }, { "epoch": 11.25, "grad_norm": 2.511573553085327, "learning_rate": 1.8682282307111988e-05, "loss": 1.1541, "step": 900 }, { "epoch": 11.875, "grad_norm": 2.372868299484253, "learning_rate": 1.746891885421101e-05, "loss": 1.2031, "step": 950 }, { "epoch": 12.0, "eval_accuracy": 0.39622641509433965, "eval_loss": 1.265770435333252, "eval_runtime": 1.5336, "eval_samples_per_second": 138.236, "eval_steps_per_second": 17.606, "step": 960 }, { "epoch": 12.5, "grad_norm": 1.9403022527694702, "learning_rate": 1.623869018208499e-05, "loss": 1.1428, "step": 1000 }, { "epoch": 13.0, "eval_accuracy": 0.4056603773584906, "eval_loss": 1.262069582939148, "eval_runtime": 1.5275, "eval_samples_per_second": 138.792, "eval_steps_per_second": 17.676, "step": 1040 }, { "epoch": 13.125, "grad_norm": 2.0001461505889893, "learning_rate": 1.5e-05, "loss": 1.1744, "step": 1050 }, { "epoch": 13.75, "grad_norm": 1.5777283906936646, "learning_rate": 1.3761309817915017e-05, "loss": 1.1224, "step": 1100 }, { "epoch": 14.0, "eval_accuracy": 0.41037735849056606, "eval_loss": 1.2655014991760254, "eval_runtime": 1.5235, "eval_samples_per_second": 139.151, "eval_steps_per_second": 17.722, "step": 1120 }, { "epoch": 14.375, "grad_norm": 1.9835065603256226, "learning_rate": 1.2531081145788989e-05, "loss": 1.1765, "step": 1150 }, { "epoch": 15.0, "grad_norm": 3.021399974822998, "learning_rate": 1.1317717692888014e-05, "loss": 1.1486, "step": 1200 }, { "epoch": 15.0, "eval_accuracy": 0.39622641509433965, "eval_loss": 1.2606432437896729, "eval_runtime": 1.5215, "eval_samples_per_second": 139.34, "eval_steps_per_second": 17.746, "step": 1200 }, { "epoch": 15.625, "grad_norm": 1.9539563655853271, "learning_rate": 1.0129507961929749e-05, "loss": 1.1451, "step": 1250 }, { "epoch": 16.0, "eval_accuracy": 0.4056603773584906, "eval_loss": 1.2635830640792847, "eval_runtime": 1.5268, "eval_samples_per_second": 138.851, "eval_steps_per_second": 17.684, "step": 1280 }, { "epoch": 16.25, "grad_norm": 2.3783926963806152, "learning_rate": 8.974568630205462e-06, "loss": 1.1363, "step": 1300 }, { "epoch": 16.875, "grad_norm": 2.221468448638916, "learning_rate": 7.860789104443897e-06, "loss": 1.1717, "step": 1350 }, { "epoch": 17.0, "eval_accuracy": 0.4056603773584906, "eval_loss": 1.2595568895339966, "eval_runtime": 1.5272, "eval_samples_per_second": 138.816, "eval_steps_per_second": 17.679, "step": 1360 }, { "epoch": 17.5, "grad_norm": 1.5900912284851074, "learning_rate": 6.795777628163599e-06, "loss": 1.1231, "step": 1400 }, { "epoch": 18.0, "eval_accuracy": 0.4056603773584906, "eval_loss": 1.26264488697052, "eval_runtime": 1.5254, "eval_samples_per_second": 138.976, "eval_steps_per_second": 17.7, "step": 1440 }, { "epoch": 18.125, "grad_norm": 1.9134879112243652, "learning_rate": 5.786809309654983e-06, "loss": 1.1455, "step": 1450 }, { "epoch": 18.75, "grad_norm": 1.3620388507843018, "learning_rate": 4.840776425613887e-06, "loss": 1.1468, "step": 1500 }, { "epoch": 19.0, "eval_accuracy": 0.39622641509433965, "eval_loss": 1.2616825103759766, "eval_runtime": 1.5238, "eval_samples_per_second": 139.125, "eval_steps_per_second": 17.719, "step": 1520 }, { "epoch": 19.375, "grad_norm": 1.9459707736968994, "learning_rate": 3.964141339903026e-06, "loss": 1.167, "step": 1550 }, { "epoch": 20.0, "grad_norm": 2.5087108612060547, "learning_rate": 3.162892359054098e-06, "loss": 1.0958, "step": 1600 }, { "epoch": 20.0, "eval_accuracy": 0.41509433962264153, "eval_loss": 1.2586045265197754, "eval_runtime": 1.5245, "eval_samples_per_second": 139.065, "eval_steps_per_second": 17.711, "step": 1600 }, { "epoch": 20.625, "grad_norm": 1.9791457653045654, "learning_rate": 2.442502826062072e-06, "loss": 1.1456, "step": 1650 }, { "epoch": 21.0, "eval_accuracy": 0.41037735849056606, "eval_loss": 1.258667230606079, "eval_runtime": 1.5176, "eval_samples_per_second": 139.693, "eval_steps_per_second": 17.791, "step": 1680 }, { "epoch": 21.25, "grad_norm": 1.4690279960632324, "learning_rate": 1.8078937319026655e-06, "loss": 1.1492, "step": 1700 }, { "epoch": 21.875, "grad_norm": 2.0290393829345703, "learning_rate": 1.2634001001741375e-06, "loss": 1.127, "step": 1750 }, { "epoch": 22.0, "eval_accuracy": 0.41509433962264153, "eval_loss": 1.258967399597168, "eval_runtime": 1.5264, "eval_samples_per_second": 138.887, "eval_steps_per_second": 17.688, "step": 1760 }, { "epoch": 22.5, "grad_norm": 1.6253653764724731, "learning_rate": 8.127413744904805e-07, "loss": 1.1308, "step": 1800 }, { "epoch": 23.0, "eval_accuracy": 0.41509433962264153, "eval_loss": 1.2586345672607422, "eval_runtime": 1.5342, "eval_samples_per_second": 138.183, "eval_steps_per_second": 17.599, "step": 1840 }, { "epoch": 23.125, "grad_norm": 1.7840685844421387, "learning_rate": 4.589960109100444e-07, "loss": 1.1989, "step": 1850 }, { "epoch": 23.75, "grad_norm": 1.6836535930633545, "learning_rate": 2.0458044895916516e-07, "loss": 1.1433, "step": 1900 }, { "epoch": 24.0, "eval_accuracy": 0.41509433962264153, "eval_loss": 1.2584929466247559, "eval_runtime": 1.5229, "eval_samples_per_second": 139.206, "eval_steps_per_second": 17.729, "step": 1920 }, { "epoch": 24.375, "grad_norm": 2.0122158527374268, "learning_rate": 5.1232604899952296e-08, "loss": 1.1303, "step": 1950 }, { "epoch": 25.0, "grad_norm": 2.9466681480407715, "learning_rate": 0.0, "loss": 1.1492, "step": 2000 }, { "epoch": 25.0, "eval_accuracy": 0.41509433962264153, "eval_loss": 1.2584505081176758, "eval_runtime": 2.8679, "eval_samples_per_second": 73.922, "eval_steps_per_second": 9.415, "step": 2000 }, { "epoch": 25.0, "step": 2000, "total_flos": 3.739939937176781e+18, "train_loss": 1.1909779739379882, "train_runtime": 509.0926, "train_samples_per_second": 31.232, "train_steps_per_second": 3.929 } ], "logging_steps": 50, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.739939937176781e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }