{
  "best_metric": 0.41509433962264153,
  "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost9_outputs/checkpoint-1600",
  "epoch": 25.0,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.625,
      "grad_norm": 1.5967129468917847,
      "learning_rate": 1.5e-05,
      "loss": 1.3942,
      "step": 50
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.33490566037735847,
      "eval_loss": 1.3565607070922852,
      "eval_runtime": 1.5583,
      "eval_samples_per_second": 136.048,
      "eval_steps_per_second": 17.327,
      "step": 80
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.937333583831787,
      "learning_rate": 3e-05,
      "loss": 1.3639,
      "step": 100
    },
    {
      "epoch": 1.875,
      "grad_norm": 1.9313533306121826,
      "learning_rate": 2.994876739510005e-05,
      "loss": 1.3192,
      "step": 150
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.3584905660377358,
      "eval_loss": 1.3104463815689087,
      "eval_runtime": 1.5299,
      "eval_samples_per_second": 138.573,
      "eval_steps_per_second": 17.648,
      "step": 160
    },
    {
      "epoch": 2.5,
      "grad_norm": 1.8240773677825928,
      "learning_rate": 2.9795419551040836e-05,
      "loss": 1.2795,
      "step": 200
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.37264150943396224,
      "eval_loss": 1.2999355792999268,
      "eval_runtime": 1.5285,
      "eval_samples_per_second": 138.698,
      "eval_steps_per_second": 17.664,
      "step": 240
    },
    {
      "epoch": 3.125,
      "grad_norm": 1.688889980316162,
      "learning_rate": 2.9541003989089956e-05,
      "loss": 1.2794,
      "step": 250
    },
    {
      "epoch": 3.75,
      "grad_norm": 2.2275140285491943,
      "learning_rate": 2.9187258625509518e-05,
      "loss": 1.2419,
      "step": 300
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.37264150943396224,
      "eval_loss": 1.2860321998596191,
      "eval_runtime": 1.5373,
      "eval_samples_per_second": 137.908,
      "eval_steps_per_second": 17.564,
      "step": 320
    },
    {
      "epoch": 4.375,
      "grad_norm": 1.8031086921691895,
      "learning_rate": 2.873659989982586e-05,
      "loss": 1.2749,
      "step": 350
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.7317395210266113,
      "learning_rate": 2.8192106268097336e-05,
      "loss": 1.2213,
      "step": 400
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.36792452830188677,
      "eval_loss": 1.2893822193145752,
      "eval_runtime": 1.538,
      "eval_samples_per_second": 137.84,
      "eval_steps_per_second": 17.555,
      "step": 400
    },
    {
      "epoch": 5.625,
      "grad_norm": 1.6971545219421387,
      "learning_rate": 2.7557497173937928e-05,
      "loss": 1.2287,
      "step": 450
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.3632075471698113,
      "eval_loss": 1.2862772941589355,
      "eval_runtime": 1.5343,
      "eval_samples_per_second": 138.176,
      "eval_steps_per_second": 17.598,
      "step": 480
    },
    {
      "epoch": 6.25,
      "grad_norm": 1.7909101247787476,
      "learning_rate": 2.6837107640945904e-05,
      "loss": 1.2138,
      "step": 500
    },
    {
      "epoch": 6.875,
      "grad_norm": 2.154249668121338,
      "learning_rate": 2.6035858660096975e-05,
      "loss": 1.2123,
      "step": 550
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.3915094339622642,
      "eval_loss": 1.287874460220337,
      "eval_runtime": 1.5329,
      "eval_samples_per_second": 138.296,
      "eval_steps_per_second": 17.613,
      "step": 560
    },
    {
      "epoch": 7.5,
      "grad_norm": 1.533173680305481,
      "learning_rate": 2.5159223574386117e-05,
      "loss": 1.2124,
      "step": 600
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.3867924528301887,
      "eval_loss": 1.2767480611801147,
      "eval_runtime": 1.527,
      "eval_samples_per_second": 138.831,
      "eval_steps_per_second": 17.681,
      "step": 640
    },
    {
      "epoch": 8.125,
      "grad_norm": 1.6685694456100464,
      "learning_rate": 2.4213190690345018e-05,
      "loss": 1.2018,
      "step": 650
    },
    {
      "epoch": 8.75,
      "grad_norm": 2.190777540206909,
      "learning_rate": 2.320422237183641e-05,
      "loss": 1.2144,
      "step": 700
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.37264150943396224,
      "eval_loss": 1.2851072549819946,
      "eval_runtime": 1.5312,
      "eval_samples_per_second": 138.456,
      "eval_steps_per_second": 17.634,
      "step": 720
    },
    {
      "epoch": 9.375,
      "grad_norm": 2.0433597564697266,
      "learning_rate": 2.2139210895556104e-05,
      "loss": 1.1531,
      "step": 750
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.4729323387145996,
      "learning_rate": 2.1025431369794546e-05,
      "loss": 1.2202,
      "step": 800
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.39622641509433965,
      "eval_loss": 1.2682827711105347,
      "eval_runtime": 1.5206,
      "eval_samples_per_second": 139.414,
      "eval_steps_per_second": 17.756,
      "step": 800
    },
    {
      "epoch": 10.625,
      "grad_norm": 1.8216651678085327,
      "learning_rate": 1.9870492038070255e-05,
      "loss": 1.1804,
      "step": 850
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.4009433962264151,
      "eval_loss": 1.2658637762069702,
      "eval_runtime": 1.5239,
      "eval_samples_per_second": 139.115,
      "eval_steps_per_second": 17.718,
      "step": 880
    },
    {
      "epoch": 11.25,
      "grad_norm": 2.511573553085327,
      "learning_rate": 1.8682282307111988e-05,
      "loss": 1.1541,
      "step": 900
    },
    {
      "epoch": 11.875,
      "grad_norm": 2.372868299484253,
      "learning_rate": 1.746891885421101e-05,
      "loss": 1.2031,
      "step": 950
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.39622641509433965,
      "eval_loss": 1.265770435333252,
      "eval_runtime": 1.5336,
      "eval_samples_per_second": 138.236,
      "eval_steps_per_second": 17.606,
      "step": 960
    },
    {
      "epoch": 12.5,
      "grad_norm": 1.9403022527694702,
      "learning_rate": 1.623869018208499e-05,
      "loss": 1.1428,
      "step": 1000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.4056603773584906,
      "eval_loss": 1.262069582939148,
      "eval_runtime": 1.5275,
      "eval_samples_per_second": 138.792,
      "eval_steps_per_second": 17.676,
      "step": 1040
    },
    {
      "epoch": 13.125,
      "grad_norm": 2.0001461505889893,
      "learning_rate": 1.5e-05,
      "loss": 1.1744,
      "step": 1050
    },
    {
      "epoch": 13.75,
      "grad_norm": 1.5777283906936646,
      "learning_rate": 1.3761309817915017e-05,
      "loss": 1.1224,
      "step": 1100
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.41037735849056606,
      "eval_loss": 1.2655014991760254,
      "eval_runtime": 1.5235,
      "eval_samples_per_second": 139.151,
      "eval_steps_per_second": 17.722,
      "step": 1120
    },
    {
      "epoch": 14.375,
      "grad_norm": 1.9835065603256226,
      "learning_rate": 1.2531081145788989e-05,
      "loss": 1.1765,
      "step": 1150
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.021399974822998,
      "learning_rate": 1.1317717692888014e-05,
      "loss": 1.1486,
      "step": 1200
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.39622641509433965,
      "eval_loss": 1.2606432437896729,
      "eval_runtime": 1.5215,
      "eval_samples_per_second": 139.34,
      "eval_steps_per_second": 17.746,
      "step": 1200
    },
    {
      "epoch": 15.625,
      "grad_norm": 1.9539563655853271,
      "learning_rate": 1.0129507961929749e-05,
      "loss": 1.1451,
      "step": 1250
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.4056603773584906,
      "eval_loss": 1.2635830640792847,
      "eval_runtime": 1.5268,
      "eval_samples_per_second": 138.851,
      "eval_steps_per_second": 17.684,
      "step": 1280
    },
    {
      "epoch": 16.25,
      "grad_norm": 2.3783926963806152,
      "learning_rate": 8.974568630205462e-06,
      "loss": 1.1363,
      "step": 1300
    },
    {
      "epoch": 16.875,
      "grad_norm": 2.221468448638916,
      "learning_rate": 7.860789104443897e-06,
      "loss": 1.1717,
      "step": 1350
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.4056603773584906,
      "eval_loss": 1.2595568895339966,
      "eval_runtime": 1.5272,
      "eval_samples_per_second": 138.816,
      "eval_steps_per_second": 17.679,
      "step": 1360
    },
    {
      "epoch": 17.5,
      "grad_norm": 1.5900912284851074,
      "learning_rate": 6.795777628163599e-06,
      "loss": 1.1231,
      "step": 1400
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.4056603773584906,
      "eval_loss": 1.26264488697052,
      "eval_runtime": 1.5254,
      "eval_samples_per_second": 138.976,
      "eval_steps_per_second": 17.7,
      "step": 1440
    },
    {
      "epoch": 18.125,
      "grad_norm": 1.9134879112243652,
      "learning_rate": 5.786809309654983e-06,
      "loss": 1.1455,
      "step": 1450
    },
    {
      "epoch": 18.75,
      "grad_norm": 1.3620388507843018,
      "learning_rate": 4.840776425613887e-06,
      "loss": 1.1468,
      "step": 1500
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.39622641509433965,
      "eval_loss": 1.2616825103759766,
      "eval_runtime": 1.5238,
      "eval_samples_per_second": 139.125,
      "eval_steps_per_second": 17.719,
      "step": 1520
    },
    {
      "epoch": 19.375,
      "grad_norm": 1.9459707736968994,
      "learning_rate": 3.964141339903026e-06,
      "loss": 1.167,
      "step": 1550
    },
    {
      "epoch": 20.0,
      "grad_norm": 2.5087108612060547,
      "learning_rate": 3.162892359054098e-06,
      "loss": 1.0958,
      "step": 1600
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.41509433962264153,
      "eval_loss": 1.2586045265197754,
      "eval_runtime": 1.5245,
      "eval_samples_per_second": 139.065,
      "eval_steps_per_second": 17.711,
      "step": 1600
    },
    {
      "epoch": 20.625,
      "grad_norm": 1.9791457653045654,
      "learning_rate": 2.442502826062072e-06,
      "loss": 1.1456,
      "step": 1650
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.41037735849056606,
      "eval_loss": 1.258667230606079,
      "eval_runtime": 1.5176,
      "eval_samples_per_second": 139.693,
      "eval_steps_per_second": 17.791,
      "step": 1680
    },
    {
      "epoch": 21.25,
      "grad_norm": 1.4690279960632324,
      "learning_rate": 1.8078937319026655e-06,
      "loss": 1.1492,
      "step": 1700
    },
    {
      "epoch": 21.875,
      "grad_norm": 2.0290393829345703,
      "learning_rate": 1.2634001001741375e-06,
      "loss": 1.127,
      "step": 1750
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.41509433962264153,
      "eval_loss": 1.258967399597168,
      "eval_runtime": 1.5264,
      "eval_samples_per_second": 138.887,
      "eval_steps_per_second": 17.688,
      "step": 1760
    },
    {
      "epoch": 22.5,
      "grad_norm": 1.6253653764724731,
      "learning_rate": 8.127413744904805e-07,
      "loss": 1.1308,
      "step": 1800
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.41509433962264153,
      "eval_loss": 1.2586345672607422,
      "eval_runtime": 1.5342,
      "eval_samples_per_second": 138.183,
      "eval_steps_per_second": 17.599,
      "step": 1840
    },
    {
      "epoch": 23.125,
      "grad_norm": 1.7840685844421387,
      "learning_rate": 4.589960109100444e-07,
      "loss": 1.1989,
      "step": 1850
    },
    {
      "epoch": 23.75,
      "grad_norm": 1.6836535930633545,
      "learning_rate": 2.0458044895916516e-07,
      "loss": 1.1433,
      "step": 1900
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.41509433962264153,
      "eval_loss": 1.2584929466247559,
      "eval_runtime": 1.5229,
      "eval_samples_per_second": 139.206,
      "eval_steps_per_second": 17.729,
      "step": 1920
    },
    {
      "epoch": 24.375,
      "grad_norm": 2.0122158527374268,
      "learning_rate": 5.1232604899952296e-08,
      "loss": 1.1303,
      "step": 1950
    },
    {
      "epoch": 25.0,
      "grad_norm": 2.9466681480407715,
      "learning_rate": 0.0,
      "loss": 1.1492,
      "step": 2000
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.41509433962264153,
      "eval_loss": 1.2584505081176758,
      "eval_runtime": 2.8679,
      "eval_samples_per_second": 73.922,
      "eval_steps_per_second": 9.415,
      "step": 2000
    },
    {
      "epoch": 25.0,
      "step": 2000,
      "total_flos": 3.739939937176781e+18,
      "train_loss": 1.1909779739379882,
      "train_runtime": 509.0926,
      "train_samples_per_second": 31.232,
      "train_steps_per_second": 3.929
    }
  ],
  "logging_steps": 50,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.739939937176781e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}