| { | |
| "best_metric": 0.5424528301886793, | |
| "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost7_outputs/checkpoint-640", | |
| "epoch": 25.0, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.3490566037735849, | |
| "eval_loss": 1.3316097259521484, | |
| "eval_runtime": 1.7234, | |
| "eval_samples_per_second": 123.011, | |
| "eval_steps_per_second": 8.123, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 8.837675094604492, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.3694, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.37264150943396224, | |
| "eval_loss": 1.2695280313491821, | |
| "eval_runtime": 1.7368, | |
| "eval_samples_per_second": 122.063, | |
| "eval_steps_per_second": 8.061, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 23.59905433654785, | |
| "learning_rate": 1.4897709775520418e-05, | |
| "loss": 1.291, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.41509433962264153, | |
| "eval_loss": 1.2237251996994019, | |
| "eval_runtime": 1.7278, | |
| "eval_samples_per_second": 122.701, | |
| "eval_steps_per_second": 8.103, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 12.828843116760254, | |
| "learning_rate": 1.4593629312754759e-05, | |
| "loss": 1.2374, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.4481132075471698, | |
| "eval_loss": 1.1886036396026611, | |
| "eval_runtime": 1.7269, | |
| "eval_samples_per_second": 122.761, | |
| "eval_steps_per_second": 8.107, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 23.113628387451172, | |
| "learning_rate": 1.4096053134048668e-05, | |
| "loss": 1.1815, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.4386792452830189, | |
| "eval_loss": 1.1869800090789795, | |
| "eval_runtime": 1.7412, | |
| "eval_samples_per_second": 121.752, | |
| "eval_steps_per_second": 8.04, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.4716981132075472, | |
| "eval_loss": 1.172649621963501, | |
| "eval_runtime": 1.7177, | |
| "eval_samples_per_second": 123.418, | |
| "eval_steps_per_second": 8.15, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 23.421894073486328, | |
| "learning_rate": 1.3418553820472952e-05, | |
| "loss": 1.1479, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.4858490566037736, | |
| "eval_loss": 1.12235426902771, | |
| "eval_runtime": 1.742, | |
| "eval_samples_per_second": 121.7, | |
| "eval_steps_per_second": 8.037, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 21.626155853271484, | |
| "learning_rate": 1.2579611787193059e-05, | |
| "loss": 1.0818, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.4716981132075472, | |
| "eval_loss": 1.1309412717819214, | |
| "eval_runtime": 1.7266, | |
| "eval_samples_per_second": 122.786, | |
| "eval_steps_per_second": 8.109, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 17.436782836914062, | |
| "learning_rate": 1.1602111185918205e-05, | |
| "loss": 1.0507, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.4811320754716981, | |
| "eval_loss": 1.1351451873779297, | |
| "eval_runtime": 1.7198, | |
| "eval_samples_per_second": 123.27, | |
| "eval_steps_per_second": 8.14, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 25.729656219482422, | |
| "learning_rate": 1.0512715684897273e-05, | |
| "loss": 1.0198, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.5188679245283019, | |
| "eval_loss": 1.1313749551773071, | |
| "eval_runtime": 1.7239, | |
| "eval_samples_per_second": 122.975, | |
| "eval_steps_per_second": 8.121, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5047169811320755, | |
| "eval_loss": 1.1235365867614746, | |
| "eval_runtime": 1.7373, | |
| "eval_samples_per_second": 122.031, | |
| "eval_steps_per_second": 8.059, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "grad_norm": 17.072206497192383, | |
| "learning_rate": 9.341141153555994e-06, | |
| "loss": 1.0075, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.113572359085083, | |
| "eval_runtime": 1.7317, | |
| "eval_samples_per_second": 122.424, | |
| "eval_steps_per_second": 8.085, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 29.981237411499023, | |
| "learning_rate": 8.119345091042494e-06, | |
| "loss": 0.9692, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.5094339622641509, | |
| "eval_loss": 1.1230112314224243, | |
| "eval_runtime": 1.7323, | |
| "eval_samples_per_second": 122.381, | |
| "eval_steps_per_second": 8.082, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "grad_norm": 23.71090316772461, | |
| "learning_rate": 6.8806549089575084e-06, | |
| "loss": 0.919, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1158391237258911, | |
| "eval_runtime": 1.7323, | |
| "eval_samples_per_second": 122.38, | |
| "eval_steps_per_second": 8.082, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 20.237754821777344, | |
| "learning_rate": 5.658858846444007e-06, | |
| "loss": 0.9306, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.108890175819397, | |
| "eval_runtime": 1.7314, | |
| "eval_samples_per_second": 122.444, | |
| "eval_steps_per_second": 8.086, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.5424528301886793, | |
| "eval_loss": 1.1008423566818237, | |
| "eval_runtime": 1.7198, | |
| "eval_samples_per_second": 123.273, | |
| "eval_steps_per_second": 8.141, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "grad_norm": 15.5910062789917, | |
| "learning_rate": 4.487284315102731e-06, | |
| "loss": 0.89, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.1071282625198364, | |
| "eval_runtime": 1.7351, | |
| "eval_samples_per_second": 122.185, | |
| "eval_steps_per_second": 8.069, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 19.464712142944336, | |
| "learning_rate": 3.3978888140817996e-06, | |
| "loss": 0.8853, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.1110377311706543, | |
| "eval_runtime": 1.7376, | |
| "eval_samples_per_second": 122.008, | |
| "eval_steps_per_second": 8.057, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 18.75, | |
| "grad_norm": 24.714292526245117, | |
| "learning_rate": 2.4203882128069435e-06, | |
| "loss": 0.8852, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.5330188679245284, | |
| "eval_loss": 1.102632761001587, | |
| "eval_runtime": 1.7735, | |
| "eval_samples_per_second": 119.541, | |
| "eval_steps_per_second": 7.894, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 19.79788589477539, | |
| "learning_rate": 1.581446179527049e-06, | |
| "loss": 0.824, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.5377358490566038, | |
| "eval_loss": 1.1056451797485352, | |
| "eval_runtime": 1.7219, | |
| "eval_samples_per_second": 123.121, | |
| "eval_steps_per_second": 8.131, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.1087795495986938, | |
| "eval_runtime": 1.7345, | |
| "eval_samples_per_second": 122.225, | |
| "eval_steps_per_second": 8.071, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "grad_norm": 35.61381912231445, | |
| "learning_rate": 9.039468659513328e-07, | |
| "loss": 0.8327, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.1064716577529907, | |
| "eval_runtime": 1.7395, | |
| "eval_samples_per_second": 121.872, | |
| "eval_steps_per_second": 8.048, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "grad_norm": 19.63943099975586, | |
| "learning_rate": 4.0637068724524024e-07, | |
| "loss": 0.832, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.5330188679245284, | |
| "eval_loss": 1.1062887907028198, | |
| "eval_runtime": 1.7199, | |
| "eval_samples_per_second": 123.265, | |
| "eval_steps_per_second": 8.14, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 23.75, | |
| "grad_norm": 16.668516159057617, | |
| "learning_rate": 1.0229022447958258e-07, | |
| "loss": 0.8801, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.5330188679245284, | |
| "eval_loss": 1.106476902961731, | |
| "eval_runtime": 1.7268, | |
| "eval_samples_per_second": 122.771, | |
| "eval_steps_per_second": 8.108, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 17.07996368408203, | |
| "learning_rate": 0.0, | |
| "loss": 0.8372, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.5330188679245284, | |
| "eval_loss": 1.1065127849578857, | |
| "eval_runtime": 3.038, | |
| "eval_samples_per_second": 69.784, | |
| "eval_steps_per_second": 4.608, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "step": 1000, | |
| "total_flos": 1.2575458052333568e+18, | |
| "train_loss": 1.0036159286499022, | |
| "train_runtime": 496.6689, | |
| "train_samples_per_second": 32.013, | |
| "train_steps_per_second": 2.013 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2575458052333568e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |