| { | |
| "best_metric": 0.49082762002944946, | |
| "best_model_checkpoint": "models/toxic-bert-mbert/checkpoint-380", | |
| "epoch": 0.41170097508125675, | |
| "eval_steps": 10, | |
| "global_step": 380, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.968e-05, | |
| "loss": 1.4487, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_f1": 0.48475671310316976, | |
| "eval_loss": 1.1357542276382446, | |
| "eval_runtime": 20.5083, | |
| "eval_samples_per_second": 270.037, | |
| "eval_steps_per_second": 11.264, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.9280000000000002e-05, | |
| "loss": 1.1897, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_f1": 0.48475671310316976, | |
| "eval_loss": 1.0628113746643066, | |
| "eval_runtime": 20.8063, | |
| "eval_samples_per_second": 266.169, | |
| "eval_steps_per_second": 11.102, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.8880000000000002e-05, | |
| "loss": 1.0351, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_f1": 0.48475671310316976, | |
| "eval_loss": 1.0026295185089111, | |
| "eval_runtime": 21.4302, | |
| "eval_samples_per_second": 258.42, | |
| "eval_steps_per_second": 10.779, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.8480000000000003e-05, | |
| "loss": 0.9274, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_f1": 0.602134407148032, | |
| "eval_loss": 0.8651727437973022, | |
| "eval_runtime": 21.7252, | |
| "eval_samples_per_second": 254.911, | |
| "eval_steps_per_second": 10.633, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.8080000000000003e-05, | |
| "loss": 0.8717, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_f1": 0.5878712152620588, | |
| "eval_loss": 0.8609752058982849, | |
| "eval_runtime": 21.4208, | |
| "eval_samples_per_second": 258.534, | |
| "eval_steps_per_second": 10.784, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.768e-05, | |
| "loss": 0.8187, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_f1": 0.6764570688570977, | |
| "eval_loss": 0.7394715547561646, | |
| "eval_runtime": 21.8392, | |
| "eval_samples_per_second": 253.58, | |
| "eval_steps_per_second": 10.577, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.732e-05, | |
| "loss": 0.8706, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_f1": 0.6850705705176812, | |
| "eval_loss": 0.7013543844223022, | |
| "eval_runtime": 21.7795, | |
| "eval_samples_per_second": 254.276, | |
| "eval_steps_per_second": 10.606, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.692e-05, | |
| "loss": 0.7463, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_f1": 0.7283246330984933, | |
| "eval_loss": 0.665144681930542, | |
| "eval_runtime": 21.4734, | |
| "eval_samples_per_second": 257.901, | |
| "eval_steps_per_second": 10.758, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.652e-05, | |
| "loss": 0.7677, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_f1": 0.6548131465387051, | |
| "eval_loss": 0.718267560005188, | |
| "eval_runtime": 21.871, | |
| "eval_samples_per_second": 253.212, | |
| "eval_steps_per_second": 10.562, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.612e-05, | |
| "loss": 0.6402, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_f1": 0.7153883418869857, | |
| "eval_loss": 0.6134028434753418, | |
| "eval_runtime": 21.3946, | |
| "eval_samples_per_second": 258.85, | |
| "eval_steps_per_second": 10.797, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.5720000000000002e-05, | |
| "loss": 0.6408, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_f1": 0.7200057070545418, | |
| "eval_loss": 0.6316511034965515, | |
| "eval_runtime": 21.4218, | |
| "eval_samples_per_second": 258.521, | |
| "eval_steps_per_second": 10.783, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.5320000000000002e-05, | |
| "loss": 0.6293, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_f1": 0.7252309612107771, | |
| "eval_loss": 0.6177955865859985, | |
| "eval_runtime": 22.0787, | |
| "eval_samples_per_second": 250.831, | |
| "eval_steps_per_second": 10.463, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.4920000000000001e-05, | |
| "loss": 0.5921, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_f1": 0.718310903510847, | |
| "eval_loss": 0.6382821798324585, | |
| "eval_runtime": 21.4528, | |
| "eval_samples_per_second": 258.148, | |
| "eval_steps_per_second": 10.768, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.4520000000000002e-05, | |
| "loss": 0.6829, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_f1": 0.7104650126557948, | |
| "eval_loss": 0.6063101291656494, | |
| "eval_runtime": 21.6996, | |
| "eval_samples_per_second": 255.212, | |
| "eval_steps_per_second": 10.645, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.412e-05, | |
| "loss": 0.6528, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_f1": 0.7266460816131931, | |
| "eval_loss": 0.5720272064208984, | |
| "eval_runtime": 21.3758, | |
| "eval_samples_per_second": 259.078, | |
| "eval_steps_per_second": 10.807, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.3720000000000002e-05, | |
| "loss": 0.5472, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_f1": 0.7174077954335052, | |
| "eval_loss": 0.6016837358474731, | |
| "eval_runtime": 21.9003, | |
| "eval_samples_per_second": 252.873, | |
| "eval_steps_per_second": 10.548, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.3320000000000001e-05, | |
| "loss": 0.6625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_f1": 0.7238055756700145, | |
| "eval_loss": 0.5748048424720764, | |
| "eval_runtime": 21.4241, | |
| "eval_samples_per_second": 258.494, | |
| "eval_steps_per_second": 10.782, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.2920000000000002e-05, | |
| "loss": 0.551, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_f1": 0.7216203790293768, | |
| "eval_loss": 0.5944197177886963, | |
| "eval_runtime": 21.7253, | |
| "eval_samples_per_second": 254.91, | |
| "eval_steps_per_second": 10.633, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.252e-05, | |
| "loss": 0.5633, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_f1": 0.7591239466384562, | |
| "eval_loss": 0.5621122717857361, | |
| "eval_runtime": 21.1932, | |
| "eval_samples_per_second": 261.31, | |
| "eval_steps_per_second": 10.9, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.2120000000000001e-05, | |
| "loss": 0.5372, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_f1": 0.75760072154562, | |
| "eval_loss": 0.5480858087539673, | |
| "eval_runtime": 21.2836, | |
| "eval_samples_per_second": 260.201, | |
| "eval_steps_per_second": 10.853, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.172e-05, | |
| "loss": 0.6353, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_f1": 0.7511938652247329, | |
| "eval_loss": 0.5706632137298584, | |
| "eval_runtime": 21.8422, | |
| "eval_samples_per_second": 253.545, | |
| "eval_steps_per_second": 10.576, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.132e-05, | |
| "loss": 0.6332, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_f1": 0.7614490903091703, | |
| "eval_loss": 0.5543012022972107, | |
| "eval_runtime": 21.5696, | |
| "eval_samples_per_second": 256.75, | |
| "eval_steps_per_second": 10.71, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.0920000000000002e-05, | |
| "loss": 0.5311, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_f1": 0.7432260016931946, | |
| "eval_loss": 0.5397886633872986, | |
| "eval_runtime": 21.3122, | |
| "eval_samples_per_second": 259.851, | |
| "eval_steps_per_second": 10.839, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.0520000000000001e-05, | |
| "loss": 0.5791, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_f1": 0.7434033096243912, | |
| "eval_loss": 0.5391152501106262, | |
| "eval_runtime": 21.3796, | |
| "eval_samples_per_second": 259.032, | |
| "eval_steps_per_second": 10.805, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.0120000000000001e-05, | |
| "loss": 0.5831, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_f1": 0.763099957359349, | |
| "eval_loss": 0.5244932174682617, | |
| "eval_runtime": 21.3101, | |
| "eval_samples_per_second": 259.877, | |
| "eval_steps_per_second": 10.84, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 9.72e-06, | |
| "loss": 0.5453, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_f1": 0.7585661401268046, | |
| "eval_loss": 0.5211306214332581, | |
| "eval_runtime": 21.2639, | |
| "eval_samples_per_second": 260.442, | |
| "eval_steps_per_second": 10.863, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 9.32e-06, | |
| "loss": 0.5087, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_f1": 0.7549183270549422, | |
| "eval_loss": 0.5206575989723206, | |
| "eval_runtime": 21.5568, | |
| "eval_samples_per_second": 256.902, | |
| "eval_steps_per_second": 10.716, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.920000000000001e-06, | |
| "loss": 0.539, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_f1": 0.7483575758659107, | |
| "eval_loss": 0.5601561665534973, | |
| "eval_runtime": 21.7276, | |
| "eval_samples_per_second": 254.883, | |
| "eval_steps_per_second": 10.632, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.52e-06, | |
| "loss": 0.502, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_f1": 0.7497368207624416, | |
| "eval_loss": 0.5269237160682678, | |
| "eval_runtime": 21.3645, | |
| "eval_samples_per_second": 259.215, | |
| "eval_steps_per_second": 10.812, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 8.120000000000002e-06, | |
| "loss": 0.5656, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_f1": 0.7490172830029811, | |
| "eval_loss": 0.5704778432846069, | |
| "eval_runtime": 21.3309, | |
| "eval_samples_per_second": 259.623, | |
| "eval_steps_per_second": 10.829, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 7.72e-06, | |
| "loss": 0.6157, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_f1": 0.7610313116302819, | |
| "eval_loss": 0.5527724027633667, | |
| "eval_runtime": 21.5886, | |
| "eval_samples_per_second": 256.524, | |
| "eval_steps_per_second": 10.7, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 7.32e-06, | |
| "loss": 0.5262, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_f1": 0.7693956225207979, | |
| "eval_loss": 0.5064041614532471, | |
| "eval_runtime": 21.4519, | |
| "eval_samples_per_second": 258.159, | |
| "eval_steps_per_second": 10.768, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 6.92e-06, | |
| "loss": 0.5032, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_f1": 0.757594480871035, | |
| "eval_loss": 0.5091240406036377, | |
| "eval_runtime": 21.5576, | |
| "eval_samples_per_second": 256.894, | |
| "eval_steps_per_second": 10.716, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 6.520000000000001e-06, | |
| "loss": 0.4859, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_f1": 0.751697533021681, | |
| "eval_loss": 0.5241729021072388, | |
| "eval_runtime": 21.6466, | |
| "eval_samples_per_second": 255.837, | |
| "eval_steps_per_second": 10.671, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.120000000000001e-06, | |
| "loss": 0.6227, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_f1": 0.7821857624888272, | |
| "eval_loss": 0.4922301769256592, | |
| "eval_runtime": 21.5823, | |
| "eval_samples_per_second": 256.599, | |
| "eval_steps_per_second": 10.703, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 5.72e-06, | |
| "loss": 0.4927, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_f1": 0.778530969617547, | |
| "eval_loss": 0.4920683801174164, | |
| "eval_runtime": 21.173, | |
| "eval_samples_per_second": 261.559, | |
| "eval_steps_per_second": 10.91, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 5.320000000000001e-06, | |
| "loss": 0.596, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_f1": 0.7737080595193158, | |
| "eval_loss": 0.509250283241272, | |
| "eval_runtime": 21.9207, | |
| "eval_samples_per_second": 252.638, | |
| "eval_steps_per_second": 10.538, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.92e-06, | |
| "loss": 0.5932, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_f1": 0.7900070254555186, | |
| "eval_loss": 0.49082762002944946, | |
| "eval_runtime": 21.7381, | |
| "eval_samples_per_second": 254.76, | |
| "eval_steps_per_second": 10.627, | |
| "step": 380 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "total_flos": 547397542755648.0, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |