| { | |
| "best_metric": 0.8807787895202637, | |
| "best_model_checkpoint": "org_org_a/org_aug_a/checkpoint-400", | |
| "epoch": 0.24615384615384617, | |
| "eval_steps": 25, | |
| "global_step": 400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.015384615384615385, | |
| "grad_norm": 99.7617416381836, | |
| "learning_rate": 9.375e-05, | |
| "loss": 2.2195, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.015384615384615385, | |
| "eval_f1_macro": 0.5521299126846324, | |
| "eval_f1_micro": 0.5691194856224325, | |
| "eval_f1_weighted": 0.569423288164649, | |
| "eval_loss": 1.520703673362732, | |
| "eval_runtime": 1286.7359, | |
| "eval_samples_per_second": 8.703, | |
| "eval_steps_per_second": 0.272, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03076923076923077, | |
| "grad_norm": 40.433555603027344, | |
| "learning_rate": 8.75e-05, | |
| "loss": 1.4371, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03076923076923077, | |
| "eval_f1_macro": 0.5857301862067549, | |
| "eval_f1_micro": 0.6089480264332917, | |
| "eval_f1_weighted": 0.6051732662908408, | |
| "eval_loss": 1.2746953964233398, | |
| "eval_runtime": 1302.368, | |
| "eval_samples_per_second": 8.598, | |
| "eval_steps_per_second": 0.269, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.046153846153846156, | |
| "grad_norm": 50.680335998535156, | |
| "learning_rate": 8.125000000000001e-05, | |
| "loss": 1.2556, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.046153846153846156, | |
| "eval_f1_macro": 0.6036315452406847, | |
| "eval_f1_micro": 0.6303804250759064, | |
| "eval_f1_weighted": 0.6240449220829647, | |
| "eval_loss": 1.1545159816741943, | |
| "eval_runtime": 1308.8563, | |
| "eval_samples_per_second": 8.556, | |
| "eval_steps_per_second": 0.267, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06153846153846154, | |
| "grad_norm": 24.990671157836914, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 1.2415, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06153846153846154, | |
| "eval_f1_macro": 0.6131831448419165, | |
| "eval_f1_micro": 0.6319878549741025, | |
| "eval_f1_weighted": 0.6300913896752308, | |
| "eval_loss": 1.0690622329711914, | |
| "eval_runtime": 1302.0106, | |
| "eval_samples_per_second": 8.601, | |
| "eval_steps_per_second": 0.269, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 20.941816329956055, | |
| "learning_rate": 6.875e-05, | |
| "loss": 0.9864, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "eval_f1_macro": 0.627800207605976, | |
| "eval_f1_micro": 0.6399357028040722, | |
| "eval_f1_weighted": 0.6411240450638264, | |
| "eval_loss": 1.0263742208480835, | |
| "eval_runtime": 1310.3949, | |
| "eval_samples_per_second": 8.546, | |
| "eval_steps_per_second": 0.267, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.09230769230769231, | |
| "grad_norm": 17.243314743041992, | |
| "learning_rate": 6.25e-05, | |
| "loss": 1.0647, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09230769230769231, | |
| "eval_f1_macro": 0.6265827051287185, | |
| "eval_f1_micro": 0.6510091087694231, | |
| "eval_f1_weighted": 0.6455165794591529, | |
| "eval_loss": 0.9917964339256287, | |
| "eval_runtime": 1292.6103, | |
| "eval_samples_per_second": 8.663, | |
| "eval_steps_per_second": 0.271, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1076923076923077, | |
| "grad_norm": 20.114173889160156, | |
| "learning_rate": 5.6250000000000005e-05, | |
| "loss": 0.9849, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.1076923076923077, | |
| "eval_f1_macro": 0.6317476736951155, | |
| "eval_f1_micro": 0.6576174316842294, | |
| "eval_f1_weighted": 0.6510976948325254, | |
| "eval_loss": 0.9679338932037354, | |
| "eval_runtime": 1305.0812, | |
| "eval_samples_per_second": 8.58, | |
| "eval_steps_per_second": 0.268, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.12307692307692308, | |
| "grad_norm": 39.2221565246582, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0067, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12307692307692308, | |
| "eval_f1_macro": 0.6383959350585475, | |
| "eval_f1_micro": 0.6501160921593142, | |
| "eval_f1_weighted": 0.6513020604373679, | |
| "eval_loss": 0.9382981061935425, | |
| "eval_runtime": 1283.0843, | |
| "eval_samples_per_second": 8.727, | |
| "eval_steps_per_second": 0.273, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13846153846153847, | |
| "grad_norm": 26.992185592651367, | |
| "learning_rate": 4.375e-05, | |
| "loss": 0.8928, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.13846153846153847, | |
| "eval_f1_macro": 0.640450740779414, | |
| "eval_f1_micro": 0.6619932130737631, | |
| "eval_f1_weighted": 0.6578984928748007, | |
| "eval_loss": 0.9242791533470154, | |
| "eval_runtime": 1276.3685, | |
| "eval_samples_per_second": 8.773, | |
| "eval_steps_per_second": 0.274, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 71.61570739746094, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.9858, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "eval_f1_macro": 0.640469116958249, | |
| "eval_f1_micro": 0.6627076263618503, | |
| "eval_f1_weighted": 0.6581960436641718, | |
| "eval_loss": 0.9131789803504944, | |
| "eval_runtime": 1285.3671, | |
| "eval_samples_per_second": 8.712, | |
| "eval_steps_per_second": 0.272, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16923076923076924, | |
| "grad_norm": 51.381019592285156, | |
| "learning_rate": 3.125e-05, | |
| "loss": 0.9085, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.16923076923076924, | |
| "eval_f1_macro": 0.6446344740224741, | |
| "eval_f1_micro": 0.6575281300232184, | |
| "eval_f1_weighted": 0.6580540316041209, | |
| "eval_loss": 0.9010853171348572, | |
| "eval_runtime": 1307.6049, | |
| "eval_samples_per_second": 8.564, | |
| "eval_steps_per_second": 0.268, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.18461538461538463, | |
| "grad_norm": 25.47317886352539, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.0059, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.18461538461538463, | |
| "eval_f1_macro": 0.6435798595814134, | |
| "eval_f1_micro": 0.6686015359885694, | |
| "eval_f1_weighted": 0.662301841928527, | |
| "eval_loss": 0.9018191695213318, | |
| "eval_runtime": 1330.8319, | |
| "eval_samples_per_second": 8.414, | |
| "eval_steps_per_second": 0.263, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 16.64508628845215, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.8939, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_f1_macro": 0.6448244249080287, | |
| "eval_f1_micro": 0.6681550276835149, | |
| "eval_f1_weighted": 0.6628710932570111, | |
| "eval_loss": 0.892798125743866, | |
| "eval_runtime": 1305.0822, | |
| "eval_samples_per_second": 8.58, | |
| "eval_steps_per_second": 0.268, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2153846153846154, | |
| "grad_norm": 32.9316520690918, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.864, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2153846153846154, | |
| "eval_f1_macro": 0.6477608498148076, | |
| "eval_f1_micro": 0.6621718163957849, | |
| "eval_f1_weighted": 0.6618855058014609, | |
| "eval_loss": 0.8832775950431824, | |
| "eval_runtime": 1344.9811, | |
| "eval_samples_per_second": 8.326, | |
| "eval_steps_per_second": 0.26, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 63.84526824951172, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.9499, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "eval_f1_macro": 0.646307301425064, | |
| "eval_f1_micro": 0.6585104482943382, | |
| "eval_f1_weighted": 0.659331813949001, | |
| "eval_loss": 0.8836826682090759, | |
| "eval_runtime": 1287.5574, | |
| "eval_samples_per_second": 8.697, | |
| "eval_steps_per_second": 0.272, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "grad_norm": 21.614879608154297, | |
| "learning_rate": 0.0, | |
| "loss": 0.9721, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "eval_f1_macro": 0.6475943444675745, | |
| "eval_f1_micro": 0.6614574031076978, | |
| "eval_f1_weighted": 0.6614511612983146, | |
| "eval_loss": 0.8807787895202637, | |
| "eval_runtime": 1313.0891, | |
| "eval_samples_per_second": 8.528, | |
| "eval_steps_per_second": 0.267, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "step": 400, | |
| "total_flos": 1.690284412871639e+17, | |
| "train_loss": 1.104334650039673, | |
| "train_runtime": 22485.3021, | |
| "train_samples_per_second": 0.569, | |
| "train_steps_per_second": 0.018 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "eval_f1_macro": 0.6475943444675745, | |
| "eval_f1_micro": 0.6614574031076978, | |
| "eval_f1_weighted": 0.6614511612983146, | |
| "eval_loss": 0.8807787895202637, | |
| "eval_runtime": 1299.8097, | |
| "eval_samples_per_second": 8.615, | |
| "eval_steps_per_second": 0.269, | |
| "step": 400 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 25, | |
| "total_flos": 1.690284412871639e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |