{ "best_metric": 0.6138996138996139, "best_model_checkpoint": "./Validated_cracks_raw_dataset_359_relabeled2_outputs/checkpoint-432", "epoch": 30.0, "eval_steps": 500, "global_step": 3240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.46296296296296297, "grad_norm": 11.11386489868164, "learning_rate": 2.7777777777777783e-06, "loss": 0.9464, "step": 50 }, { "epoch": 0.9259259259259259, "grad_norm": 22.558269500732422, "learning_rate": 5.864197530864199e-06, "loss": 0.9468, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.6023166023166023, "eval_loss": 0.9880222678184509, "eval_runtime": 1.7676, "eval_samples_per_second": 146.524, "eval_steps_per_second": 18.669, "step": 108 }, { "epoch": 1.3888888888888888, "grad_norm": 23.05500030517578, "learning_rate": 8.950617283950618e-06, "loss": 0.9061, "step": 150 }, { "epoch": 1.8518518518518519, "grad_norm": 20.498937606811523, "learning_rate": 9.997164110237345e-06, "loss": 0.9275, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.583011583011583, "eval_loss": 1.0344593524932861, "eval_runtime": 1.7521, "eval_samples_per_second": 147.825, "eval_steps_per_second": 18.835, "step": 216 }, { "epoch": 2.314814814814815, "grad_norm": 10.24534797668457, "learning_rate": 9.98206922906478e-06, "loss": 0.9077, "step": 250 }, { "epoch": 2.7777777777777777, "grad_norm": 62.0069580078125, "learning_rate": 9.954002016824226e-06, "loss": 0.9377, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.5675675675675675, "eval_loss": 1.023147463798523, "eval_runtime": 1.7707, "eval_samples_per_second": 146.27, "eval_steps_per_second": 18.637, "step": 324 }, { "epoch": 3.240740740740741, "grad_norm": 19.093095779418945, "learning_rate": 9.91398066213228e-06, "loss": 0.8912, "step": 350 }, { "epoch": 3.7037037037037037, "grad_norm": 14.229964256286621, "learning_rate": 9.860476209695013e-06, "loss": 0.8461, "step": 400 }, { "epoch": 4.0, "eval_accuracy": 0.6138996138996139, "eval_loss": 0.9997061491012573, "eval_runtime": 1.7439, "eval_samples_per_second": 148.519, "eval_steps_per_second": 18.923, "step": 432 }, { "epoch": 4.166666666666667, "grad_norm": 25.24408721923828, "learning_rate": 9.794316030562418e-06, "loss": 0.8663, "step": 450 }, { "epoch": 4.62962962962963, "grad_norm": 27.978118896484375, "learning_rate": 9.715672392864017e-06, "loss": 0.8429, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.5868725868725869, "eval_loss": 0.9872773885726929, "eval_runtime": 1.7504, "eval_samples_per_second": 147.962, "eval_steps_per_second": 18.852, "step": 540 }, { "epoch": 5.092592592592593, "grad_norm": 22.3253173828125, "learning_rate": 9.62475006920687e-06, "loss": 0.8246, "step": 550 }, { "epoch": 5.555555555555555, "grad_norm": 29.907615661621094, "learning_rate": 9.521785803487888e-06, "loss": 0.781, "step": 600 }, { "epoch": 6.0, "eval_accuracy": 0.6061776061776062, "eval_loss": 0.9970757365226746, "eval_runtime": 1.7424, "eval_samples_per_second": 148.643, "eval_steps_per_second": 18.939, "step": 648 }, { "epoch": 6.018518518518518, "grad_norm": 23.293292999267578, "learning_rate": 9.407047694459149e-06, "loss": 0.8409, "step": 650 }, { "epoch": 6.481481481481482, "grad_norm": 21.52410888671875, "learning_rate": 9.280834497651334e-06, "loss": 0.8137, "step": 700 }, { "epoch": 6.944444444444445, "grad_norm": 34.57286071777344, "learning_rate": 9.143474847472932e-06, "loss": 0.7621, "step": 750 }, { "epoch": 7.0, "eval_accuracy": 0.583011583011583, "eval_loss": 1.0092536211013794, "eval_runtime": 1.7382, "eval_samples_per_second": 149.004, "eval_steps_per_second": 18.985, "step": 756 }, { "epoch": 7.407407407407407, "grad_norm": 15.154962539672852, "learning_rate": 8.9953264015107e-06, "loss": 0.7191, "step": 800 }, { "epoch": 7.87037037037037, "grad_norm": 40.10356140136719, "learning_rate": 8.836774909259442e-06, "loss": 0.7649, "step": 850 }, { "epoch": 8.0, "eval_accuracy": 0.5675675675675675, "eval_loss": 1.0584689378738403, "eval_runtime": 1.7294, "eval_samples_per_second": 149.767, "eval_steps_per_second": 19.082, "step": 864 }, { "epoch": 8.333333333333334, "grad_norm": 65.0152587890625, "learning_rate": 8.668233207706014e-06, "loss": 0.7822, "step": 900 }, { "epoch": 8.796296296296296, "grad_norm": 27.71040916442871, "learning_rate": 8.490140146382767e-06, "loss": 0.7912, "step": 950 }, { "epoch": 9.0, "eval_accuracy": 0.5752895752895753, "eval_loss": 1.0433698892593384, "eval_runtime": 1.7507, "eval_samples_per_second": 147.94, "eval_steps_per_second": 18.85, "step": 972 }, { "epoch": 9.25925925925926, "grad_norm": 35.2958984375, "learning_rate": 8.302959444689442e-06, "loss": 0.7821, "step": 1000 }, { "epoch": 9.722222222222221, "grad_norm": 47.18446731567383, "learning_rate": 8.107178484458825e-06, "loss": 0.7529, "step": 1050 }, { "epoch": 10.0, "eval_accuracy": 0.61003861003861, "eval_loss": 1.022578477859497, "eval_runtime": 1.747, "eval_samples_per_second": 148.256, "eval_steps_per_second": 18.89, "step": 1080 }, { "epoch": 10.185185185185185, "grad_norm": 18.027130126953125, "learning_rate": 7.903307040910035e-06, "loss": 0.7478, "step": 1100 }, { "epoch": 10.648148148148149, "grad_norm": 17.87278175354004, "learning_rate": 7.691875955293851e-06, "loss": 0.6924, "step": 1150 }, { "epoch": 11.0, "eval_accuracy": 0.5868725868725869, "eval_loss": 1.0567034482955933, "eval_runtime": 1.7505, "eval_samples_per_second": 147.961, "eval_steps_per_second": 18.852, "step": 1188 }, { "epoch": 11.11111111111111, "grad_norm": 30.35451316833496, "learning_rate": 7.473435752686213e-06, "loss": 0.7193, "step": 1200 }, { "epoch": 11.574074074074074, "grad_norm": 27.861392974853516, "learning_rate": 7.248555208528913e-06, "loss": 0.7015, "step": 1250 }, { "epoch": 12.0, "eval_accuracy": 0.583011583011583, "eval_loss": 1.0600991249084473, "eval_runtime": 1.7432, "eval_samples_per_second": 148.579, "eval_steps_per_second": 18.931, "step": 1296 }, { "epoch": 12.037037037037036, "grad_norm": 16.875736236572266, "learning_rate": 7.017819867649898e-06, "loss": 0.7261, "step": 1300 }, { "epoch": 12.5, "grad_norm": 29.472204208374023, "learning_rate": 6.781830519619392e-06, "loss": 0.7286, "step": 1350 }, { "epoch": 12.962962962962964, "grad_norm": 79.9528579711914, "learning_rate": 6.541201634411704e-06, "loss": 0.7094, "step": 1400 }, { "epoch": 13.0, "eval_accuracy": 0.5637065637065637, "eval_loss": 1.0830413103103638, "eval_runtime": 1.7485, "eval_samples_per_second": 148.126, "eval_steps_per_second": 18.873, "step": 1404 }, { "epoch": 13.425925925925926, "grad_norm": 32.787437438964844, "learning_rate": 6.296559762445928e-06, "loss": 0.6587, "step": 1450 }, { "epoch": 13.88888888888889, "grad_norm": 21.805397033691406, "learning_rate": 6.048541903171552e-06, "loss": 0.749, "step": 1500 }, { "epoch": 14.0, "eval_accuracy": 0.5907335907335908, "eval_loss": 1.0383802652359009, "eval_runtime": 1.7396, "eval_samples_per_second": 148.885, "eval_steps_per_second": 18.97, "step": 1512 }, { "epoch": 14.351851851851851, "grad_norm": 14.21861457824707, "learning_rate": 5.79779384644684e-06, "loss": 0.6565, "step": 1550 }, { "epoch": 14.814814814814815, "grad_norm": 111.92208862304688, "learning_rate": 5.550041104970398e-06, "loss": 0.7433, "step": 1600 }, { "epoch": 15.0, "eval_accuracy": 0.5675675675675675, "eval_loss": 1.0580743551254272, "eval_runtime": 1.737, "eval_samples_per_second": 149.108, "eval_steps_per_second": 18.998, "step": 1620 }, { "epoch": 15.277777777777779, "grad_norm": 27.13836669921875, "learning_rate": 5.2958186601645375e-06, "loss": 0.7729, "step": 1650 }, { "epoch": 15.74074074074074, "grad_norm": 41.231163024902344, "learning_rate": 5.040825961590755e-06, "loss": 0.7595, "step": 1700 }, { "epoch": 16.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 1.0641090869903564, "eval_runtime": 1.7544, "eval_samples_per_second": 147.629, "eval_steps_per_second": 18.81, "step": 1728 }, { "epoch": 16.203703703703702, "grad_norm": 18.411924362182617, "learning_rate": 4.785726960220769e-06, "loss": 0.7803, "step": 1750 }, { "epoch": 16.666666666666668, "grad_norm": 34.5395393371582, "learning_rate": 4.531185883817934e-06, "loss": 0.8132, "step": 1800 }, { "epoch": 17.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 1.0863285064697266, "eval_runtime": 1.7323, "eval_samples_per_second": 149.51, "eval_steps_per_second": 19.05, "step": 1836 }, { "epoch": 17.12962962962963, "grad_norm": 30.300302505493164, "learning_rate": 4.277865507418416e-06, "loss": 0.8507, "step": 1850 }, { "epoch": 17.59259259259259, "grad_norm": 19.417051315307617, "learning_rate": 4.026425427595017e-06, "loss": 0.7951, "step": 1900 }, { "epoch": 18.0, "eval_accuracy": 0.5868725868725869, "eval_loss": 1.0517489910125732, "eval_runtime": 1.7405, "eval_samples_per_second": 148.809, "eval_steps_per_second": 18.96, "step": 1944 }, { "epoch": 18.055555555555557, "grad_norm": 20.044008255004883, "learning_rate": 3.777520344997052e-06, "loss": 0.8131, "step": 1950 }, { "epoch": 18.51851851851852, "grad_norm": 29.19565200805664, "learning_rate": 3.5317983596382976e-06, "loss": 0.7714, "step": 2000 }, { "epoch": 18.98148148148148, "grad_norm": 32.99634552001953, "learning_rate": 3.289899283371657e-06, "loss": 0.7861, "step": 2050 }, { "epoch": 19.0, "eval_accuracy": 0.5907335907335908, "eval_loss": 1.0631417036056519, "eval_runtime": 1.739, "eval_samples_per_second": 148.94, "eval_steps_per_second": 18.977, "step": 2052 }, { "epoch": 19.444444444444443, "grad_norm": 20.90323829650879, "learning_rate": 3.052452973944603e-06, "loss": 0.7129, "step": 2100 }, { "epoch": 19.90740740740741, "grad_norm": 40.24602508544922, "learning_rate": 2.8200776949731435e-06, "loss": 0.7334, "step": 2150 }, { "epoch": 20.0, "eval_accuracy": 0.5907335907335908, "eval_loss": 1.0507447719573975, "eval_runtime": 1.893, "eval_samples_per_second": 136.818, "eval_steps_per_second": 17.432, "step": 2160 }, { "epoch": 20.37037037037037, "grad_norm": 19.65435028076172, "learning_rate": 2.5933785061046333e-06, "loss": 0.7563, "step": 2200 }, { "epoch": 20.833333333333332, "grad_norm": 96.42213439941406, "learning_rate": 2.372945687561132e-06, "loss": 0.6807, "step": 2250 }, { "epoch": 21.0, "eval_accuracy": 0.5791505791505791, "eval_loss": 1.0659432411193848, "eval_runtime": 1.7425, "eval_samples_per_second": 148.637, "eval_steps_per_second": 18.938, "step": 2268 }, { "epoch": 21.296296296296298, "grad_norm": 30.792505264282227, "learning_rate": 2.159353203165484e-06, "loss": 0.7706, "step": 2300 }, { "epoch": 21.75925925925926, "grad_norm": 62.397857666015625, "learning_rate": 1.9531572058521074e-06, "loss": 0.6905, "step": 2350 }, { "epoch": 22.0, "eval_accuracy": 0.5752895752895753, "eval_loss": 1.0680582523345947, "eval_runtime": 1.7542, "eval_samples_per_second": 147.645, "eval_steps_per_second": 18.812, "step": 2376 }, { "epoch": 22.22222222222222, "grad_norm": 17.79286766052246, "learning_rate": 1.754894589553836e-06, "loss": 0.763, "step": 2400 }, { "epoch": 22.685185185185187, "grad_norm": 20.265037536621094, "learning_rate": 1.5650815912354285e-06, "loss": 0.7242, "step": 2450 }, { "epoch": 23.0, "eval_accuracy": 0.5907335907335908, "eval_loss": 1.0529667139053345, "eval_runtime": 1.7502, "eval_samples_per_second": 147.984, "eval_steps_per_second": 18.855, "step": 2484 }, { "epoch": 23.14814814814815, "grad_norm": 43.985389709472656, "learning_rate": 1.3842124467137664e-06, "loss": 0.7003, "step": 2500 }, { "epoch": 23.61111111111111, "grad_norm": 58.851200103759766, "learning_rate": 1.212758103764733e-06, "loss": 0.7342, "step": 2550 }, { "epoch": 24.0, "eval_accuracy": 0.5637065637065637, "eval_loss": 1.0659518241882324, "eval_runtime": 1.7466, "eval_samples_per_second": 148.291, "eval_steps_per_second": 18.894, "step": 2592 }, { "epoch": 24.074074074074073, "grad_norm": 34.4920654296875, "learning_rate": 1.0511649958675745e-06, "loss": 0.7262, "step": 2600 }, { "epoch": 24.537037037037038, "grad_norm": 29.049121856689453, "learning_rate": 8.998538797796879e-07, "loss": 0.6924, "step": 2650 }, { "epoch": 25.0, "grad_norm": 54.39902877807617, "learning_rate": 7.592187399685524e-07, "loss": 0.7736, "step": 2700 }, { "epoch": 25.0, "eval_accuracy": 0.5791505791505791, "eval_loss": 1.070913314819336, "eval_runtime": 1.7438, "eval_samples_per_second": 148.528, "eval_steps_per_second": 18.924, "step": 2700 }, { "epoch": 25.462962962962962, "grad_norm": 41.184539794921875, "learning_rate": 6.296257627534552e-07, "loss": 0.6561, "step": 2750 }, { "epoch": 25.925925925925927, "grad_norm": 57.05064010620117, "learning_rate": 5.114123828281398e-07, "loss": 0.7798, "step": 2800 }, { "epoch": 26.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 1.0627210140228271, "eval_runtime": 1.737, "eval_samples_per_second": 149.108, "eval_steps_per_second": 18.998, "step": 2808 }, { "epoch": 26.38888888888889, "grad_norm": 29.81525993347168, "learning_rate": 4.048864046470502e-07, "loss": 0.7423, "step": 2850 }, { "epoch": 26.85185185185185, "grad_norm": 36.73592758178711, "learning_rate": 3.1032520096290817e-07, "loss": 0.71, "step": 2900 }, { "epoch": 27.0, "eval_accuracy": 0.5868725868725869, "eval_loss": 1.060496211051941, "eval_runtime": 1.7352, "eval_samples_per_second": 149.263, "eval_steps_per_second": 19.018, "step": 2916 }, { "epoch": 27.314814814814813, "grad_norm": 63.34288024902344, "learning_rate": 2.2797499060246253e-07, "loss": 0.6777, "step": 2950 }, { "epoch": 27.77777777777778, "grad_norm": 25.37461280822754, "learning_rate": 1.5805019736097105e-07, "loss": 0.706, "step": 3000 }, { "epoch": 28.0, "eval_accuracy": 0.5791505791505791, "eval_loss": 1.0605989694595337, "eval_runtime": 1.7369, "eval_samples_per_second": 149.116, "eval_steps_per_second": 18.999, "step": 3024 }, { "epoch": 28.24074074074074, "grad_norm": 42.660560607910156, "learning_rate": 1.007328916846817e-07, "loss": 0.7275, "step": 3050 }, { "epoch": 28.703703703703702, "grad_norm": 31.769351959228516, "learning_rate": 5.617231659511446e-08, "loss": 0.7421, "step": 3100 }, { "epoch": 29.0, "eval_accuracy": 0.5752895752895753, "eval_loss": 1.0606402158737183, "eval_runtime": 1.7325, "eval_samples_per_second": 149.494, "eval_steps_per_second": 19.048, "step": 3132 }, { "epoch": 29.166666666666668, "grad_norm": 23.639081954956055, "learning_rate": 2.448449908949879e-08, "loss": 0.6976, "step": 3150 }, { "epoch": 29.62962962962963, "grad_norm": 30.702539443969727, "learning_rate": 5.751948029234511e-09, "loss": 0.654, "step": 3200 }, { "epoch": 30.0, "eval_accuracy": 0.5752895752895753, "eval_loss": 1.060823917388916, "eval_runtime": 3.0177, "eval_samples_per_second": 85.826, "eval_steps_per_second": 10.935, "step": 3240 }, { "epoch": 30.0, "step": 3240, "total_flos": 2.0405460235862016e+18, "train_loss": 0.7678916913491708, "train_runtime": 680.1267, "train_samples_per_second": 37.934, "train_steps_per_second": 4.764 } ], "logging_steps": 50, "max_steps": 3240, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0405460235862016e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }