{ "best_metric": 0.7836363636363637, "best_model_checkpoint": "/content/drive/MyDrive/autoTaggingProject/ViT/General/Features/NeckLine/Results/model_2024-10-16_test/checkpoint-5192", "epoch": 11.0, "eval_steps": 500, "global_step": 5192, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1059322033898305, "grad_norm": 1.528609037399292, "learning_rate": 9.964689265536724e-06, "loss": 2.3665, "step": 50 }, { "epoch": 0.211864406779661, "grad_norm": 1.4840689897537231, "learning_rate": 9.929378531073447e-06, "loss": 2.2771, "step": 100 }, { "epoch": 0.3177966101694915, "grad_norm": 1.6797162294387817, "learning_rate": 9.89406779661017e-06, "loss": 2.1584, "step": 150 }, { "epoch": 0.423728813559322, "grad_norm": 1.5869511365890503, "learning_rate": 9.858757062146892e-06, "loss": 2.0337, "step": 200 }, { "epoch": 0.5296610169491526, "grad_norm": 1.625272512435913, "learning_rate": 9.823446327683617e-06, "loss": 1.945, "step": 250 }, { "epoch": 0.635593220338983, "grad_norm": 1.918320655822754, "learning_rate": 9.788135593220339e-06, "loss": 1.8575, "step": 300 }, { "epoch": 0.7415254237288136, "grad_norm": 2.73588490486145, "learning_rate": 9.752824858757062e-06, "loss": 1.795, "step": 350 }, { "epoch": 0.847457627118644, "grad_norm": 1.978835105895996, "learning_rate": 9.717514124293787e-06, "loss": 1.7206, "step": 400 }, { "epoch": 0.9533898305084746, "grad_norm": 2.827810764312744, "learning_rate": 9.682203389830509e-06, "loss": 1.643, "step": 450 }, { "epoch": 1.0, "eval_accuracy": 0.5754545454545454, "eval_loss": 1.6228725910186768, "eval_runtime": 14.283, "eval_samples_per_second": 77.014, "eval_steps_per_second": 2.45, "step": 472 }, { "epoch": 1.0593220338983051, "grad_norm": 2.191129207611084, "learning_rate": 9.646892655367232e-06, "loss": 1.5876, "step": 500 }, { "epoch": 1.1652542372881356, "grad_norm": 2.16697359085083, "learning_rate": 9.611581920903955e-06, "loss": 1.5416, "step": 550 }, { "epoch": 1.271186440677966, "grad_norm": 2.1197969913482666, "learning_rate": 9.576271186440679e-06, "loss": 1.5015, "step": 600 }, { "epoch": 1.3771186440677967, "grad_norm": 2.465144634246826, "learning_rate": 9.540960451977402e-06, "loss": 1.4279, "step": 650 }, { "epoch": 1.4830508474576272, "grad_norm": 1.8128849267959595, "learning_rate": 9.505649717514125e-06, "loss": 1.4298, "step": 700 }, { "epoch": 1.5889830508474576, "grad_norm": 2.8659541606903076, "learning_rate": 9.470338983050848e-06, "loss": 1.3816, "step": 750 }, { "epoch": 1.694915254237288, "grad_norm": 2.582030773162842, "learning_rate": 9.435028248587572e-06, "loss": 1.3254, "step": 800 }, { "epoch": 1.8008474576271185, "grad_norm": 2.6557815074920654, "learning_rate": 9.399717514124295e-06, "loss": 1.2937, "step": 850 }, { "epoch": 1.9067796610169492, "grad_norm": 3.4831998348236084, "learning_rate": 9.364406779661017e-06, "loss": 1.2511, "step": 900 }, { "epoch": 2.0, "eval_accuracy": 0.6927272727272727, "eval_loss": 1.3029816150665283, "eval_runtime": 14.6712, "eval_samples_per_second": 74.977, "eval_steps_per_second": 2.386, "step": 944 }, { "epoch": 2.01271186440678, "grad_norm": 2.9632270336151123, "learning_rate": 9.329096045197742e-06, "loss": 1.2574, "step": 950 }, { "epoch": 2.1186440677966103, "grad_norm": 4.297321796417236, "learning_rate": 9.293785310734465e-06, "loss": 1.213, "step": 1000 }, { "epoch": 2.2245762711864407, "grad_norm": 5.289111614227295, "learning_rate": 9.258474576271187e-06, "loss": 1.2127, "step": 1050 }, { "epoch": 2.330508474576271, "grad_norm": 5.046447277069092, "learning_rate": 9.22316384180791e-06, "loss": 1.1403, "step": 1100 }, { "epoch": 2.4364406779661016, "grad_norm": 3.322784662246704, "learning_rate": 9.187853107344633e-06, "loss": 1.1244, "step": 1150 }, { "epoch": 2.542372881355932, "grad_norm": 2.8034584522247314, "learning_rate": 9.152542372881356e-06, "loss": 1.1052, "step": 1200 }, { "epoch": 2.648305084745763, "grad_norm": 2.822523593902588, "learning_rate": 9.11723163841808e-06, "loss": 1.0929, "step": 1250 }, { "epoch": 2.7542372881355934, "grad_norm": 4.488712787628174, "learning_rate": 9.081920903954803e-06, "loss": 1.0991, "step": 1300 }, { "epoch": 2.860169491525424, "grad_norm": 4.986589431762695, "learning_rate": 9.046610169491526e-06, "loss": 1.0464, "step": 1350 }, { "epoch": 2.9661016949152543, "grad_norm": 4.071073532104492, "learning_rate": 9.01129943502825e-06, "loss": 1.0532, "step": 1400 }, { "epoch": 3.0, "eval_accuracy": 0.7209090909090909, "eval_loss": 1.0850567817687988, "eval_runtime": 17.8307, "eval_samples_per_second": 61.691, "eval_steps_per_second": 1.963, "step": 1416 }, { "epoch": 3.0720338983050848, "grad_norm": 4.750606536865234, "learning_rate": 8.975988700564973e-06, "loss": 1.0453, "step": 1450 }, { "epoch": 3.1779661016949152, "grad_norm": 3.7604198455810547, "learning_rate": 8.940677966101694e-06, "loss": 1.0231, "step": 1500 }, { "epoch": 3.2838983050847457, "grad_norm": 5.222371578216553, "learning_rate": 8.90536723163842e-06, "loss": 0.9812, "step": 1550 }, { "epoch": 3.389830508474576, "grad_norm": 6.1730570793151855, "learning_rate": 8.870056497175143e-06, "loss": 0.9771, "step": 1600 }, { "epoch": 3.4957627118644066, "grad_norm": 5.553199291229248, "learning_rate": 8.834745762711864e-06, "loss": 0.9682, "step": 1650 }, { "epoch": 3.601694915254237, "grad_norm": 4.2731451988220215, "learning_rate": 8.79943502824859e-06, "loss": 1.0006, "step": 1700 }, { "epoch": 3.707627118644068, "grad_norm": 3.617053985595703, "learning_rate": 8.764124293785311e-06, "loss": 0.9154, "step": 1750 }, { "epoch": 3.8135593220338984, "grad_norm": 2.5833144187927246, "learning_rate": 8.728813559322034e-06, "loss": 0.935, "step": 1800 }, { "epoch": 3.919491525423729, "grad_norm": 3.1606085300445557, "learning_rate": 8.693502824858758e-06, "loss": 0.9216, "step": 1850 }, { "epoch": 4.0, "eval_accuracy": 0.74, "eval_loss": 0.9795950651168823, "eval_runtime": 16.0161, "eval_samples_per_second": 68.681, "eval_steps_per_second": 2.185, "step": 1888 }, { "epoch": 4.02542372881356, "grad_norm": 3.959052801132202, "learning_rate": 8.65819209039548e-06, "loss": 0.9288, "step": 1900 }, { "epoch": 4.13135593220339, "grad_norm": 3.6853768825531006, "learning_rate": 8.622881355932204e-06, "loss": 0.879, "step": 1950 }, { "epoch": 4.237288135593221, "grad_norm": 3.1763620376586914, "learning_rate": 8.587570621468927e-06, "loss": 0.8912, "step": 2000 }, { "epoch": 4.343220338983051, "grad_norm": 4.019489288330078, "learning_rate": 8.55225988700565e-06, "loss": 0.9077, "step": 2050 }, { "epoch": 4.4491525423728815, "grad_norm": 4.087663650512695, "learning_rate": 8.516949152542372e-06, "loss": 0.8812, "step": 2100 }, { "epoch": 4.555084745762712, "grad_norm": 3.6994051933288574, "learning_rate": 8.481638418079097e-06, "loss": 0.8344, "step": 2150 }, { "epoch": 4.661016949152542, "grad_norm": 2.284302234649658, "learning_rate": 8.44632768361582e-06, "loss": 0.8501, "step": 2200 }, { "epoch": 4.766949152542373, "grad_norm": 4.348343372344971, "learning_rate": 8.411016949152542e-06, "loss": 0.8712, "step": 2250 }, { "epoch": 4.872881355932203, "grad_norm": 4.996354103088379, "learning_rate": 8.375706214689267e-06, "loss": 0.8264, "step": 2300 }, { "epoch": 4.978813559322034, "grad_norm": 3.136770486831665, "learning_rate": 8.340395480225989e-06, "loss": 0.843, "step": 2350 }, { "epoch": 5.0, "eval_accuracy": 0.7390909090909091, "eval_loss": 0.9454855918884277, "eval_runtime": 15.0504, "eval_samples_per_second": 73.088, "eval_steps_per_second": 2.326, "step": 2360 }, { "epoch": 5.084745762711864, "grad_norm": 6.956203937530518, "learning_rate": 8.305084745762712e-06, "loss": 0.8094, "step": 2400 }, { "epoch": 5.190677966101695, "grad_norm": 6.91636323928833, "learning_rate": 8.269774011299437e-06, "loss": 0.8301, "step": 2450 }, { "epoch": 5.296610169491525, "grad_norm": 2.561798334121704, "learning_rate": 8.234463276836159e-06, "loss": 0.8562, "step": 2500 }, { "epoch": 5.4025423728813555, "grad_norm": 4.503079891204834, "learning_rate": 8.199152542372882e-06, "loss": 0.7487, "step": 2550 }, { "epoch": 5.508474576271187, "grad_norm": 3.560302257537842, "learning_rate": 8.163841807909605e-06, "loss": 0.8222, "step": 2600 }, { "epoch": 5.614406779661017, "grad_norm": 6.565722465515137, "learning_rate": 8.128531073446328e-06, "loss": 0.7917, "step": 2650 }, { "epoch": 5.720338983050848, "grad_norm": 7.790140151977539, "learning_rate": 8.093220338983052e-06, "loss": 0.7764, "step": 2700 }, { "epoch": 5.826271186440678, "grad_norm": 4.017592430114746, "learning_rate": 8.057909604519775e-06, "loss": 0.7718, "step": 2750 }, { "epoch": 5.932203389830509, "grad_norm": 6.110499382019043, "learning_rate": 8.022598870056498e-06, "loss": 0.7445, "step": 2800 }, { "epoch": 6.0, "eval_accuracy": 0.7527272727272727, "eval_loss": 0.8885732889175415, "eval_runtime": 15.0594, "eval_samples_per_second": 73.044, "eval_steps_per_second": 2.324, "step": 2832 }, { "epoch": 6.038135593220339, "grad_norm": 4.600073337554932, "learning_rate": 7.987288135593222e-06, "loss": 0.7767, "step": 2850 }, { "epoch": 6.1440677966101696, "grad_norm": 5.356403827667236, "learning_rate": 7.951977401129945e-06, "loss": 0.708, "step": 2900 }, { "epoch": 6.25, "grad_norm": 5.1397223472595215, "learning_rate": 7.917372881355932e-06, "loss": 0.7397, "step": 2950 }, { "epoch": 6.3559322033898304, "grad_norm": 6.384206771850586, "learning_rate": 7.882062146892657e-06, "loss": 0.7523, "step": 3000 }, { "epoch": 6.461864406779661, "grad_norm": 5.545274257659912, "learning_rate": 7.846751412429378e-06, "loss": 0.7011, "step": 3050 }, { "epoch": 6.567796610169491, "grad_norm": 9.406649589538574, "learning_rate": 7.811440677966102e-06, "loss": 0.7975, "step": 3100 }, { "epoch": 6.673728813559322, "grad_norm": 7.81419563293457, "learning_rate": 7.776129943502827e-06, "loss": 0.7385, "step": 3150 }, { "epoch": 6.779661016949152, "grad_norm": 3.415956974029541, "learning_rate": 7.740819209039548e-06, "loss": 0.7356, "step": 3200 }, { "epoch": 6.885593220338983, "grad_norm": 5.487062931060791, "learning_rate": 7.705508474576271e-06, "loss": 0.7121, "step": 3250 }, { "epoch": 6.991525423728813, "grad_norm": 5.682718276977539, "learning_rate": 7.670197740112995e-06, "loss": 0.7191, "step": 3300 }, { "epoch": 7.0, "eval_accuracy": 0.7545454545454545, "eval_loss": 0.8240677118301392, "eval_runtime": 14.7678, "eval_samples_per_second": 74.486, "eval_steps_per_second": 2.37, "step": 3304 }, { "epoch": 7.0974576271186445, "grad_norm": 8.319087028503418, "learning_rate": 7.634887005649718e-06, "loss": 0.7096, "step": 3350 }, { "epoch": 7.203389830508475, "grad_norm": 5.857816696166992, "learning_rate": 7.599576271186442e-06, "loss": 0.7006, "step": 3400 }, { "epoch": 7.309322033898305, "grad_norm": 4.400519371032715, "learning_rate": 7.564265536723165e-06, "loss": 0.706, "step": 3450 }, { "epoch": 7.415254237288136, "grad_norm": 4.573615550994873, "learning_rate": 7.528954802259888e-06, "loss": 0.6543, "step": 3500 }, { "epoch": 7.521186440677966, "grad_norm": 7.545746803283691, "learning_rate": 7.49364406779661e-06, "loss": 0.6498, "step": 3550 }, { "epoch": 7.627118644067797, "grad_norm": 6.38883638381958, "learning_rate": 7.4583333333333345e-06, "loss": 0.6849, "step": 3600 }, { "epoch": 7.733050847457627, "grad_norm": 4.496486186981201, "learning_rate": 7.423022598870057e-06, "loss": 0.6741, "step": 3650 }, { "epoch": 7.838983050847458, "grad_norm": 5.2381792068481445, "learning_rate": 7.38771186440678e-06, "loss": 0.6441, "step": 3700 }, { "epoch": 7.944915254237288, "grad_norm": 6.047347068786621, "learning_rate": 7.3524011299435035e-06, "loss": 0.7089, "step": 3750 }, { "epoch": 8.0, "eval_accuracy": 0.7718181818181818, "eval_loss": 0.845079779624939, "eval_runtime": 14.9091, "eval_samples_per_second": 73.78, "eval_steps_per_second": 2.348, "step": 3776 }, { "epoch": 8.05084745762712, "grad_norm": 5.383782386779785, "learning_rate": 7.317090395480226e-06, "loss": 0.6375, "step": 3800 }, { "epoch": 8.15677966101695, "grad_norm": 7.663337707519531, "learning_rate": 7.28177966101695e-06, "loss": 0.6101, "step": 3850 }, { "epoch": 8.26271186440678, "grad_norm": 4.593461036682129, "learning_rate": 7.2464689265536725e-06, "loss": 0.6356, "step": 3900 }, { "epoch": 8.36864406779661, "grad_norm": 7.878734111785889, "learning_rate": 7.211158192090396e-06, "loss": 0.6412, "step": 3950 }, { "epoch": 8.474576271186441, "grad_norm": 3.501059055328369, "learning_rate": 7.17584745762712e-06, "loss": 0.6296, "step": 4000 }, { "epoch": 8.580508474576272, "grad_norm": 3.8199708461761475, "learning_rate": 7.140536723163842e-06, "loss": 0.654, "step": 4050 }, { "epoch": 8.686440677966102, "grad_norm": 6.42057466506958, "learning_rate": 7.105225988700566e-06, "loss": 0.6378, "step": 4100 }, { "epoch": 8.792372881355933, "grad_norm": 6.310295104980469, "learning_rate": 7.069915254237288e-06, "loss": 0.6235, "step": 4150 }, { "epoch": 8.898305084745763, "grad_norm": 4.627810478210449, "learning_rate": 7.034604519774012e-06, "loss": 0.6631, "step": 4200 }, { "epoch": 9.0, "eval_accuracy": 0.77, "eval_loss": 0.8626542687416077, "eval_runtime": 15.548, "eval_samples_per_second": 70.749, "eval_steps_per_second": 2.251, "step": 4248 }, { "epoch": 9.004237288135593, "grad_norm": 5.442898273468018, "learning_rate": 6.999293785310735e-06, "loss": 0.6358, "step": 4250 }, { "epoch": 9.110169491525424, "grad_norm": 5.042696475982666, "learning_rate": 6.963983050847458e-06, "loss": 0.6183, "step": 4300 }, { "epoch": 9.216101694915254, "grad_norm": 5.006898403167725, "learning_rate": 6.928672316384182e-06, "loss": 0.6438, "step": 4350 }, { "epoch": 9.322033898305085, "grad_norm": 6.093140125274658, "learning_rate": 6.893361581920905e-06, "loss": 0.5826, "step": 4400 }, { "epoch": 9.427966101694915, "grad_norm": 4.637847900390625, "learning_rate": 6.858050847457628e-06, "loss": 0.5559, "step": 4450 }, { "epoch": 9.533898305084746, "grad_norm": 2.860111951828003, "learning_rate": 6.82274011299435e-06, "loss": 0.5577, "step": 4500 }, { "epoch": 9.639830508474576, "grad_norm": 10.876856803894043, "learning_rate": 6.7874293785310745e-06, "loss": 0.6233, "step": 4550 }, { "epoch": 9.745762711864407, "grad_norm": 5.635727882385254, "learning_rate": 6.752118644067798e-06, "loss": 0.5703, "step": 4600 }, { "epoch": 9.851694915254237, "grad_norm": 6.9388532638549805, "learning_rate": 6.71680790960452e-06, "loss": 0.6323, "step": 4650 }, { "epoch": 9.957627118644067, "grad_norm": 7.485644340515137, "learning_rate": 6.6814971751412435e-06, "loss": 0.6021, "step": 4700 }, { "epoch": 10.0, "eval_accuracy": 0.7772727272727272, "eval_loss": 0.8030957579612732, "eval_runtime": 14.6023, "eval_samples_per_second": 75.331, "eval_steps_per_second": 2.397, "step": 4720 }, { "epoch": 10.063559322033898, "grad_norm": 13.398885726928711, "learning_rate": 6.646186440677966e-06, "loss": 0.5698, "step": 4750 }, { "epoch": 10.169491525423728, "grad_norm": 7.821059226989746, "learning_rate": 6.61087570621469e-06, "loss": 0.5364, "step": 4800 }, { "epoch": 10.275423728813559, "grad_norm": 7.535600662231445, "learning_rate": 6.576271186440678e-06, "loss": 0.5959, "step": 4850 }, { "epoch": 10.38135593220339, "grad_norm": 8.605109214782715, "learning_rate": 6.540960451977402e-06, "loss": 0.5837, "step": 4900 }, { "epoch": 10.48728813559322, "grad_norm": 5.240401268005371, "learning_rate": 6.505649717514125e-06, "loss": 0.5796, "step": 4950 }, { "epoch": 10.59322033898305, "grad_norm": 6.025055408477783, "learning_rate": 6.4703389830508476e-06, "loss": 0.5749, "step": 5000 }, { "epoch": 10.69915254237288, "grad_norm": 6.605931282043457, "learning_rate": 6.435028248587572e-06, "loss": 0.5185, "step": 5050 }, { "epoch": 10.805084745762711, "grad_norm": 3.71102237701416, "learning_rate": 6.399717514124294e-06, "loss": 0.5289, "step": 5100 }, { "epoch": 10.911016949152543, "grad_norm": 4.973482608795166, "learning_rate": 6.3644067796610174e-06, "loss": 0.5691, "step": 5150 }, { "epoch": 11.0, "eval_accuracy": 0.7836363636363637, "eval_loss": 0.8530685305595398, "eval_runtime": 14.8369, "eval_samples_per_second": 74.14, "eval_steps_per_second": 2.359, "step": 5192 } ], "logging_steps": 50, "max_steps": 14160, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.286478541133356e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }