{ "best_metric": 0.4469132423400879, "best_model_checkpoint": "xblock-base-patch1-224/checkpoint-2253", "epoch": 3.0, "eval_steps": 500, "global_step": 2253, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 9.94556713104248, "learning_rate": 5.088495575221239e-06, "loss": 1.5614, "step": 25 }, { "epoch": 0.07, "grad_norm": 9.733675956726074, "learning_rate": 1.0619469026548673e-05, "loss": 1.4735, "step": 50 }, { "epoch": 0.1, "grad_norm": 9.123199462890625, "learning_rate": 1.6150442477876106e-05, "loss": 1.3293, "step": 75 }, { "epoch": 0.13, "grad_norm": 10.894118309020996, "learning_rate": 2.1681415929203542e-05, "loss": 1.2442, "step": 100 }, { "epoch": 0.17, "grad_norm": 8.819581985473633, "learning_rate": 2.721238938053097e-05, "loss": 1.0742, "step": 125 }, { "epoch": 0.2, "grad_norm": 13.125239372253418, "learning_rate": 3.274336283185841e-05, "loss": 1.1571, "step": 150 }, { "epoch": 0.23, "grad_norm": 8.423856735229492, "learning_rate": 3.827433628318584e-05, "loss": 1.1193, "step": 175 }, { "epoch": 0.27, "grad_norm": 11.360130310058594, "learning_rate": 4.380530973451328e-05, "loss": 1.0605, "step": 200 }, { "epoch": 0.3, "grad_norm": 7.3759379386901855, "learning_rate": 4.9336283185840707e-05, "loss": 0.9883, "step": 225 }, { "epoch": 0.33, "grad_norm": 12.12755298614502, "learning_rate": 4.9457326097681306e-05, "loss": 0.9764, "step": 250 }, { "epoch": 0.37, "grad_norm": 5.0697712898254395, "learning_rate": 4.884065120868278e-05, "loss": 1.1213, "step": 275 }, { "epoch": 0.4, "grad_norm": 8.178693771362305, "learning_rate": 4.822397631968427e-05, "loss": 0.9683, "step": 300 }, { "epoch": 0.43, "grad_norm": 7.6824846267700195, "learning_rate": 4.7607301430685744e-05, "loss": 1.0408, "step": 325 }, { "epoch": 0.47, "grad_norm": 10.855768203735352, "learning_rate": 4.699062654168723e-05, "loss": 0.9092, "step": 350 }, { "epoch": 0.5, "grad_norm": 6.187016487121582, "learning_rate": 4.6373951652688704e-05, "loss": 1.0058, "step": 375 }, { "epoch": 0.53, "grad_norm": 8.00668716430664, "learning_rate": 4.575727676369018e-05, "loss": 0.8086, "step": 400 }, { "epoch": 0.57, "grad_norm": 5.1038336753845215, "learning_rate": 4.5140601874691665e-05, "loss": 0.9027, "step": 425 }, { "epoch": 0.6, "grad_norm": 10.569095611572266, "learning_rate": 4.452392698569314e-05, "loss": 0.8234, "step": 450 }, { "epoch": 0.63, "grad_norm": 8.256200790405273, "learning_rate": 4.3907252096694626e-05, "loss": 0.9163, "step": 475 }, { "epoch": 0.67, "grad_norm": 11.042557716369629, "learning_rate": 4.32905772076961e-05, "loss": 0.8243, "step": 500 }, { "epoch": 0.7, "grad_norm": 18.462459564208984, "learning_rate": 4.267390231869759e-05, "loss": 0.8703, "step": 525 }, { "epoch": 0.73, "grad_norm": 12.024648666381836, "learning_rate": 4.2057227429699064e-05, "loss": 0.991, "step": 550 }, { "epoch": 0.77, "grad_norm": 11.26354694366455, "learning_rate": 4.144055254070055e-05, "loss": 0.8402, "step": 575 }, { "epoch": 0.8, "grad_norm": 8.839094161987305, "learning_rate": 4.0823877651702024e-05, "loss": 0.7653, "step": 600 }, { "epoch": 0.83, "grad_norm": 10.799356460571289, "learning_rate": 4.020720276270351e-05, "loss": 0.7787, "step": 625 }, { "epoch": 0.87, "grad_norm": 12.935748100280762, "learning_rate": 3.9590527873704985e-05, "loss": 0.7738, "step": 650 }, { "epoch": 0.9, "grad_norm": 4.829887866973877, "learning_rate": 3.897385298470647e-05, "loss": 0.8329, "step": 675 }, { "epoch": 0.93, "grad_norm": 4.532620429992676, "learning_rate": 3.8357178095707946e-05, "loss": 0.9689, "step": 700 }, { "epoch": 0.97, "grad_norm": 8.8261079788208, "learning_rate": 3.774050320670943e-05, "loss": 0.7615, "step": 725 }, { "epoch": 1.0, "grad_norm": 3.9906632900238037, "learning_rate": 3.712382831771091e-05, "loss": 0.8284, "step": 750 }, { "epoch": 1.0, "eval_accuracy": 0.7503328894806924, "eval_f1_macro": 0.7379493476306923, "eval_f1_micro": 0.7503328894806925, "eval_f1_weighted": 0.7478927601803307, "eval_loss": 0.6315314769744873, "eval_precision_macro": 0.7619988736851466, "eval_precision_micro": 0.7503328894806924, "eval_precision_weighted": 0.7668596523942972, "eval_recall_macro": 0.7356084437086093, "eval_recall_micro": 0.7503328894806924, "eval_recall_weighted": 0.7503328894806924, "eval_runtime": 103.3083, "eval_samples_per_second": 14.539, "eval_steps_per_second": 0.91, "step": 751 }, { "epoch": 1.03, "grad_norm": 7.72317361831665, "learning_rate": 3.650715342871239e-05, "loss": 0.8196, "step": 775 }, { "epoch": 1.07, "grad_norm": 6.5625152587890625, "learning_rate": 3.589047853971386e-05, "loss": 0.7794, "step": 800 }, { "epoch": 1.1, "grad_norm": 5.599926948547363, "learning_rate": 3.5273803650715344e-05, "loss": 0.6632, "step": 825 }, { "epoch": 1.13, "grad_norm": 9.425518989562988, "learning_rate": 3.465712876171682e-05, "loss": 0.7074, "step": 850 }, { "epoch": 1.17, "grad_norm": 8.80082893371582, "learning_rate": 3.4040453872718305e-05, "loss": 0.7325, "step": 875 }, { "epoch": 1.2, "grad_norm": 11.81970500946045, "learning_rate": 3.342377898371978e-05, "loss": 0.8574, "step": 900 }, { "epoch": 1.23, "grad_norm": 8.871489524841309, "learning_rate": 3.2807104094721266e-05, "loss": 0.8407, "step": 925 }, { "epoch": 1.26, "grad_norm": 7.296131610870361, "learning_rate": 3.219042920572274e-05, "loss": 0.6962, "step": 950 }, { "epoch": 1.3, "grad_norm": 8.161062240600586, "learning_rate": 3.1573754316724227e-05, "loss": 0.8163, "step": 975 }, { "epoch": 1.33, "grad_norm": 14.00735092163086, "learning_rate": 3.0957079427725704e-05, "loss": 0.6706, "step": 1000 }, { "epoch": 1.36, "grad_norm": 10.980608940124512, "learning_rate": 3.0340404538727184e-05, "loss": 0.6639, "step": 1025 }, { "epoch": 1.4, "grad_norm": 6.72366189956665, "learning_rate": 2.9723729649728664e-05, "loss": 0.7, "step": 1050 }, { "epoch": 1.43, "grad_norm": 24.283390045166016, "learning_rate": 2.9107054760730145e-05, "loss": 0.7156, "step": 1075 }, { "epoch": 1.46, "grad_norm": 4.696579933166504, "learning_rate": 2.8490379871731625e-05, "loss": 0.5984, "step": 1100 }, { "epoch": 1.5, "grad_norm": 6.687375545501709, "learning_rate": 2.7873704982733105e-05, "loss": 0.6752, "step": 1125 }, { "epoch": 1.53, "grad_norm": 12.651863098144531, "learning_rate": 2.7257030093734586e-05, "loss": 0.8265, "step": 1150 }, { "epoch": 1.56, "grad_norm": 5.703587532043457, "learning_rate": 2.6640355204736066e-05, "loss": 0.6061, "step": 1175 }, { "epoch": 1.6, "grad_norm": 12.219403266906738, "learning_rate": 2.6023680315737543e-05, "loss": 0.5414, "step": 1200 }, { "epoch": 1.63, "grad_norm": 13.479424476623535, "learning_rate": 2.5407005426739024e-05, "loss": 0.6115, "step": 1225 }, { "epoch": 1.66, "grad_norm": 2.6645803451538086, "learning_rate": 2.4790330537740504e-05, "loss": 0.7061, "step": 1250 }, { "epoch": 1.7, "grad_norm": 7.649036884307861, "learning_rate": 2.4173655648741984e-05, "loss": 0.5908, "step": 1275 }, { "epoch": 1.73, "grad_norm": 7.877263069152832, "learning_rate": 2.3556980759743465e-05, "loss": 0.5877, "step": 1300 }, { "epoch": 1.76, "grad_norm": 3.582003355026245, "learning_rate": 2.2940305870744945e-05, "loss": 0.6062, "step": 1325 }, { "epoch": 1.8, "grad_norm": 11.514890670776367, "learning_rate": 2.2323630981746425e-05, "loss": 0.5983, "step": 1350 }, { "epoch": 1.83, "grad_norm": 14.150575637817383, "learning_rate": 2.1706956092747906e-05, "loss": 0.6691, "step": 1375 }, { "epoch": 1.86, "grad_norm": 5.117061138153076, "learning_rate": 2.1090281203749386e-05, "loss": 0.6876, "step": 1400 }, { "epoch": 1.9, "grad_norm": 6.246029376983643, "learning_rate": 2.0473606314750867e-05, "loss": 0.6186, "step": 1425 }, { "epoch": 1.93, "grad_norm": 8.04295825958252, "learning_rate": 1.9856931425752347e-05, "loss": 0.7033, "step": 1450 }, { "epoch": 1.96, "grad_norm": 6.798038482666016, "learning_rate": 1.9240256536753827e-05, "loss": 0.6698, "step": 1475 }, { "epoch": 2.0, "grad_norm": 7.464223384857178, "learning_rate": 1.8623581647755304e-05, "loss": 0.6421, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.8062583222370173, "eval_f1_macro": 0.8030885441145199, "eval_f1_micro": 0.8062583222370173, "eval_f1_weighted": 0.8081177448606712, "eval_loss": 0.49981948733329773, "eval_precision_macro": 0.8105725964724615, "eval_precision_micro": 0.8062583222370173, "eval_precision_weighted": 0.8226218541008891, "eval_recall_macro": 0.8098096026490066, "eval_recall_micro": 0.8062583222370173, "eval_recall_weighted": 0.8062583222370173, "eval_runtime": 108.6859, "eval_samples_per_second": 13.82, "eval_steps_per_second": 0.865, "step": 1502 }, { "epoch": 2.03, "grad_norm": 7.260876655578613, "learning_rate": 1.8006906758756785e-05, "loss": 0.6168, "step": 1525 }, { "epoch": 2.06, "grad_norm": 10.966588973999023, "learning_rate": 1.7390231869758265e-05, "loss": 0.6235, "step": 1550 }, { "epoch": 2.1, "grad_norm": 7.024848937988281, "learning_rate": 1.6773556980759745e-05, "loss": 0.5583, "step": 1575 }, { "epoch": 2.13, "grad_norm": 7.464731693267822, "learning_rate": 1.6156882091761226e-05, "loss": 0.5755, "step": 1600 }, { "epoch": 2.16, "grad_norm": 3.144723892211914, "learning_rate": 1.5540207202762706e-05, "loss": 0.5972, "step": 1625 }, { "epoch": 2.2, "grad_norm": 10.064537048339844, "learning_rate": 1.4923532313764185e-05, "loss": 0.583, "step": 1650 }, { "epoch": 2.23, "grad_norm": 8.008367538452148, "learning_rate": 1.4306857424765665e-05, "loss": 0.5201, "step": 1675 }, { "epoch": 2.26, "grad_norm": 6.358066082000732, "learning_rate": 1.3690182535767144e-05, "loss": 0.5662, "step": 1700 }, { "epoch": 2.3, "grad_norm": 6.239820957183838, "learning_rate": 1.3073507646768624e-05, "loss": 0.5854, "step": 1725 }, { "epoch": 2.33, "grad_norm": 6.083053112030029, "learning_rate": 1.2456832757770105e-05, "loss": 0.5655, "step": 1750 }, { "epoch": 2.36, "grad_norm": 17.246747970581055, "learning_rate": 1.1840157868771585e-05, "loss": 0.6351, "step": 1775 }, { "epoch": 2.4, "grad_norm": 11.279265403747559, "learning_rate": 1.1223482979773065e-05, "loss": 0.5639, "step": 1800 }, { "epoch": 2.43, "grad_norm": 6.290694713592529, "learning_rate": 1.0606808090774544e-05, "loss": 0.5125, "step": 1825 }, { "epoch": 2.46, "grad_norm": 12.703798294067383, "learning_rate": 9.990133201776024e-06, "loss": 0.6839, "step": 1850 }, { "epoch": 2.5, "grad_norm": 5.460929870605469, "learning_rate": 9.373458312777505e-06, "loss": 0.5766, "step": 1875 }, { "epoch": 2.53, "grad_norm": 8.471376419067383, "learning_rate": 8.756783423778985e-06, "loss": 0.5011, "step": 1900 }, { "epoch": 2.56, "grad_norm": 5.83112096786499, "learning_rate": 8.140108534780466e-06, "loss": 0.4984, "step": 1925 }, { "epoch": 2.6, "grad_norm": 7.202915668487549, "learning_rate": 7.523433645781943e-06, "loss": 0.6013, "step": 1950 }, { "epoch": 2.63, "grad_norm": 7.630007266998291, "learning_rate": 6.906758756783424e-06, "loss": 0.4976, "step": 1975 }, { "epoch": 2.66, "grad_norm": 2.946850061416626, "learning_rate": 6.290083867784904e-06, "loss": 0.4218, "step": 2000 }, { "epoch": 2.7, "grad_norm": 12.109963417053223, "learning_rate": 5.6734089787863845e-06, "loss": 0.5818, "step": 2025 }, { "epoch": 2.73, "grad_norm": 12.913744926452637, "learning_rate": 5.056734089787865e-06, "loss": 0.5638, "step": 2050 }, { "epoch": 2.76, "grad_norm": 21.241952896118164, "learning_rate": 4.464726196349285e-06, "loss": 0.6451, "step": 2075 }, { "epoch": 2.8, "grad_norm": 10.033452987670898, "learning_rate": 3.848051307350765e-06, "loss": 0.5067, "step": 2100 }, { "epoch": 2.83, "grad_norm": 15.357681274414062, "learning_rate": 3.231376418352245e-06, "loss": 0.6292, "step": 2125 }, { "epoch": 2.86, "grad_norm": 10.886502265930176, "learning_rate": 2.614701529353725e-06, "loss": 0.6365, "step": 2150 }, { "epoch": 2.9, "grad_norm": 6.1179986000061035, "learning_rate": 1.998026640355205e-06, "loss": 0.5717, "step": 2175 }, { "epoch": 2.93, "grad_norm": 8.245763778686523, "learning_rate": 1.381351751356685e-06, "loss": 0.5669, "step": 2200 }, { "epoch": 2.96, "grad_norm": 9.85698413848877, "learning_rate": 7.646768623581648e-07, "loss": 0.4571, "step": 2225 }, { "epoch": 3.0, "grad_norm": 16.406970977783203, "learning_rate": 1.480019733596448e-07, "loss": 0.5549, "step": 2250 }, { "epoch": 3.0, "eval_accuracy": 0.829560585885486, "eval_f1_macro": 0.8236777117298302, "eval_f1_micro": 0.829560585885486, "eval_f1_weighted": 0.8289271724966029, "eval_loss": 0.4469132423400879, "eval_precision_macro": 0.8243514221166717, "eval_precision_micro": 0.829560585885486, "eval_precision_weighted": 0.8313607282611274, "eval_recall_macro": 0.8260057947019868, "eval_recall_micro": 0.829560585885486, "eval_recall_weighted": 0.829560585885486, "eval_runtime": 107.4762, "eval_samples_per_second": 13.975, "eval_steps_per_second": 0.875, "step": 2253 } ], "logging_steps": 25, "max_steps": 2253, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.3962756971819336e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }