{
  "best_metric": 0.4469132423400879,
  "best_model_checkpoint": "xblock-base-patch1-224/checkpoint-2253",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 2253,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "grad_norm": 9.94556713104248,
      "learning_rate": 5.088495575221239e-06,
      "loss": 1.5614,
      "step": 25
    },
    {
      "epoch": 0.07,
      "grad_norm": 9.733675956726074,
      "learning_rate": 1.0619469026548673e-05,
      "loss": 1.4735,
      "step": 50
    },
    {
      "epoch": 0.1,
      "grad_norm": 9.123199462890625,
      "learning_rate": 1.6150442477876106e-05,
      "loss": 1.3293,
      "step": 75
    },
    {
      "epoch": 0.13,
      "grad_norm": 10.894118309020996,
      "learning_rate": 2.1681415929203542e-05,
      "loss": 1.2442,
      "step": 100
    },
    {
      "epoch": 0.17,
      "grad_norm": 8.819581985473633,
      "learning_rate": 2.721238938053097e-05,
      "loss": 1.0742,
      "step": 125
    },
    {
      "epoch": 0.2,
      "grad_norm": 13.125239372253418,
      "learning_rate": 3.274336283185841e-05,
      "loss": 1.1571,
      "step": 150
    },
    {
      "epoch": 0.23,
      "grad_norm": 8.423856735229492,
      "learning_rate": 3.827433628318584e-05,
      "loss": 1.1193,
      "step": 175
    },
    {
      "epoch": 0.27,
      "grad_norm": 11.360130310058594,
      "learning_rate": 4.380530973451328e-05,
      "loss": 1.0605,
      "step": 200
    },
    {
      "epoch": 0.3,
      "grad_norm": 7.3759379386901855,
      "learning_rate": 4.9336283185840707e-05,
      "loss": 0.9883,
      "step": 225
    },
    {
      "epoch": 0.33,
      "grad_norm": 12.12755298614502,
      "learning_rate": 4.9457326097681306e-05,
      "loss": 0.9764,
      "step": 250
    },
    {
      "epoch": 0.37,
      "grad_norm": 5.0697712898254395,
      "learning_rate": 4.884065120868278e-05,
      "loss": 1.1213,
      "step": 275
    },
    {
      "epoch": 0.4,
      "grad_norm": 8.178693771362305,
      "learning_rate": 4.822397631968427e-05,
      "loss": 0.9683,
      "step": 300
    },
    {
      "epoch": 0.43,
      "grad_norm": 7.6824846267700195,
      "learning_rate": 4.7607301430685744e-05,
      "loss": 1.0408,
      "step": 325
    },
    {
      "epoch": 0.47,
      "grad_norm": 10.855768203735352,
      "learning_rate": 4.699062654168723e-05,
      "loss": 0.9092,
      "step": 350
    },
    {
      "epoch": 0.5,
      "grad_norm": 6.187016487121582,
      "learning_rate": 4.6373951652688704e-05,
      "loss": 1.0058,
      "step": 375
    },
    {
      "epoch": 0.53,
      "grad_norm": 8.00668716430664,
      "learning_rate": 4.575727676369018e-05,
      "loss": 0.8086,
      "step": 400
    },
    {
      "epoch": 0.57,
      "grad_norm": 5.1038336753845215,
      "learning_rate": 4.5140601874691665e-05,
      "loss": 0.9027,
      "step": 425
    },
    {
      "epoch": 0.6,
      "grad_norm": 10.569095611572266,
      "learning_rate": 4.452392698569314e-05,
      "loss": 0.8234,
      "step": 450
    },
    {
      "epoch": 0.63,
      "grad_norm": 8.256200790405273,
      "learning_rate": 4.3907252096694626e-05,
      "loss": 0.9163,
      "step": 475
    },
    {
      "epoch": 0.67,
      "grad_norm": 11.042557716369629,
      "learning_rate": 4.32905772076961e-05,
      "loss": 0.8243,
      "step": 500
    },
    {
      "epoch": 0.7,
      "grad_norm": 18.462459564208984,
      "learning_rate": 4.267390231869759e-05,
      "loss": 0.8703,
      "step": 525
    },
    {
      "epoch": 0.73,
      "grad_norm": 12.024648666381836,
      "learning_rate": 4.2057227429699064e-05,
      "loss": 0.991,
      "step": 550
    },
    {
      "epoch": 0.77,
      "grad_norm": 11.26354694366455,
      "learning_rate": 4.144055254070055e-05,
      "loss": 0.8402,
      "step": 575
    },
    {
      "epoch": 0.8,
      "grad_norm": 8.839094161987305,
      "learning_rate": 4.0823877651702024e-05,
      "loss": 0.7653,
      "step": 600
    },
    {
      "epoch": 0.83,
      "grad_norm": 10.799356460571289,
      "learning_rate": 4.020720276270351e-05,
      "loss": 0.7787,
      "step": 625
    },
    {
      "epoch": 0.87,
      "grad_norm": 12.935748100280762,
      "learning_rate": 3.9590527873704985e-05,
      "loss": 0.7738,
      "step": 650
    },
    {
      "epoch": 0.9,
      "grad_norm": 4.829887866973877,
      "learning_rate": 3.897385298470647e-05,
      "loss": 0.8329,
      "step": 675
    },
    {
      "epoch": 0.93,
      "grad_norm": 4.532620429992676,
      "learning_rate": 3.8357178095707946e-05,
      "loss": 0.9689,
      "step": 700
    },
    {
      "epoch": 0.97,
      "grad_norm": 8.8261079788208,
      "learning_rate": 3.774050320670943e-05,
      "loss": 0.7615,
      "step": 725
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.9906632900238037,
      "learning_rate": 3.712382831771091e-05,
      "loss": 0.8284,
      "step": 750
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7503328894806924,
      "eval_f1_macro": 0.7379493476306923,
      "eval_f1_micro": 0.7503328894806925,
      "eval_f1_weighted": 0.7478927601803307,
      "eval_loss": 0.6315314769744873,
      "eval_precision_macro": 0.7619988736851466,
      "eval_precision_micro": 0.7503328894806924,
      "eval_precision_weighted": 0.7668596523942972,
      "eval_recall_macro": 0.7356084437086093,
      "eval_recall_micro": 0.7503328894806924,
      "eval_recall_weighted": 0.7503328894806924,
      "eval_runtime": 103.3083,
      "eval_samples_per_second": 14.539,
      "eval_steps_per_second": 0.91,
      "step": 751
    },
    {
      "epoch": 1.03,
      "grad_norm": 7.72317361831665,
      "learning_rate": 3.650715342871239e-05,
      "loss": 0.8196,
      "step": 775
    },
    {
      "epoch": 1.07,
      "grad_norm": 6.5625152587890625,
      "learning_rate": 3.589047853971386e-05,
      "loss": 0.7794,
      "step": 800
    },
    {
      "epoch": 1.1,
      "grad_norm": 5.599926948547363,
      "learning_rate": 3.5273803650715344e-05,
      "loss": 0.6632,
      "step": 825
    },
    {
      "epoch": 1.13,
      "grad_norm": 9.425518989562988,
      "learning_rate": 3.465712876171682e-05,
      "loss": 0.7074,
      "step": 850
    },
    {
      "epoch": 1.17,
      "grad_norm": 8.80082893371582,
      "learning_rate": 3.4040453872718305e-05,
      "loss": 0.7325,
      "step": 875
    },
    {
      "epoch": 1.2,
      "grad_norm": 11.81970500946045,
      "learning_rate": 3.342377898371978e-05,
      "loss": 0.8574,
      "step": 900
    },
    {
      "epoch": 1.23,
      "grad_norm": 8.871489524841309,
      "learning_rate": 3.2807104094721266e-05,
      "loss": 0.8407,
      "step": 925
    },
    {
      "epoch": 1.26,
      "grad_norm": 7.296131610870361,
      "learning_rate": 3.219042920572274e-05,
      "loss": 0.6962,
      "step": 950
    },
    {
      "epoch": 1.3,
      "grad_norm": 8.161062240600586,
      "learning_rate": 3.1573754316724227e-05,
      "loss": 0.8163,
      "step": 975
    },
    {
      "epoch": 1.33,
      "grad_norm": 14.00735092163086,
      "learning_rate": 3.0957079427725704e-05,
      "loss": 0.6706,
      "step": 1000
    },
    {
      "epoch": 1.36,
      "grad_norm": 10.980608940124512,
      "learning_rate": 3.0340404538727184e-05,
      "loss": 0.6639,
      "step": 1025
    },
    {
      "epoch": 1.4,
      "grad_norm": 6.72366189956665,
      "learning_rate": 2.9723729649728664e-05,
      "loss": 0.7,
      "step": 1050
    },
    {
      "epoch": 1.43,
      "grad_norm": 24.283390045166016,
      "learning_rate": 2.9107054760730145e-05,
      "loss": 0.7156,
      "step": 1075
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.696579933166504,
      "learning_rate": 2.8490379871731625e-05,
      "loss": 0.5984,
      "step": 1100
    },
    {
      "epoch": 1.5,
      "grad_norm": 6.687375545501709,
      "learning_rate": 2.7873704982733105e-05,
      "loss": 0.6752,
      "step": 1125
    },
    {
      "epoch": 1.53,
      "grad_norm": 12.651863098144531,
      "learning_rate": 2.7257030093734586e-05,
      "loss": 0.8265,
      "step": 1150
    },
    {
      "epoch": 1.56,
      "grad_norm": 5.703587532043457,
      "learning_rate": 2.6640355204736066e-05,
      "loss": 0.6061,
      "step": 1175
    },
    {
      "epoch": 1.6,
      "grad_norm": 12.219403266906738,
      "learning_rate": 2.6023680315737543e-05,
      "loss": 0.5414,
      "step": 1200
    },
    {
      "epoch": 1.63,
      "grad_norm": 13.479424476623535,
      "learning_rate": 2.5407005426739024e-05,
      "loss": 0.6115,
      "step": 1225
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6645803451538086,
      "learning_rate": 2.4790330537740504e-05,
      "loss": 0.7061,
      "step": 1250
    },
    {
      "epoch": 1.7,
      "grad_norm": 7.649036884307861,
      "learning_rate": 2.4173655648741984e-05,
      "loss": 0.5908,
      "step": 1275
    },
    {
      "epoch": 1.73,
      "grad_norm": 7.877263069152832,
      "learning_rate": 2.3556980759743465e-05,
      "loss": 0.5877,
      "step": 1300
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.582003355026245,
      "learning_rate": 2.2940305870744945e-05,
      "loss": 0.6062,
      "step": 1325
    },
    {
      "epoch": 1.8,
      "grad_norm": 11.514890670776367,
      "learning_rate": 2.2323630981746425e-05,
      "loss": 0.5983,
      "step": 1350
    },
    {
      "epoch": 1.83,
      "grad_norm": 14.150575637817383,
      "learning_rate": 2.1706956092747906e-05,
      "loss": 0.6691,
      "step": 1375
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.117061138153076,
      "learning_rate": 2.1090281203749386e-05,
      "loss": 0.6876,
      "step": 1400
    },
    {
      "epoch": 1.9,
      "grad_norm": 6.246029376983643,
      "learning_rate": 2.0473606314750867e-05,
      "loss": 0.6186,
      "step": 1425
    },
    {
      "epoch": 1.93,
      "grad_norm": 8.04295825958252,
      "learning_rate": 1.9856931425752347e-05,
      "loss": 0.7033,
      "step": 1450
    },
    {
      "epoch": 1.96,
      "grad_norm": 6.798038482666016,
      "learning_rate": 1.9240256536753827e-05,
      "loss": 0.6698,
      "step": 1475
    },
    {
      "epoch": 2.0,
      "grad_norm": 7.464223384857178,
      "learning_rate": 1.8623581647755304e-05,
      "loss": 0.6421,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8062583222370173,
      "eval_f1_macro": 0.8030885441145199,
      "eval_f1_micro": 0.8062583222370173,
      "eval_f1_weighted": 0.8081177448606712,
      "eval_loss": 0.49981948733329773,
      "eval_precision_macro": 0.8105725964724615,
      "eval_precision_micro": 0.8062583222370173,
      "eval_precision_weighted": 0.8226218541008891,
      "eval_recall_macro": 0.8098096026490066,
      "eval_recall_micro": 0.8062583222370173,
      "eval_recall_weighted": 0.8062583222370173,
      "eval_runtime": 108.6859,
      "eval_samples_per_second": 13.82,
      "eval_steps_per_second": 0.865,
      "step": 1502
    },
    {
      "epoch": 2.03,
      "grad_norm": 7.260876655578613,
      "learning_rate": 1.8006906758756785e-05,
      "loss": 0.6168,
      "step": 1525
    },
    {
      "epoch": 2.06,
      "grad_norm": 10.966588973999023,
      "learning_rate": 1.7390231869758265e-05,
      "loss": 0.6235,
      "step": 1550
    },
    {
      "epoch": 2.1,
      "grad_norm": 7.024848937988281,
      "learning_rate": 1.6773556980759745e-05,
      "loss": 0.5583,
      "step": 1575
    },
    {
      "epoch": 2.13,
      "grad_norm": 7.464731693267822,
      "learning_rate": 1.6156882091761226e-05,
      "loss": 0.5755,
      "step": 1600
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.144723892211914,
      "learning_rate": 1.5540207202762706e-05,
      "loss": 0.5972,
      "step": 1625
    },
    {
      "epoch": 2.2,
      "grad_norm": 10.064537048339844,
      "learning_rate": 1.4923532313764185e-05,
      "loss": 0.583,
      "step": 1650
    },
    {
      "epoch": 2.23,
      "grad_norm": 8.008367538452148,
      "learning_rate": 1.4306857424765665e-05,
      "loss": 0.5201,
      "step": 1675
    },
    {
      "epoch": 2.26,
      "grad_norm": 6.358066082000732,
      "learning_rate": 1.3690182535767144e-05,
      "loss": 0.5662,
      "step": 1700
    },
    {
      "epoch": 2.3,
      "grad_norm": 6.239820957183838,
      "learning_rate": 1.3073507646768624e-05,
      "loss": 0.5854,
      "step": 1725
    },
    {
      "epoch": 2.33,
      "grad_norm": 6.083053112030029,
      "learning_rate": 1.2456832757770105e-05,
      "loss": 0.5655,
      "step": 1750
    },
    {
      "epoch": 2.36,
      "grad_norm": 17.246747970581055,
      "learning_rate": 1.1840157868771585e-05,
      "loss": 0.6351,
      "step": 1775
    },
    {
      "epoch": 2.4,
      "grad_norm": 11.279265403747559,
      "learning_rate": 1.1223482979773065e-05,
      "loss": 0.5639,
      "step": 1800
    },
    {
      "epoch": 2.43,
      "grad_norm": 6.290694713592529,
      "learning_rate": 1.0606808090774544e-05,
      "loss": 0.5125,
      "step": 1825
    },
    {
      "epoch": 2.46,
      "grad_norm": 12.703798294067383,
      "learning_rate": 9.990133201776024e-06,
      "loss": 0.6839,
      "step": 1850
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.460929870605469,
      "learning_rate": 9.373458312777505e-06,
      "loss": 0.5766,
      "step": 1875
    },
    {
      "epoch": 2.53,
      "grad_norm": 8.471376419067383,
      "learning_rate": 8.756783423778985e-06,
      "loss": 0.5011,
      "step": 1900
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.83112096786499,
      "learning_rate": 8.140108534780466e-06,
      "loss": 0.4984,
      "step": 1925
    },
    {
      "epoch": 2.6,
      "grad_norm": 7.202915668487549,
      "learning_rate": 7.523433645781943e-06,
      "loss": 0.6013,
      "step": 1950
    },
    {
      "epoch": 2.63,
      "grad_norm": 7.630007266998291,
      "learning_rate": 6.906758756783424e-06,
      "loss": 0.4976,
      "step": 1975
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.946850061416626,
      "learning_rate": 6.290083867784904e-06,
      "loss": 0.4218,
      "step": 2000
    },
    {
      "epoch": 2.7,
      "grad_norm": 12.109963417053223,
      "learning_rate": 5.6734089787863845e-06,
      "loss": 0.5818,
      "step": 2025
    },
    {
      "epoch": 2.73,
      "grad_norm": 12.913744926452637,
      "learning_rate": 5.056734089787865e-06,
      "loss": 0.5638,
      "step": 2050
    },
    {
      "epoch": 2.76,
      "grad_norm": 21.241952896118164,
      "learning_rate": 4.464726196349285e-06,
      "loss": 0.6451,
      "step": 2075
    },
    {
      "epoch": 2.8,
      "grad_norm": 10.033452987670898,
      "learning_rate": 3.848051307350765e-06,
      "loss": 0.5067,
      "step": 2100
    },
    {
      "epoch": 2.83,
      "grad_norm": 15.357681274414062,
      "learning_rate": 3.231376418352245e-06,
      "loss": 0.6292,
      "step": 2125
    },
    {
      "epoch": 2.86,
      "grad_norm": 10.886502265930176,
      "learning_rate": 2.614701529353725e-06,
      "loss": 0.6365,
      "step": 2150
    },
    {
      "epoch": 2.9,
      "grad_norm": 6.1179986000061035,
      "learning_rate": 1.998026640355205e-06,
      "loss": 0.5717,
      "step": 2175
    },
    {
      "epoch": 2.93,
      "grad_norm": 8.245763778686523,
      "learning_rate": 1.381351751356685e-06,
      "loss": 0.5669,
      "step": 2200
    },
    {
      "epoch": 2.96,
      "grad_norm": 9.85698413848877,
      "learning_rate": 7.646768623581648e-07,
      "loss": 0.4571,
      "step": 2225
    },
    {
      "epoch": 3.0,
      "grad_norm": 16.406970977783203,
      "learning_rate": 1.480019733596448e-07,
      "loss": 0.5549,
      "step": 2250
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.829560585885486,
      "eval_f1_macro": 0.8236777117298302,
      "eval_f1_micro": 0.829560585885486,
      "eval_f1_weighted": 0.8289271724966029,
      "eval_loss": 0.4469132423400879,
      "eval_precision_macro": 0.8243514221166717,
      "eval_precision_micro": 0.829560585885486,
      "eval_precision_weighted": 0.8313607282611274,
      "eval_recall_macro": 0.8260057947019868,
      "eval_recall_micro": 0.829560585885486,
      "eval_recall_weighted": 0.829560585885486,
      "eval_runtime": 107.4762,
      "eval_samples_per_second": 13.975,
      "eval_steps_per_second": 0.875,
      "step": 2253
    }
  ],
  "logging_steps": 25,
  "max_steps": 2253,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 1.3962756971819336e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}