| { |
| "best_global_step": 59997, |
| "best_metric": 0.8794049695261634, |
| "best_model_checkpoint": "./doc_type_v1_primary_model_multilingual-e5-small/checkpoint-59997", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 59997, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025001250062503127, |
| "grad_norm": 6.811492443084717, |
| "learning_rate": 4.958414587396037e-05, |
| "loss": 1.2379, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.050002500125006254, |
| "grad_norm": 27.201915740966797, |
| "learning_rate": 4.916745837291865e-05, |
| "loss": 0.8651, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07500375018750938, |
| "grad_norm": 38.990150451660156, |
| "learning_rate": 4.875077087187693e-05, |
| "loss": 0.7379, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.10000500025001251, |
| "grad_norm": 7.001402378082275, |
| "learning_rate": 4.833408337083521e-05, |
| "loss": 0.7292, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12500625031251564, |
| "grad_norm": 13.325078010559082, |
| "learning_rate": 4.791739586979349e-05, |
| "loss": 0.696, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15000750037501875, |
| "grad_norm": 8.727783203125, |
| "learning_rate": 4.7500708368751775e-05, |
| "loss": 0.711, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17500875043752187, |
| "grad_norm": 16.32622528076172, |
| "learning_rate": 4.708402086771005e-05, |
| "loss": 0.6598, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.20001000050002501, |
| "grad_norm": 7.856052875518799, |
| "learning_rate": 4.6667333366668335e-05, |
| "loss": 0.6057, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.22501125056252813, |
| "grad_norm": 7.5791335105896, |
| "learning_rate": 4.625064586562662e-05, |
| "loss": 0.585, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2500125006250313, |
| "grad_norm": 10.478697776794434, |
| "learning_rate": 4.58339583645849e-05, |
| "loss": 0.5894, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.27501375068753436, |
| "grad_norm": 12.097478866577148, |
| "learning_rate": 4.541727086354318e-05, |
| "loss": 0.5759, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.3000150007500375, |
| "grad_norm": 9.861705780029297, |
| "learning_rate": 4.5000583362501456e-05, |
| "loss": 0.5605, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.32501625081254065, |
| "grad_norm": 23.986101150512695, |
| "learning_rate": 4.458389586145974e-05, |
| "loss": 0.5548, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.35001750087504374, |
| "grad_norm": 28.77222442626953, |
| "learning_rate": 4.416720836041802e-05, |
| "loss": 0.5508, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.3750187509375469, |
| "grad_norm": 5.619863510131836, |
| "learning_rate": 4.3750520859376306e-05, |
| "loss": 0.5182, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.40002000100005003, |
| "grad_norm": 5.042713642120361, |
| "learning_rate": 4.333383335833458e-05, |
| "loss": 0.5597, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4250212510625531, |
| "grad_norm": 8.755766868591309, |
| "learning_rate": 4.291714585729287e-05, |
| "loss": 0.5342, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.45002250112505626, |
| "grad_norm": 9.902745246887207, |
| "learning_rate": 4.250045835625115e-05, |
| "loss": 0.5154, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.47502375118755935, |
| "grad_norm": 18.608970642089844, |
| "learning_rate": 4.2083770855209434e-05, |
| "loss": 0.5101, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5000250012500626, |
| "grad_norm": 7.529025077819824, |
| "learning_rate": 4.166708335416771e-05, |
| "loss": 0.5153, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5250262513125656, |
| "grad_norm": 4.368379592895508, |
| "learning_rate": 4.125039585312599e-05, |
| "loss": 0.4962, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.5500275013750687, |
| "grad_norm": 5.460580348968506, |
| "learning_rate": 4.083370835208427e-05, |
| "loss": 0.5055, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5750287514375719, |
| "grad_norm": 17.047475814819336, |
| "learning_rate": 4.0417020851042555e-05, |
| "loss": 0.5289, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.600030001500075, |
| "grad_norm": 8.671713829040527, |
| "learning_rate": 4.000033335000083e-05, |
| "loss": 0.5024, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6250312515625781, |
| "grad_norm": 13.457786560058594, |
| "learning_rate": 3.9583645848959115e-05, |
| "loss": 0.481, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.6500325016250813, |
| "grad_norm": 1.2953449487686157, |
| "learning_rate": 3.91669583479174e-05, |
| "loss": 0.4843, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.6750337516875844, |
| "grad_norm": 0.7997756004333496, |
| "learning_rate": 3.875027084687568e-05, |
| "loss": 0.4519, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.7000350017500875, |
| "grad_norm": 5.482394218444824, |
| "learning_rate": 3.833358334583396e-05, |
| "loss": 0.4829, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.7250362518125907, |
| "grad_norm": 13.774917602539062, |
| "learning_rate": 3.791689584479224e-05, |
| "loss": 0.4746, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.7500375018750938, |
| "grad_norm": 7.448230743408203, |
| "learning_rate": 3.750020834375052e-05, |
| "loss": 0.5123, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7750387519375969, |
| "grad_norm": 11.042137145996094, |
| "learning_rate": 3.70835208427088e-05, |
| "loss": 0.5058, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8000400020001001, |
| "grad_norm": 9.733834266662598, |
| "learning_rate": 3.666683334166709e-05, |
| "loss": 0.453, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.8250412520626031, |
| "grad_norm": 9.845958709716797, |
| "learning_rate": 3.6250145840625363e-05, |
| "loss": 0.4604, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.8500425021251062, |
| "grad_norm": 24.350200653076172, |
| "learning_rate": 3.583345833958365e-05, |
| "loss": 0.4689, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.8750437521876093, |
| "grad_norm": 25.33015251159668, |
| "learning_rate": 3.541677083854193e-05, |
| "loss": 0.4689, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.9000450022501125, |
| "grad_norm": 3.9758975505828857, |
| "learning_rate": 3.5000083337500214e-05, |
| "loss": 0.4704, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.9250462523126156, |
| "grad_norm": 5.771573066711426, |
| "learning_rate": 3.458339583645849e-05, |
| "loss": 0.4367, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.9500475023751187, |
| "grad_norm": 5.9610819816589355, |
| "learning_rate": 3.4166708335416775e-05, |
| "loss": 0.451, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.9750487524376219, |
| "grad_norm": 13.63049030303955, |
| "learning_rate": 3.375002083437505e-05, |
| "loss": 0.4538, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.8655922269409259, |
| "eval_loss": 0.4387092590332031, |
| "eval_runtime": 10.5773, |
| "eval_samples_per_second": 1891.79, |
| "eval_steps_per_second": 236.545, |
| "step": 19999 |
| }, |
| { |
| "epoch": 1.000050002500125, |
| "grad_norm": 5.536550045013428, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.4367, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.025051252562628, |
| "grad_norm": 16.51177406311035, |
| "learning_rate": 3.291664583229161e-05, |
| "loss": 0.3614, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.0500525026251313, |
| "grad_norm": 20.4051570892334, |
| "learning_rate": 3.2499958331249895e-05, |
| "loss": 0.3757, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.0750537526876345, |
| "grad_norm": 4.4692912101745605, |
| "learning_rate": 3.208327083020818e-05, |
| "loss": 0.3197, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.1000550027501375, |
| "grad_norm": 9.222668647766113, |
| "learning_rate": 3.166658332916646e-05, |
| "loss": 0.3649, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.1250562528126407, |
| "grad_norm": 20.703449249267578, |
| "learning_rate": 3.124989582812474e-05, |
| "loss": 0.3736, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.1500575028751436, |
| "grad_norm": 10.590250015258789, |
| "learning_rate": 3.083320832708302e-05, |
| "loss": 0.3325, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.1750587529376468, |
| "grad_norm": 39.34541320800781, |
| "learning_rate": 3.0416520826041306e-05, |
| "loss": 0.3472, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.20006000300015, |
| "grad_norm": 9.01701545715332, |
| "learning_rate": 2.9999833324999583e-05, |
| "loss": 0.3513, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.2250612530626532, |
| "grad_norm": 20.631200790405273, |
| "learning_rate": 2.9583145823957863e-05, |
| "loss": 0.3699, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.2500625031251562, |
| "grad_norm": 0.09197826683521271, |
| "learning_rate": 2.9166458322916147e-05, |
| "loss": 0.3847, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.2750637531876594, |
| "grad_norm": 5.867861747741699, |
| "learning_rate": 2.8749770821874427e-05, |
| "loss": 0.3252, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.3000650032501624, |
| "grad_norm": 21.79376220703125, |
| "learning_rate": 2.833308332083271e-05, |
| "loss": 0.3573, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.3250662533126656, |
| "grad_norm": 10.486989974975586, |
| "learning_rate": 2.791639581979099e-05, |
| "loss": 0.3704, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.3500675033751688, |
| "grad_norm": 10.07336139678955, |
| "learning_rate": 2.7499708318749275e-05, |
| "loss": 0.3269, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.375068753437672, |
| "grad_norm": 0.7917349338531494, |
| "learning_rate": 2.7083020817707555e-05, |
| "loss": 0.3637, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.400070003500175, |
| "grad_norm": 4.454699993133545, |
| "learning_rate": 2.666633331666584e-05, |
| "loss": 0.3503, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.4250712535626782, |
| "grad_norm": 4.157910346984863, |
| "learning_rate": 2.6249645815624112e-05, |
| "loss": 0.3503, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.4500725036251811, |
| "grad_norm": 0.06325356662273407, |
| "learning_rate": 2.5832958314582395e-05, |
| "loss": 0.3246, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.4750737536876843, |
| "grad_norm": 19.81574058532715, |
| "learning_rate": 2.5416270813540676e-05, |
| "loss": 0.3507, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.5000750037501875, |
| "grad_norm": 0.12496486306190491, |
| "learning_rate": 2.499958331249896e-05, |
| "loss": 0.3274, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.5250762538126907, |
| "grad_norm": 14.442770004272461, |
| "learning_rate": 2.458289581145724e-05, |
| "loss": 0.3926, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.5500775038751937, |
| "grad_norm": 30.787338256835938, |
| "learning_rate": 2.4166208310415523e-05, |
| "loss": 0.3445, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.575078753937697, |
| "grad_norm": 16.105411529541016, |
| "learning_rate": 2.3749520809373803e-05, |
| "loss": 0.3397, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.6000800040002, |
| "grad_norm": 0.16195891797542572, |
| "learning_rate": 2.3332833308332083e-05, |
| "loss": 0.3337, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.625081254062703, |
| "grad_norm": 5.337989807128906, |
| "learning_rate": 2.2916145807290363e-05, |
| "loss": 0.3398, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.6500825041252063, |
| "grad_norm": 12.693595886230469, |
| "learning_rate": 2.2499458306248647e-05, |
| "loss": 0.3457, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.6750837541877095, |
| "grad_norm": 0.6089347004890442, |
| "learning_rate": 2.2082770805206927e-05, |
| "loss": 0.3252, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.7000850042502125, |
| "grad_norm": 21.343097686767578, |
| "learning_rate": 2.166608330416521e-05, |
| "loss": 0.3691, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.7250862543127157, |
| "grad_norm": 7.946640968322754, |
| "learning_rate": 2.124939580312349e-05, |
| "loss": 0.3334, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.7500875043752186, |
| "grad_norm": 7.454244613647461, |
| "learning_rate": 2.083270830208177e-05, |
| "loss": 0.3363, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.7750887544377218, |
| "grad_norm": 0.1911257654428482, |
| "learning_rate": 2.0416020801040055e-05, |
| "loss": 0.3454, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.800090004500225, |
| "grad_norm": 13.01964282989502, |
| "learning_rate": 1.9999333299998335e-05, |
| "loss": 0.3189, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.8250912545627282, |
| "grad_norm": 0.19861529767513275, |
| "learning_rate": 1.9582645798956615e-05, |
| "loss": 0.3422, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.8500925046252312, |
| "grad_norm": 19.57059097290039, |
| "learning_rate": 1.9165958297914895e-05, |
| "loss": 0.3355, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.8750937546877344, |
| "grad_norm": 4.671505451202393, |
| "learning_rate": 1.874927079687318e-05, |
| "loss": 0.3195, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.9000950047502374, |
| "grad_norm": 13.636198997497559, |
| "learning_rate": 1.833258329583146e-05, |
| "loss": 0.2937, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.9250962548127406, |
| "grad_norm": 3.3196206092834473, |
| "learning_rate": 1.7915895794789743e-05, |
| "loss": 0.3382, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.9500975048752438, |
| "grad_norm": 10.638530731201172, |
| "learning_rate": 1.749920829374802e-05, |
| "loss": 0.3509, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.975098754937747, |
| "grad_norm": 25.718746185302734, |
| "learning_rate": 1.7082520792706303e-05, |
| "loss": 0.3244, |
| "step": 39500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.8739136212588781, |
| "eval_loss": 0.5150496959686279, |
| "eval_runtime": 10.5221, |
| "eval_samples_per_second": 1901.71, |
| "eval_steps_per_second": 237.785, |
| "step": 39998 |
| }, |
| { |
| "epoch": 2.00010000500025, |
| "grad_norm": 5.354879856109619, |
| "learning_rate": 1.6665833291664583e-05, |
| "loss": 0.3325, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.025101255062753, |
| "grad_norm": 26.597732543945312, |
| "learning_rate": 1.6249145790622867e-05, |
| "loss": 0.2202, |
| "step": 40500 |
| }, |
| { |
| "epoch": 2.050102505125256, |
| "grad_norm": 19.418195724487305, |
| "learning_rate": 1.5832458289581144e-05, |
| "loss": 0.2126, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.0751037551877594, |
| "grad_norm": 30.896820068359375, |
| "learning_rate": 1.5415770788539427e-05, |
| "loss": 0.1978, |
| "step": 41500 |
| }, |
| { |
| "epoch": 2.1001050052502626, |
| "grad_norm": 33.62570571899414, |
| "learning_rate": 1.4999083287497709e-05, |
| "loss": 0.2235, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.1251062553127658, |
| "grad_norm": 34.06229782104492, |
| "learning_rate": 1.4582395786455991e-05, |
| "loss": 0.2285, |
| "step": 42500 |
| }, |
| { |
| "epoch": 2.150107505375269, |
| "grad_norm": 7.419727325439453, |
| "learning_rate": 1.4165708285414273e-05, |
| "loss": 0.2114, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.1751087554377717, |
| "grad_norm": 0.09190714359283447, |
| "learning_rate": 1.3749020784372551e-05, |
| "loss": 0.2401, |
| "step": 43500 |
| }, |
| { |
| "epoch": 2.200110005500275, |
| "grad_norm": 3.367650032043457, |
| "learning_rate": 1.3332333283330833e-05, |
| "loss": 0.2316, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.225111255562778, |
| "grad_norm": 1.1675509214401245, |
| "learning_rate": 1.2915645782289115e-05, |
| "loss": 0.2356, |
| "step": 44500 |
| }, |
| { |
| "epoch": 2.2501125056252813, |
| "grad_norm": 9.24765682220459, |
| "learning_rate": 1.2498958281247395e-05, |
| "loss": 0.2265, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.2751137556877845, |
| "grad_norm": 49.01567459106445, |
| "learning_rate": 1.2082270780205677e-05, |
| "loss": 0.2156, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.3001150057502873, |
| "grad_norm": 1.4991744756698608, |
| "learning_rate": 1.1665583279163959e-05, |
| "loss": 0.1985, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.3251162558127905, |
| "grad_norm": 23.972734451293945, |
| "learning_rate": 1.124889577812224e-05, |
| "loss": 0.2341, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.3501175058752937, |
| "grad_norm": 0.17075876891613007, |
| "learning_rate": 1.0832208277080521e-05, |
| "loss": 0.2253, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.375118755937797, |
| "grad_norm": 31.144304275512695, |
| "learning_rate": 1.0415520776038801e-05, |
| "loss": 0.2155, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.4001200060003, |
| "grad_norm": 0.0451333224773407, |
| "learning_rate": 9.998833274997083e-06, |
| "loss": 0.1964, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.4251212560628033, |
| "grad_norm": 0.07999496906995773, |
| "learning_rate": 9.582145773955365e-06, |
| "loss": 0.2406, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.4501225061253065, |
| "grad_norm": 0.9701845049858093, |
| "learning_rate": 9.165458272913647e-06, |
| "loss": 0.2345, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.475123756187809, |
| "grad_norm": 1.0465730428695679, |
| "learning_rate": 8.748770771871927e-06, |
| "loss": 0.2179, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.5001250062503124, |
| "grad_norm": 0.01202826015651226, |
| "learning_rate": 8.332083270830209e-06, |
| "loss": 0.2076, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.5251262563128156, |
| "grad_norm": 0.02468780241906643, |
| "learning_rate": 7.915395769788491e-06, |
| "loss": 0.2387, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.550127506375319, |
| "grad_norm": 2.958998918533325, |
| "learning_rate": 7.498708268746771e-06, |
| "loss": 0.2114, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.575128756437822, |
| "grad_norm": 0.1065281331539154, |
| "learning_rate": 7.082020767705053e-06, |
| "loss": 0.1916, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.6001300065003248, |
| "grad_norm": 34.561431884765625, |
| "learning_rate": 6.665333266663333e-06, |
| "loss": 0.2074, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.625131256562828, |
| "grad_norm": 0.007779615931212902, |
| "learning_rate": 6.248645765621615e-06, |
| "loss": 0.2133, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.650132506625331, |
| "grad_norm": 0.13916213810443878, |
| "learning_rate": 5.831958264579896e-06, |
| "loss": 0.2301, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.6751337566878344, |
| "grad_norm": 42.24327087402344, |
| "learning_rate": 5.415270763538177e-06, |
| "loss": 0.2216, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.7001350067503376, |
| "grad_norm": 7.163196563720703, |
| "learning_rate": 4.998583262496458e-06, |
| "loss": 0.2313, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.7251362568128408, |
| "grad_norm": 0.8102510571479797, |
| "learning_rate": 4.581895761454739e-06, |
| "loss": 0.1916, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.750137506875344, |
| "grad_norm": 7.8397417068481445, |
| "learning_rate": 4.16520826041302e-06, |
| "loss": 0.2055, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.7751387569378467, |
| "grad_norm": 1.0101325511932373, |
| "learning_rate": 3.7485207593713018e-06, |
| "loss": 0.2059, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.80014000700035, |
| "grad_norm": 0.05691508203744888, |
| "learning_rate": 3.3318332583295837e-06, |
| "loss": 0.2021, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.825141257062853, |
| "grad_norm": 0.08343327045440674, |
| "learning_rate": 2.9151457572878643e-06, |
| "loss": 0.2075, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.8501425071253563, |
| "grad_norm": 0.06794146448373795, |
| "learning_rate": 2.4984582562461457e-06, |
| "loss": 0.1644, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.8751437571878595, |
| "grad_norm": 0.023177076131105423, |
| "learning_rate": 2.081770755204427e-06, |
| "loss": 0.2023, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.9001450072503623, |
| "grad_norm": 0.15367339551448822, |
| "learning_rate": 1.6650832541627082e-06, |
| "loss": 0.2175, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.9251462573128655, |
| "grad_norm": 0.029408954083919525, |
| "learning_rate": 1.2483957531209895e-06, |
| "loss": 0.2073, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.9501475073753687, |
| "grad_norm": 2.278526782989502, |
| "learning_rate": 8.317082520792706e-07, |
| "loss": 0.2154, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.975148757437872, |
| "grad_norm": 15.807899475097656, |
| "learning_rate": 4.150207510375519e-07, |
| "loss": 0.2132, |
| "step": 59500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1": 0.8794049695261634, |
| "eval_loss": 0.6096033453941345, |
| "eval_runtime": 10.6241, |
| "eval_samples_per_second": 1883.457, |
| "eval_steps_per_second": 235.503, |
| "step": 59997 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 59997, |
| "total_flos": 7906263495201792.0, |
| "train_loss": 0.3725671201518653, |
| "train_runtime": 1624.1756, |
| "train_samples_per_second": 295.512, |
| "train_steps_per_second": 36.94 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 59997, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7906263495201792.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |