| { |
| "best_global_step": 44000, |
| "best_metric": 0.9900904784547742, |
| "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-44000", |
| "epoch": 1.408, |
| "eval_steps": 4000, |
| "global_step": 44000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.3923053741455078, |
| "learning_rate": 3.96e-06, |
| "loss": 0.685, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.665050983428955, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 0.6654, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 3.1655123233795166, |
| "learning_rate": 1.196e-05, |
| "loss": 0.6169, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 3.3807435035705566, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 0.4635, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.7963143587112427, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.2886, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 1.5362155437469482, |
| "learning_rate": 1.9987287319422154e-05, |
| "loss": 0.2605, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 3.649484395980835, |
| "learning_rate": 1.9974446227929375e-05, |
| "loss": 0.2194, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 1.402876377105713, |
| "learning_rate": 1.99616051364366e-05, |
| "loss": 0.2367, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 2.708932399749756, |
| "learning_rate": 1.994876404494382e-05, |
| "loss": 0.2061, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 2.4827463626861572, |
| "learning_rate": 1.9935922953451046e-05, |
| "loss": 0.1892, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 0.9402571320533752, |
| "learning_rate": 1.9923081861958268e-05, |
| "loss": 0.1819, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 0.8088085055351257, |
| "learning_rate": 1.9910240770465493e-05, |
| "loss": 0.1894, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 2.8913955688476562, |
| "learning_rate": 1.9897399678972714e-05, |
| "loss": 0.1733, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 1.9755029678344727, |
| "learning_rate": 1.988455858747994e-05, |
| "loss": 0.1748, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 2.659393310546875, |
| "learning_rate": 1.987171749598716e-05, |
| "loss": 0.1557, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 10.118029594421387, |
| "learning_rate": 1.9858876404494382e-05, |
| "loss": 0.1702, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 3.736616373062134, |
| "learning_rate": 1.9846035313001607e-05, |
| "loss": 0.1694, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 4.140033721923828, |
| "learning_rate": 1.9833194221508828e-05, |
| "loss": 0.1615, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 4.504345893859863, |
| "learning_rate": 1.9820353130016053e-05, |
| "loss": 0.1425, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 5.786899089813232, |
| "learning_rate": 1.9807512038523274e-05, |
| "loss": 0.1588, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 9.956130027770996, |
| "learning_rate": 1.97946709470305e-05, |
| "loss": 0.1399, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 11.86201286315918, |
| "learning_rate": 1.978182985553772e-05, |
| "loss": 0.173, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 0.6308254599571228, |
| "learning_rate": 1.9768988764044946e-05, |
| "loss": 0.1428, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 3.123718023300171, |
| "learning_rate": 1.9756147672552167e-05, |
| "loss": 0.1365, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.8000500202178955, |
| "learning_rate": 1.9743306581059392e-05, |
| "loss": 0.1528, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 2.9149184226989746, |
| "learning_rate": 1.9730465489566613e-05, |
| "loss": 0.1568, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 8.345555305480957, |
| "learning_rate": 1.9717624398073838e-05, |
| "loss": 0.139, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 1.785736322402954, |
| "learning_rate": 1.970478330658106e-05, |
| "loss": 0.1509, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 5.256857872009277, |
| "learning_rate": 1.9691942215088284e-05, |
| "loss": 0.1284, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 2.305225133895874, |
| "learning_rate": 1.967910112359551e-05, |
| "loss": 0.1249, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 1.1149404048919678, |
| "learning_rate": 1.966626003210273e-05, |
| "loss": 0.1223, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.24498392641544342, |
| "learning_rate": 1.9653418940609955e-05, |
| "loss": 0.1415, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 3.027209520339966, |
| "learning_rate": 1.9640577849117177e-05, |
| "loss": 0.133, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 10.783885955810547, |
| "learning_rate": 1.9627736757624402e-05, |
| "loss": 0.1309, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 7.116244316101074, |
| "learning_rate": 1.9614895666131623e-05, |
| "loss": 0.1056, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 8.193492889404297, |
| "learning_rate": 1.9602054574638848e-05, |
| "loss": 0.1201, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.9174596071243286, |
| "learning_rate": 1.958921348314607e-05, |
| "loss": 0.12, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 3.436502695083618, |
| "learning_rate": 1.957637239165329e-05, |
| "loss": 0.1257, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 9.131117820739746, |
| "learning_rate": 1.9563531300160516e-05, |
| "loss": 0.1139, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.3150777518749237, |
| "learning_rate": 1.9550690208667737e-05, |
| "loss": 0.1381, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.128, |
| "eval_accuracy": 0.9586, |
| "eval_f1": 0.9586218246722099, |
| "eval_loss": 0.1626722663640976, |
| "eval_precision": 0.9598553600795934, |
| "eval_recall": 0.9586, |
| "eval_runtime": 815.7025, |
| "eval_samples_per_second": 122.594, |
| "eval_steps_per_second": 7.662, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 8.65023422241211, |
| "learning_rate": 1.9537849117174962e-05, |
| "loss": 0.1026, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 8.46996784210205, |
| "learning_rate": 1.9525008025682183e-05, |
| "loss": 0.1251, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 6.439260959625244, |
| "learning_rate": 1.9512166934189408e-05, |
| "loss": 0.1338, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 1.012399435043335, |
| "learning_rate": 1.949932584269663e-05, |
| "loss": 0.1164, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.33647093176841736, |
| "learning_rate": 1.9486484751203855e-05, |
| "loss": 0.1156, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 7.39678955078125, |
| "learning_rate": 1.9473643659711076e-05, |
| "loss": 0.1013, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 4.556000709533691, |
| "learning_rate": 1.94608025682183e-05, |
| "loss": 0.1038, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.5087370276451111, |
| "learning_rate": 1.9447961476725522e-05, |
| "loss": 0.1319, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 4.385759353637695, |
| "learning_rate": 1.9435120385232747e-05, |
| "loss": 0.1229, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 5.46568489074707, |
| "learning_rate": 1.942227929373997e-05, |
| "loss": 0.1234, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 3.3565216064453125, |
| "learning_rate": 1.9409438202247193e-05, |
| "loss": 0.1007, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.17969129979610443, |
| "learning_rate": 1.9396597110754415e-05, |
| "loss": 0.108, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 8.49416732788086, |
| "learning_rate": 1.938375601926164e-05, |
| "loss": 0.1105, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 6.072606086730957, |
| "learning_rate": 1.937091492776886e-05, |
| "loss": 0.1082, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.2361280918121338, |
| "learning_rate": 1.9358073836276086e-05, |
| "loss": 0.0966, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 6.232377052307129, |
| "learning_rate": 1.9345232744783307e-05, |
| "loss": 0.1151, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.6630802154541016, |
| "learning_rate": 1.9332391653290532e-05, |
| "loss": 0.104, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.2744814157485962, |
| "learning_rate": 1.9319550561797754e-05, |
| "loss": 0.104, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.37808698415756226, |
| "learning_rate": 1.930670947030498e-05, |
| "loss": 0.1183, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 11.47230052947998, |
| "learning_rate": 1.9293868378812203e-05, |
| "loss": 0.1258, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 7.168028354644775, |
| "learning_rate": 1.928102728731942e-05, |
| "loss": 0.1117, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.11318526417016983, |
| "learning_rate": 1.9268186195826646e-05, |
| "loss": 0.1004, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 9.405433654785156, |
| "learning_rate": 1.9255345104333868e-05, |
| "loss": 0.1195, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.507453978061676, |
| "learning_rate": 1.9242504012841092e-05, |
| "loss": 0.0987, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.15791097283363342, |
| "learning_rate": 1.9229662921348314e-05, |
| "loss": 0.1156, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 5.216452121734619, |
| "learning_rate": 1.921682182985554e-05, |
| "loss": 0.1375, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.6168243885040283, |
| "learning_rate": 1.9203980738362764e-05, |
| "loss": 0.1086, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 2.949383497238159, |
| "learning_rate": 1.9191139646869985e-05, |
| "loss": 0.0957, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 9.519874572753906, |
| "learning_rate": 1.917829855537721e-05, |
| "loss": 0.0935, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 7.931914329528809, |
| "learning_rate": 1.916545746388443e-05, |
| "loss": 0.1335, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 8.498374938964844, |
| "learning_rate": 1.9152616372391656e-05, |
| "loss": 0.1025, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 5.054536819458008, |
| "learning_rate": 1.9139775280898878e-05, |
| "loss": 0.1165, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.17365020513534546, |
| "learning_rate": 1.9126934189406102e-05, |
| "loss": 0.0996, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 2.247058629989624, |
| "learning_rate": 1.9114093097913324e-05, |
| "loss": 0.1009, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 15.367574691772461, |
| "learning_rate": 1.910125200642055e-05, |
| "loss": 0.1082, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 6.72482967376709, |
| "learning_rate": 1.908841091492777e-05, |
| "loss": 0.1308, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.051803406327962875, |
| "learning_rate": 1.9075569823434995e-05, |
| "loss": 0.1031, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 9.355685234069824, |
| "learning_rate": 1.9062728731942216e-05, |
| "loss": 0.1012, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 2.047060012817383, |
| "learning_rate": 1.904988764044944e-05, |
| "loss": 0.1249, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 4.134668350219727, |
| "learning_rate": 1.9037046548956663e-05, |
| "loss": 0.0821, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.256, |
| "eval_accuracy": 0.97609, |
| "eval_f1": 0.9760920422665103, |
| "eval_loss": 0.10814645141363144, |
| "eval_precision": 0.9761013685233434, |
| "eval_recall": 0.97609, |
| "eval_runtime": 817.6077, |
| "eval_samples_per_second": 122.308, |
| "eval_steps_per_second": 7.644, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 6.723151206970215, |
| "learning_rate": 1.9024205457463887e-05, |
| "loss": 0.0799, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 1.805972933769226, |
| "learning_rate": 1.901136436597111e-05, |
| "loss": 0.1211, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 11.118502616882324, |
| "learning_rate": 1.8998523274478334e-05, |
| "loss": 0.0916, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 2.3953633308410645, |
| "learning_rate": 1.8985682182985555e-05, |
| "loss": 0.0878, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.10008874535560608, |
| "learning_rate": 1.8972841091492777e-05, |
| "loss": 0.0868, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.06934285163879395, |
| "learning_rate": 1.896e-05, |
| "loss": 0.1155, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.3781879246234894, |
| "learning_rate": 1.8947158908507223e-05, |
| "loss": 0.0988, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 9.6624174118042, |
| "learning_rate": 1.8934317817014448e-05, |
| "loss": 0.1072, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 0.09044591337442398, |
| "learning_rate": 1.892147672552167e-05, |
| "loss": 0.1085, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.0671633929014206, |
| "learning_rate": 1.8908635634028894e-05, |
| "loss": 0.1081, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 8.130873680114746, |
| "learning_rate": 1.8895794542536115e-05, |
| "loss": 0.0968, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 1.2537904977798462, |
| "learning_rate": 1.888295345104334e-05, |
| "loss": 0.0877, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 6.084417819976807, |
| "learning_rate": 1.887011235955056e-05, |
| "loss": 0.1022, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 6.140512943267822, |
| "learning_rate": 1.8857271268057787e-05, |
| "loss": 0.1016, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 1.7347182035446167, |
| "learning_rate": 1.8844430176565008e-05, |
| "loss": 0.092, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.6796423196792603, |
| "learning_rate": 1.8831589085072233e-05, |
| "loss": 0.1172, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 10.664779663085938, |
| "learning_rate": 1.8818747993579454e-05, |
| "loss": 0.0808, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.4076235294342041, |
| "learning_rate": 1.880590690208668e-05, |
| "loss": 0.0836, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 9.418440818786621, |
| "learning_rate": 1.8793065810593904e-05, |
| "loss": 0.0675, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.2078857421875, |
| "learning_rate": 1.8780224719101125e-05, |
| "loss": 0.0686, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.47173646092414856, |
| "learning_rate": 1.876738362760835e-05, |
| "loss": 0.1096, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 4.5297322273254395, |
| "learning_rate": 1.875454253611557e-05, |
| "loss": 0.098, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 5.099269866943359, |
| "learning_rate": 1.8741701444622796e-05, |
| "loss": 0.1063, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 2.588848114013672, |
| "learning_rate": 1.8728860353130018e-05, |
| "loss": 0.0989, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 4.629786968231201, |
| "learning_rate": 1.8716019261637243e-05, |
| "loss": 0.1018, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 11.187308311462402, |
| "learning_rate": 1.8703178170144464e-05, |
| "loss": 0.0864, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 2.476482391357422, |
| "learning_rate": 1.869033707865169e-05, |
| "loss": 0.0744, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 17.418149948120117, |
| "learning_rate": 1.867749598715891e-05, |
| "loss": 0.1189, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 11.753310203552246, |
| "learning_rate": 1.8664654895666132e-05, |
| "loss": 0.0832, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.41917338967323303, |
| "learning_rate": 1.8651813804173357e-05, |
| "loss": 0.1063, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 14.072111129760742, |
| "learning_rate": 1.8638972712680578e-05, |
| "loss": 0.1061, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 2.6141397953033447, |
| "learning_rate": 1.8626131621187803e-05, |
| "loss": 0.0934, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 3.1363914012908936, |
| "learning_rate": 1.8613290529695024e-05, |
| "loss": 0.0879, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 4.260811805725098, |
| "learning_rate": 1.860044943820225e-05, |
| "loss": 0.092, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.2677570879459381, |
| "learning_rate": 1.858760834670947e-05, |
| "loss": 0.1202, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.056061357259750366, |
| "learning_rate": 1.8574767255216696e-05, |
| "loss": 0.0773, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 7.95279598236084, |
| "learning_rate": 1.8561926163723917e-05, |
| "loss": 0.0749, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 7.4200873374938965, |
| "learning_rate": 1.8549085072231142e-05, |
| "loss": 0.1054, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.8819625973701477, |
| "learning_rate": 1.8536243980738363e-05, |
| "loss": 0.0998, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 6.32806396484375, |
| "learning_rate": 1.8523402889245588e-05, |
| "loss": 0.0667, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.384, |
| "eval_accuracy": 0.97856, |
| "eval_f1": 0.97856556986665, |
| "eval_loss": 0.1008467897772789, |
| "eval_precision": 0.9786554480535211, |
| "eval_recall": 0.97856, |
| "eval_runtime": 822.182, |
| "eval_samples_per_second": 121.628, |
| "eval_steps_per_second": 7.602, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 11.900330543518066, |
| "learning_rate": 1.851056179775281e-05, |
| "loss": 0.0958, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 18.242124557495117, |
| "learning_rate": 1.8497720706260034e-05, |
| "loss": 0.075, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.0790882408618927, |
| "learning_rate": 1.8484879614767256e-05, |
| "loss": 0.071, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 16.233280181884766, |
| "learning_rate": 1.847203852327448e-05, |
| "loss": 0.1109, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 12.048758506774902, |
| "learning_rate": 1.8459197431781702e-05, |
| "loss": 0.0703, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 9.086562156677246, |
| "learning_rate": 1.8446356340288927e-05, |
| "loss": 0.0706, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 9.871477127075195, |
| "learning_rate": 1.843351524879615e-05, |
| "loss": 0.0745, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.7136130928993225, |
| "learning_rate": 1.8420674157303373e-05, |
| "loss": 0.0544, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 10.892882347106934, |
| "learning_rate": 1.8407833065810598e-05, |
| "loss": 0.1109, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 1.3350844383239746, |
| "learning_rate": 1.839499197431782e-05, |
| "loss": 0.0898, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 11.215353012084961, |
| "learning_rate": 1.838215088282504e-05, |
| "loss": 0.0963, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.2309681475162506, |
| "learning_rate": 1.8369309791332262e-05, |
| "loss": 0.0785, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.0822586640715599, |
| "learning_rate": 1.8356468699839487e-05, |
| "loss": 0.0736, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.6296452283859253, |
| "learning_rate": 1.834362760834671e-05, |
| "loss": 0.1064, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 7.989764213562012, |
| "learning_rate": 1.8330786516853933e-05, |
| "loss": 0.0885, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.04016461223363876, |
| "learning_rate": 1.8317945425361158e-05, |
| "loss": 0.0574, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.03219222649931908, |
| "learning_rate": 1.830510433386838e-05, |
| "loss": 0.0742, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 11.071674346923828, |
| "learning_rate": 1.8292263242375605e-05, |
| "loss": 0.0968, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 22.920804977416992, |
| "learning_rate": 1.8279422150882826e-05, |
| "loss": 0.0782, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.05693759024143219, |
| "learning_rate": 1.826658105939005e-05, |
| "loss": 0.0538, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.08659256994724274, |
| "learning_rate": 1.8253739967897272e-05, |
| "loss": 0.0699, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 3.1322948932647705, |
| "learning_rate": 1.8240898876404497e-05, |
| "loss": 0.0841, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.025594089180231094, |
| "learning_rate": 1.822805778491172e-05, |
| "loss": 0.1108, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.5694848299026489, |
| "learning_rate": 1.8215216693418943e-05, |
| "loss": 0.0705, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 5.643801212310791, |
| "learning_rate": 1.8202375601926165e-05, |
| "loss": 0.0773, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 1.6325165033340454, |
| "learning_rate": 1.818953451043339e-05, |
| "loss": 0.1244, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.04731294512748718, |
| "learning_rate": 1.817669341894061e-05, |
| "loss": 0.0523, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 9.385772705078125, |
| "learning_rate": 1.8163852327447836e-05, |
| "loss": 0.0739, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.04752274602651596, |
| "learning_rate": 1.8151011235955057e-05, |
| "loss": 0.0636, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.4523526430130005, |
| "learning_rate": 1.8138170144462282e-05, |
| "loss": 0.0907, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.10660147666931152, |
| "learning_rate": 1.8125329052969504e-05, |
| "loss": 0.1093, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.024508710950613022, |
| "learning_rate": 1.811248796147673e-05, |
| "loss": 0.0562, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 4.832937240600586, |
| "learning_rate": 1.809964686998395e-05, |
| "loss": 0.0694, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 12.76455020904541, |
| "learning_rate": 1.808680577849117e-05, |
| "loss": 0.0525, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.244754359126091, |
| "learning_rate": 1.8073964686998396e-05, |
| "loss": 0.0632, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 19.315397262573242, |
| "learning_rate": 1.8061123595505618e-05, |
| "loss": 0.0794, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.05077612027525902, |
| "learning_rate": 1.8048282504012842e-05, |
| "loss": 0.0848, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.33186858892440796, |
| "learning_rate": 1.8035441412520064e-05, |
| "loss": 0.0894, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.26919984817504883, |
| "learning_rate": 1.802260032102729e-05, |
| "loss": 0.0801, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 3.566136121749878, |
| "learning_rate": 1.800975922953451e-05, |
| "loss": 0.0754, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.512, |
| "eval_accuracy": 0.98201, |
| "eval_f1": 0.9820134202589396, |
| "eval_loss": 0.07791993767023087, |
| "eval_precision": 0.9820579125315673, |
| "eval_recall": 0.98201, |
| "eval_runtime": 823.0886, |
| "eval_samples_per_second": 121.494, |
| "eval_steps_per_second": 7.593, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 8.349530220031738, |
| "learning_rate": 1.7996918138041735e-05, |
| "loss": 0.0724, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.05041235312819481, |
| "learning_rate": 1.7984077046548956e-05, |
| "loss": 0.0428, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.3858475685119629, |
| "learning_rate": 1.797123595505618e-05, |
| "loss": 0.0569, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 8.561657905578613, |
| "learning_rate": 1.7958394863563403e-05, |
| "loss": 0.0758, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.025413183495402336, |
| "learning_rate": 1.7945553772070628e-05, |
| "loss": 0.0548, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.010517638176679611, |
| "learning_rate": 1.7932712680577852e-05, |
| "loss": 0.0706, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.08805025368928909, |
| "learning_rate": 1.7919871589085074e-05, |
| "loss": 0.077, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 5.7931342124938965, |
| "learning_rate": 1.79070304975923e-05, |
| "loss": 0.0669, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.07078564912080765, |
| "learning_rate": 1.789418940609952e-05, |
| "loss": 0.1069, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.9886382818222046, |
| "learning_rate": 1.7881348314606745e-05, |
| "loss": 0.0501, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 6.3300065994262695, |
| "learning_rate": 1.7868507223113966e-05, |
| "loss": 0.0744, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.18114350736141205, |
| "learning_rate": 1.785566613162119e-05, |
| "loss": 0.0782, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.2821557819843292, |
| "learning_rate": 1.7842825040128413e-05, |
| "loss": 0.0477, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 17.2164306640625, |
| "learning_rate": 1.7829983948635637e-05, |
| "loss": 0.0522, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 11.891914367675781, |
| "learning_rate": 1.781714285714286e-05, |
| "loss": 0.0859, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 12.457894325256348, |
| "learning_rate": 1.7804301765650084e-05, |
| "loss": 0.0561, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 1.845371961593628, |
| "learning_rate": 1.7791460674157305e-05, |
| "loss": 0.0598, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 5.928323268890381, |
| "learning_rate": 1.7778619582664527e-05, |
| "loss": 0.0591, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.10898735374212265, |
| "learning_rate": 1.776577849117175e-05, |
| "loss": 0.068, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 8.659664154052734, |
| "learning_rate": 1.7752937399678973e-05, |
| "loss": 0.0672, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.1051400676369667, |
| "learning_rate": 1.7740096308186198e-05, |
| "loss": 0.0845, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 2.4647881984710693, |
| "learning_rate": 1.772725521669342e-05, |
| "loss": 0.067, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.04466241970658302, |
| "learning_rate": 1.7714414125200644e-05, |
| "loss": 0.0657, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.014231475070118904, |
| "learning_rate": 1.7701573033707865e-05, |
| "loss": 0.0666, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 10.516510009765625, |
| "learning_rate": 1.768873194221509e-05, |
| "loss": 0.0692, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 1.2347426414489746, |
| "learning_rate": 1.767589085072231e-05, |
| "loss": 0.057, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.09256485849618912, |
| "learning_rate": 1.7663049759229537e-05, |
| "loss": 0.0726, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 9.093379020690918, |
| "learning_rate": 1.7650208667736758e-05, |
| "loss": 0.0902, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 2.5893750190734863, |
| "learning_rate": 1.7637367576243983e-05, |
| "loss": 0.0611, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.708980143070221, |
| "learning_rate": 1.7624526484751204e-05, |
| "loss": 0.0726, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.35581639409065247, |
| "learning_rate": 1.761168539325843e-05, |
| "loss": 0.0681, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 1.3588510751724243, |
| "learning_rate": 1.759884430176565e-05, |
| "loss": 0.0662, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 9.142585754394531, |
| "learning_rate": 1.7586003210272875e-05, |
| "loss": 0.0559, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 16.729188919067383, |
| "learning_rate": 1.7573162118780097e-05, |
| "loss": 0.0754, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 11.582767486572266, |
| "learning_rate": 1.756032102728732e-05, |
| "loss": 0.0681, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.046063363552093506, |
| "learning_rate": 1.7547479935794543e-05, |
| "loss": 0.0603, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.294583261013031, |
| "learning_rate": 1.7534638844301768e-05, |
| "loss": 0.0518, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.014278042130172253, |
| "learning_rate": 1.7521797752808993e-05, |
| "loss": 0.0576, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.16866210103034973, |
| "learning_rate": 1.7508956661316214e-05, |
| "loss": 0.0701, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.832259654998779, |
| "learning_rate": 1.749611556982344e-05, |
| "loss": 0.0776, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_accuracy": 0.98464, |
| "eval_f1": 0.9846426496660109, |
| "eval_loss": 0.061699531972408295, |
| "eval_precision": 0.9846784488090538, |
| "eval_recall": 0.98464, |
| "eval_runtime": 814.0166, |
| "eval_samples_per_second": 122.848, |
| "eval_steps_per_second": 7.678, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 25.919387817382812, |
| "learning_rate": 1.7483274478330657e-05, |
| "loss": 0.0704, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 0.27127301692962646, |
| "learning_rate": 1.7470433386837882e-05, |
| "loss": 0.0749, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.1059252917766571, |
| "learning_rate": 1.7457592295345103e-05, |
| "loss": 0.0793, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.6019250154495239, |
| "learning_rate": 1.7444751203852328e-05, |
| "loss": 0.059, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.28291046619415283, |
| "learning_rate": 1.7431910112359553e-05, |
| "loss": 0.0569, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.15100154280662537, |
| "learning_rate": 1.7419069020866774e-05, |
| "loss": 0.0398, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.017900506034493446, |
| "learning_rate": 1.7406227929374e-05, |
| "loss": 0.0559, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.007751260884106159, |
| "learning_rate": 1.739338683788122e-05, |
| "loss": 0.0621, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 5.354798793792725, |
| "learning_rate": 1.7380545746388445e-05, |
| "loss": 0.085, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 2.312457799911499, |
| "learning_rate": 1.7367704654895667e-05, |
| "loss": 0.0654, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.10008107125759125, |
| "learning_rate": 1.7354863563402892e-05, |
| "loss": 0.0697, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.008539400063455105, |
| "learning_rate": 1.7342022471910113e-05, |
| "loss": 0.0687, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 1.0686814785003662, |
| "learning_rate": 1.7329181380417338e-05, |
| "loss": 0.0491, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 42.248897552490234, |
| "learning_rate": 1.731634028892456e-05, |
| "loss": 0.0464, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 17.70836067199707, |
| "learning_rate": 1.7303499197431784e-05, |
| "loss": 0.109, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 11.702173233032227, |
| "learning_rate": 1.7290658105939006e-05, |
| "loss": 0.0626, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.15207910537719727, |
| "learning_rate": 1.727781701444623e-05, |
| "loss": 0.0617, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.7698332667350769, |
| "learning_rate": 1.7264975922953452e-05, |
| "loss": 0.0508, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.012268565595149994, |
| "learning_rate": 1.7252134831460677e-05, |
| "loss": 0.0518, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.07914119213819504, |
| "learning_rate": 1.7239293739967898e-05, |
| "loss": 0.0699, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.5616400241851807, |
| "learning_rate": 1.7226452648475123e-05, |
| "loss": 0.0649, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 6.950782299041748, |
| "learning_rate": 1.7213611556982345e-05, |
| "loss": 0.0719, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.07157002389431, |
| "learning_rate": 1.720077046548957e-05, |
| "loss": 0.0403, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 2.784773588180542, |
| "learning_rate": 1.718792937399679e-05, |
| "loss": 0.0468, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.088102325797081, |
| "learning_rate": 1.7175088282504012e-05, |
| "loss": 0.0612, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.373806357383728, |
| "learning_rate": 1.7162247191011237e-05, |
| "loss": 0.0607, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 10.401127815246582, |
| "learning_rate": 1.714940609951846e-05, |
| "loss": 0.0572, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.006438109558075666, |
| "learning_rate": 1.7136565008025683e-05, |
| "loss": 0.0472, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.15433204174041748, |
| "learning_rate": 1.7123723916532905e-05, |
| "loss": 0.0578, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.29676365852355957, |
| "learning_rate": 1.711088282504013e-05, |
| "loss": 0.0355, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.007739920634776354, |
| "learning_rate": 1.709804173354735e-05, |
| "loss": 0.0545, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 16.565767288208008, |
| "learning_rate": 1.7085200642054576e-05, |
| "loss": 0.0662, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.013131607323884964, |
| "learning_rate": 1.7072359550561797e-05, |
| "loss": 0.0734, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 1.746962308883667, |
| "learning_rate": 1.7059518459069022e-05, |
| "loss": 0.0558, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 6.599545955657959, |
| "learning_rate": 1.7046677367576247e-05, |
| "loss": 0.0485, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 4.333959102630615, |
| "learning_rate": 1.703383627608347e-05, |
| "loss": 0.0554, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.16271114349365234, |
| "learning_rate": 1.7020995184590693e-05, |
| "loss": 0.0367, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 17.760648727416992, |
| "learning_rate": 1.7008154093097915e-05, |
| "loss": 0.0871, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.15831367671489716, |
| "learning_rate": 1.699531300160514e-05, |
| "loss": 0.0525, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 10.259693145751953, |
| "learning_rate": 1.698247191011236e-05, |
| "loss": 0.0643, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.768, |
| "eval_accuracy": 0.98307, |
| "eval_f1": 0.9830752640408086, |
| "eval_loss": 0.07612209022045135, |
| "eval_precision": 0.9832125198389433, |
| "eval_recall": 0.98307, |
| "eval_runtime": 774.4597, |
| "eval_samples_per_second": 129.122, |
| "eval_steps_per_second": 8.07, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.025170153006911278, |
| "learning_rate": 1.6969630818619586e-05, |
| "loss": 0.0476, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.005416714586317539, |
| "learning_rate": 1.6956789727126807e-05, |
| "loss": 0.0625, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 35.962677001953125, |
| "learning_rate": 1.6943948635634032e-05, |
| "loss": 0.0492, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.019019972532987595, |
| "learning_rate": 1.6931107544141254e-05, |
| "loss": 0.0567, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 4.536252021789551, |
| "learning_rate": 1.691826645264848e-05, |
| "loss": 0.0564, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 20.884357452392578, |
| "learning_rate": 1.69054253611557e-05, |
| "loss": 0.059, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 4.154411792755127, |
| "learning_rate": 1.689258426966292e-05, |
| "loss": 0.0567, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.23815973103046417, |
| "learning_rate": 1.6879743178170146e-05, |
| "loss": 0.0593, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.027695050463080406, |
| "learning_rate": 1.6866902086677368e-05, |
| "loss": 0.0545, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.052229318767786026, |
| "learning_rate": 1.6854060995184592e-05, |
| "loss": 0.0584, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.007615574169903994, |
| "learning_rate": 1.6841219903691814e-05, |
| "loss": 0.0492, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.34495148062705994, |
| "learning_rate": 1.682837881219904e-05, |
| "loss": 0.0512, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.4945124685764313, |
| "learning_rate": 1.681553772070626e-05, |
| "loss": 0.0659, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 3.508509397506714, |
| "learning_rate": 1.6802696629213485e-05, |
| "loss": 0.0431, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 9.751060485839844, |
| "learning_rate": 1.6789855537720706e-05, |
| "loss": 0.0609, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 13.404821395874023, |
| "learning_rate": 1.677701444622793e-05, |
| "loss": 0.0451, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.09565193206071854, |
| "learning_rate": 1.6764173354735153e-05, |
| "loss": 0.0545, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.010966203175485134, |
| "learning_rate": 1.6751332263242377e-05, |
| "loss": 0.0703, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.18196341395378113, |
| "learning_rate": 1.67384911717496e-05, |
| "loss": 0.0392, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 15.195699691772461, |
| "learning_rate": 1.6725650080256824e-05, |
| "loss": 0.0429, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.2465728521347046, |
| "learning_rate": 1.6712808988764045e-05, |
| "loss": 0.0546, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 15.004085540771484, |
| "learning_rate": 1.669996789727127e-05, |
| "loss": 0.0716, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.17839568853378296, |
| "learning_rate": 1.668712680577849e-05, |
| "loss": 0.0413, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 30.638254165649414, |
| "learning_rate": 1.6674285714285716e-05, |
| "loss": 0.0391, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 8.340238571166992, |
| "learning_rate": 1.6661444622792938e-05, |
| "loss": 0.0579, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.026733344420790672, |
| "learning_rate": 1.6648603531300163e-05, |
| "loss": 0.0647, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.013905318453907967, |
| "learning_rate": 1.6635762439807387e-05, |
| "loss": 0.0258, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.011157176457345486, |
| "learning_rate": 1.662292134831461e-05, |
| "loss": 0.0497, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.029731517657637596, |
| "learning_rate": 1.6610080256821834e-05, |
| "loss": 0.0567, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.3443889319896698, |
| "learning_rate": 1.6597239165329055e-05, |
| "loss": 0.0533, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.5098379254341125, |
| "learning_rate": 1.6584398073836277e-05, |
| "loss": 0.0532, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.045412395149469376, |
| "learning_rate": 1.6571556982343498e-05, |
| "loss": 0.0533, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 3.6077659130096436, |
| "learning_rate": 1.6558715890850723e-05, |
| "loss": 0.0432, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 27.717470169067383, |
| "learning_rate": 1.6545874799357948e-05, |
| "loss": 0.0599, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 8.342029571533203, |
| "learning_rate": 1.653303370786517e-05, |
| "loss": 0.066, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.09634260088205338, |
| "learning_rate": 1.6520192616372394e-05, |
| "loss": 0.0516, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.13491930067539215, |
| "learning_rate": 1.6507351524879615e-05, |
| "loss": 0.0539, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.23464186489582062, |
| "learning_rate": 1.649451043338684e-05, |
| "loss": 0.0415, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.46864306926727295, |
| "learning_rate": 1.648166934189406e-05, |
| "loss": 0.0535, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 6.844827175140381, |
| "learning_rate": 1.6468828250401286e-05, |
| "loss": 0.064, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.896, |
| "eval_accuracy": 0.98781, |
| "eval_f1": 0.9878109998616666, |
| "eval_loss": 0.04953546077013016, |
| "eval_precision": 0.9878187731391986, |
| "eval_recall": 0.98781, |
| "eval_runtime": 778.6432, |
| "eval_samples_per_second": 128.429, |
| "eval_steps_per_second": 8.027, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 0.008244618773460388, |
| "learning_rate": 1.6455987158908508e-05, |
| "loss": 0.041, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 0.01580977439880371, |
| "learning_rate": 1.6443146067415733e-05, |
| "loss": 0.057, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 10.525986671447754, |
| "learning_rate": 1.6430304975922954e-05, |
| "loss": 0.0322, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 0.016806138679385185, |
| "learning_rate": 1.641746388443018e-05, |
| "loss": 0.0523, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 6.311640739440918, |
| "learning_rate": 1.64046227929374e-05, |
| "loss": 0.0497, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 0.09537007659673691, |
| "learning_rate": 1.6391781701444625e-05, |
| "loss": 0.0523, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 0.010998793877661228, |
| "learning_rate": 1.6378940609951847e-05, |
| "loss": 0.0481, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 0.24172453582286835, |
| "learning_rate": 1.636609951845907e-05, |
| "loss": 0.0537, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 0.00843421183526516, |
| "learning_rate": 1.6353258426966293e-05, |
| "loss": 0.0345, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 26.664979934692383, |
| "learning_rate": 1.6340417335473518e-05, |
| "loss": 0.0667, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 0.3694003224372864, |
| "learning_rate": 1.632757624398074e-05, |
| "loss": 0.0555, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 24.855958938598633, |
| "learning_rate": 1.6314735152487964e-05, |
| "loss": 0.058, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 2.051378011703491, |
| "learning_rate": 1.6301894060995186e-05, |
| "loss": 0.0427, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.8996158838272095, |
| "learning_rate": 1.6289052969502407e-05, |
| "loss": 0.0462, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 14.316937446594238, |
| "learning_rate": 1.6276211878009632e-05, |
| "loss": 0.0658, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 11.959718704223633, |
| "learning_rate": 1.6263370786516853e-05, |
| "loss": 0.0396, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 0.015293744392693043, |
| "learning_rate": 1.6250529695024078e-05, |
| "loss": 0.0489, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 14.037763595581055, |
| "learning_rate": 1.62376886035313e-05, |
| "loss": 0.0746, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 0.04080112278461456, |
| "learning_rate": 1.6224847512038524e-05, |
| "loss": 0.0612, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.06590835005044937, |
| "learning_rate": 1.6212006420545746e-05, |
| "loss": 0.0597, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 8.91706657409668, |
| "learning_rate": 1.619916532905297e-05, |
| "loss": 0.0489, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 0.019029080867767334, |
| "learning_rate": 1.6186324237560192e-05, |
| "loss": 0.0572, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 4.674193382263184, |
| "learning_rate": 1.6173483146067417e-05, |
| "loss": 0.0587, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 0.14981134235858917, |
| "learning_rate": 1.6160642054574642e-05, |
| "loss": 0.0323, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 4.462047100067139, |
| "learning_rate": 1.6147800963081863e-05, |
| "loss": 0.053, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 0.10449015349149704, |
| "learning_rate": 1.6134959871589088e-05, |
| "loss": 0.0465, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 1.134883999824524, |
| "learning_rate": 1.612211878009631e-05, |
| "loss": 0.0534, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 21.06899070739746, |
| "learning_rate": 1.6109277688603534e-05, |
| "loss": 0.0515, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 20.929187774658203, |
| "learning_rate": 1.6096436597110756e-05, |
| "loss": 0.046, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.008979029022157192, |
| "learning_rate": 1.608359550561798e-05, |
| "loss": 0.0288, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 10.832496643066406, |
| "learning_rate": 1.6070754414125202e-05, |
| "loss": 0.0533, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 10.279513359069824, |
| "learning_rate": 1.6057913322632427e-05, |
| "loss": 0.0535, |
| "step": 31200 |
| }, |
| { |
| "epoch": 1.0016, |
| "grad_norm": 0.050070084631443024, |
| "learning_rate": 1.6045072231139648e-05, |
| "loss": 0.0488, |
| "step": 31300 |
| }, |
| { |
| "epoch": 1.0048, |
| "grad_norm": 7.610085964202881, |
| "learning_rate": 1.6032231139646873e-05, |
| "loss": 0.0499, |
| "step": 31400 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 0.03847907483577728, |
| "learning_rate": 1.6019390048154095e-05, |
| "loss": 0.0668, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.0112, |
| "grad_norm": 0.009822272695600986, |
| "learning_rate": 1.600654895666132e-05, |
| "loss": 0.0403, |
| "step": 31600 |
| }, |
| { |
| "epoch": 1.0144, |
| "grad_norm": 0.1967863291501999, |
| "learning_rate": 1.599370786516854e-05, |
| "loss": 0.0565, |
| "step": 31700 |
| }, |
| { |
| "epoch": 1.0176, |
| "grad_norm": 8.522165298461914, |
| "learning_rate": 1.5980866773675762e-05, |
| "loss": 0.0298, |
| "step": 31800 |
| }, |
| { |
| "epoch": 1.0208, |
| "grad_norm": 0.7742573618888855, |
| "learning_rate": 1.5968025682182987e-05, |
| "loss": 0.0411, |
| "step": 31900 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.07143627107143402, |
| "learning_rate": 1.595518459069021e-05, |
| "loss": 0.0477, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.024, |
| "eval_accuracy": 0.98794, |
| "eval_f1": 0.9879419419991998, |
| "eval_loss": 0.04801899939775467, |
| "eval_precision": 0.9879724568651884, |
| "eval_recall": 0.98794, |
| "eval_runtime": 775.4534, |
| "eval_samples_per_second": 128.957, |
| "eval_steps_per_second": 8.06, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.0272, |
| "grad_norm": 1.0557373762130737, |
| "learning_rate": 1.5942343499197433e-05, |
| "loss": 0.0594, |
| "step": 32100 |
| }, |
| { |
| "epoch": 1.0304, |
| "grad_norm": 0.03562343493103981, |
| "learning_rate": 1.5929502407704655e-05, |
| "loss": 0.0537, |
| "step": 32200 |
| }, |
| { |
| "epoch": 1.0336, |
| "grad_norm": 0.06482692807912827, |
| "learning_rate": 1.591666131621188e-05, |
| "loss": 0.0463, |
| "step": 32300 |
| }, |
| { |
| "epoch": 1.0368, |
| "grad_norm": 0.49017927050590515, |
| "learning_rate": 1.59038202247191e-05, |
| "loss": 0.0661, |
| "step": 32400 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 15.849417686462402, |
| "learning_rate": 1.5890979133226326e-05, |
| "loss": 0.0237, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.0432, |
| "grad_norm": 0.6910932064056396, |
| "learning_rate": 1.5878138041733547e-05, |
| "loss": 0.0383, |
| "step": 32600 |
| }, |
| { |
| "epoch": 1.0464, |
| "grad_norm": 0.06304027885198593, |
| "learning_rate": 1.5865296950240772e-05, |
| "loss": 0.0541, |
| "step": 32700 |
| }, |
| { |
| "epoch": 1.0496, |
| "grad_norm": 11.758397102355957, |
| "learning_rate": 1.5852455858747994e-05, |
| "loss": 0.0573, |
| "step": 32800 |
| }, |
| { |
| "epoch": 1.0528, |
| "grad_norm": 0.007580827921628952, |
| "learning_rate": 1.583961476725522e-05, |
| "loss": 0.0551, |
| "step": 32900 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.7389895915985107, |
| "learning_rate": 1.582677367576244e-05, |
| "loss": 0.0408, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.0592, |
| "grad_norm": 0.011490071192383766, |
| "learning_rate": 1.5813932584269665e-05, |
| "loss": 0.0474, |
| "step": 33100 |
| }, |
| { |
| "epoch": 1.0624, |
| "grad_norm": 0.00892989058047533, |
| "learning_rate": 1.5801091492776886e-05, |
| "loss": 0.0338, |
| "step": 33200 |
| }, |
| { |
| "epoch": 1.0656, |
| "grad_norm": 0.21890480816364288, |
| "learning_rate": 1.578825040128411e-05, |
| "loss": 0.0465, |
| "step": 33300 |
| }, |
| { |
| "epoch": 1.0688, |
| "grad_norm": 20.05085563659668, |
| "learning_rate": 1.5775409309791332e-05, |
| "loss": 0.0633, |
| "step": 33400 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 3.5141100883483887, |
| "learning_rate": 1.5762568218298557e-05, |
| "loss": 0.0743, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.0752, |
| "grad_norm": 0.03382471203804016, |
| "learning_rate": 1.5749727126805782e-05, |
| "loss": 0.0409, |
| "step": 33600 |
| }, |
| { |
| "epoch": 1.0784, |
| "grad_norm": 9.916868209838867, |
| "learning_rate": 1.5736886035313004e-05, |
| "loss": 0.0498, |
| "step": 33700 |
| }, |
| { |
| "epoch": 1.0816, |
| "grad_norm": 0.007973396219313145, |
| "learning_rate": 1.572404494382023e-05, |
| "loss": 0.0399, |
| "step": 33800 |
| }, |
| { |
| "epoch": 1.0848, |
| "grad_norm": 0.15778931975364685, |
| "learning_rate": 1.571120385232745e-05, |
| "loss": 0.0406, |
| "step": 33900 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 0.027699623256921768, |
| "learning_rate": 1.569836276083467e-05, |
| "loss": 0.0481, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.0912, |
| "grad_norm": 2.7082841396331787, |
| "learning_rate": 1.5685521669341893e-05, |
| "loss": 0.0427, |
| "step": 34100 |
| }, |
| { |
| "epoch": 1.0944, |
| "grad_norm": 2.933098316192627, |
| "learning_rate": 1.5672680577849118e-05, |
| "loss": 0.0605, |
| "step": 34200 |
| }, |
| { |
| "epoch": 1.0976, |
| "grad_norm": 5.093338489532471, |
| "learning_rate": 1.5659839486356342e-05, |
| "loss": 0.0625, |
| "step": 34300 |
| }, |
| { |
| "epoch": 1.1008, |
| "grad_norm": 0.14764878153800964, |
| "learning_rate": 1.5646998394863564e-05, |
| "loss": 0.0467, |
| "step": 34400 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 0.45360442996025085, |
| "learning_rate": 1.563415730337079e-05, |
| "loss": 0.0517, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.1072, |
| "grad_norm": 0.007702784612774849, |
| "learning_rate": 1.562131621187801e-05, |
| "loss": 0.0367, |
| "step": 34600 |
| }, |
| { |
| "epoch": 1.1104, |
| "grad_norm": 0.01513522956520319, |
| "learning_rate": 1.5608475120385235e-05, |
| "loss": 0.0514, |
| "step": 34700 |
| }, |
| { |
| "epoch": 1.1136, |
| "grad_norm": 0.29241982102394104, |
| "learning_rate": 1.5595634028892456e-05, |
| "loss": 0.0428, |
| "step": 34800 |
| }, |
| { |
| "epoch": 1.1168, |
| "grad_norm": 0.02503369376063347, |
| "learning_rate": 1.558279293739968e-05, |
| "loss": 0.0583, |
| "step": 34900 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.094393789768219, |
| "learning_rate": 1.5569951845906903e-05, |
| "loss": 0.0636, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.1232, |
| "grad_norm": 0.011400578543543816, |
| "learning_rate": 1.5557110754414127e-05, |
| "loss": 0.0392, |
| "step": 35100 |
| }, |
| { |
| "epoch": 1.1264, |
| "grad_norm": 0.015260876156389713, |
| "learning_rate": 1.554426966292135e-05, |
| "loss": 0.0452, |
| "step": 35200 |
| }, |
| { |
| "epoch": 1.1296, |
| "grad_norm": 0.07425595074892044, |
| "learning_rate": 1.5531428571428574e-05, |
| "loss": 0.0506, |
| "step": 35300 |
| }, |
| { |
| "epoch": 1.1328, |
| "grad_norm": 2.20249605178833, |
| "learning_rate": 1.5518587479935795e-05, |
| "loss": 0.0742, |
| "step": 35400 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.027821656316518784, |
| "learning_rate": 1.550574638844302e-05, |
| "loss": 0.062, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.1392, |
| "grad_norm": 0.02665848284959793, |
| "learning_rate": 1.549290529695024e-05, |
| "loss": 0.0739, |
| "step": 35600 |
| }, |
| { |
| "epoch": 1.1424, |
| "grad_norm": 5.973049163818359, |
| "learning_rate": 1.5480064205457466e-05, |
| "loss": 0.0504, |
| "step": 35700 |
| }, |
| { |
| "epoch": 1.1456, |
| "grad_norm": 10.988951683044434, |
| "learning_rate": 1.5467223113964688e-05, |
| "loss": 0.0493, |
| "step": 35800 |
| }, |
| { |
| "epoch": 1.1488, |
| "grad_norm": 15.630033493041992, |
| "learning_rate": 1.5454382022471913e-05, |
| "loss": 0.0492, |
| "step": 35900 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 4.891533851623535, |
| "learning_rate": 1.5441540930979134e-05, |
| "loss": 0.0427, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.152, |
| "eval_accuracy": 0.98938, |
| "eval_f1": 0.9893807849919393, |
| "eval_loss": 0.0424417182803154, |
| "eval_precision": 0.9893870324245919, |
| "eval_recall": 0.98938, |
| "eval_runtime": 778.7856, |
| "eval_samples_per_second": 128.405, |
| "eval_steps_per_second": 8.025, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.1552, |
| "grad_norm": 7.563354969024658, |
| "learning_rate": 1.542869983948636e-05, |
| "loss": 0.0446, |
| "step": 36100 |
| }, |
| { |
| "epoch": 1.1584, |
| "grad_norm": 0.017443129792809486, |
| "learning_rate": 1.541585874799358e-05, |
| "loss": 0.0472, |
| "step": 36200 |
| }, |
| { |
| "epoch": 1.1616, |
| "grad_norm": 0.007690173573791981, |
| "learning_rate": 1.5403017656500805e-05, |
| "loss": 0.0368, |
| "step": 36300 |
| }, |
| { |
| "epoch": 1.1648, |
| "grad_norm": 0.024819310754537582, |
| "learning_rate": 1.5390176565008027e-05, |
| "loss": 0.0466, |
| "step": 36400 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 1.0305715799331665, |
| "learning_rate": 1.5377335473515248e-05, |
| "loss": 0.0396, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.1712, |
| "grad_norm": 13.792978286743164, |
| "learning_rate": 1.5364494382022473e-05, |
| "loss": 0.0382, |
| "step": 36600 |
| }, |
| { |
| "epoch": 1.1743999999999999, |
| "grad_norm": 4.012645721435547, |
| "learning_rate": 1.5351653290529694e-05, |
| "loss": 0.0519, |
| "step": 36700 |
| }, |
| { |
| "epoch": 1.1776, |
| "grad_norm": 0.015255521982908249, |
| "learning_rate": 1.533881219903692e-05, |
| "loss": 0.0331, |
| "step": 36800 |
| }, |
| { |
| "epoch": 1.1808, |
| "grad_norm": 0.03518729284405708, |
| "learning_rate": 1.532597110754414e-05, |
| "loss": 0.0442, |
| "step": 36900 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.12012261152267456, |
| "learning_rate": 1.5313130016051365e-05, |
| "loss": 0.0392, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.1872, |
| "grad_norm": 0.03485884144902229, |
| "learning_rate": 1.5300288924558587e-05, |
| "loss": 0.0214, |
| "step": 37100 |
| }, |
| { |
| "epoch": 1.1904, |
| "grad_norm": 10.218805313110352, |
| "learning_rate": 1.528744783306581e-05, |
| "loss": 0.0528, |
| "step": 37200 |
| }, |
| { |
| "epoch": 1.1936, |
| "grad_norm": 0.0384359173476696, |
| "learning_rate": 1.5274606741573036e-05, |
| "loss": 0.0625, |
| "step": 37300 |
| }, |
| { |
| "epoch": 1.1968, |
| "grad_norm": 0.3779418170452118, |
| "learning_rate": 1.5261765650080258e-05, |
| "loss": 0.0328, |
| "step": 37400 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 5.618625640869141, |
| "learning_rate": 1.5248924558587481e-05, |
| "loss": 0.065, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.2032, |
| "grad_norm": 2.0705819129943848, |
| "learning_rate": 1.5236083467094704e-05, |
| "loss": 0.0594, |
| "step": 37600 |
| }, |
| { |
| "epoch": 1.2064, |
| "grad_norm": 0.14193743467330933, |
| "learning_rate": 1.5223242375601927e-05, |
| "loss": 0.0417, |
| "step": 37700 |
| }, |
| { |
| "epoch": 1.2096, |
| "grad_norm": 0.034703925251960754, |
| "learning_rate": 1.521040128410915e-05, |
| "loss": 0.0389, |
| "step": 37800 |
| }, |
| { |
| "epoch": 1.2128, |
| "grad_norm": 0.011800256557762623, |
| "learning_rate": 1.5197560192616374e-05, |
| "loss": 0.0393, |
| "step": 37900 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.28774189949035645, |
| "learning_rate": 1.5184719101123597e-05, |
| "loss": 0.0504, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.2192, |
| "grad_norm": 0.035763729363679886, |
| "learning_rate": 1.517187800963082e-05, |
| "loss": 0.0534, |
| "step": 38100 |
| }, |
| { |
| "epoch": 1.2224, |
| "grad_norm": 0.158742755651474, |
| "learning_rate": 1.5159036918138043e-05, |
| "loss": 0.0436, |
| "step": 38200 |
| }, |
| { |
| "epoch": 1.2256, |
| "grad_norm": 9.794978141784668, |
| "learning_rate": 1.5146195826645266e-05, |
| "loss": 0.0502, |
| "step": 38300 |
| }, |
| { |
| "epoch": 1.2288000000000001, |
| "grad_norm": 0.021368766203522682, |
| "learning_rate": 1.513335473515249e-05, |
| "loss": 0.0286, |
| "step": 38400 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 0.5888408422470093, |
| "learning_rate": 1.5120513643659714e-05, |
| "loss": 0.0674, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.2352, |
| "grad_norm": 0.005416017957031727, |
| "learning_rate": 1.5107672552166937e-05, |
| "loss": 0.0381, |
| "step": 38600 |
| }, |
| { |
| "epoch": 1.2384, |
| "grad_norm": 0.03922798112034798, |
| "learning_rate": 1.5094831460674157e-05, |
| "loss": 0.0747, |
| "step": 38700 |
| }, |
| { |
| "epoch": 1.2416, |
| "grad_norm": 0.030901480466127396, |
| "learning_rate": 1.508199036918138e-05, |
| "loss": 0.0491, |
| "step": 38800 |
| }, |
| { |
| "epoch": 1.2448, |
| "grad_norm": 0.02417912147939205, |
| "learning_rate": 1.5069149277688603e-05, |
| "loss": 0.0465, |
| "step": 38900 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 15.668951988220215, |
| "learning_rate": 1.5056308186195826e-05, |
| "loss": 0.0535, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.2511999999999999, |
| "grad_norm": 0.21102702617645264, |
| "learning_rate": 1.504346709470305e-05, |
| "loss": 0.0517, |
| "step": 39100 |
| }, |
| { |
| "epoch": 1.2544, |
| "grad_norm": 0.06641241163015366, |
| "learning_rate": 1.5030626003210274e-05, |
| "loss": 0.0416, |
| "step": 39200 |
| }, |
| { |
| "epoch": 1.2576, |
| "grad_norm": 0.5495890974998474, |
| "learning_rate": 1.5017784911717497e-05, |
| "loss": 0.0357, |
| "step": 39300 |
| }, |
| { |
| "epoch": 1.2608, |
| "grad_norm": 0.035381533205509186, |
| "learning_rate": 1.500494382022472e-05, |
| "loss": 0.0577, |
| "step": 39400 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.03879441320896149, |
| "learning_rate": 1.4992102728731944e-05, |
| "loss": 0.0191, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.2671999999999999, |
| "grad_norm": 0.014720222912728786, |
| "learning_rate": 1.4979261637239167e-05, |
| "loss": 0.0423, |
| "step": 39600 |
| }, |
| { |
| "epoch": 1.2704, |
| "grad_norm": 3.2292592525482178, |
| "learning_rate": 1.496642054574639e-05, |
| "loss": 0.0602, |
| "step": 39700 |
| }, |
| { |
| "epoch": 1.2736, |
| "grad_norm": 1.6030577421188354, |
| "learning_rate": 1.4953579454253613e-05, |
| "loss": 0.0543, |
| "step": 39800 |
| }, |
| { |
| "epoch": 1.2768, |
| "grad_norm": 0.031688716262578964, |
| "learning_rate": 1.4940738362760836e-05, |
| "loss": 0.0341, |
| "step": 39900 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 9.190576553344727, |
| "learning_rate": 1.492789727126806e-05, |
| "loss": 0.0381, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.98796, |
| "eval_f1": 0.9879625821520611, |
| "eval_loss": 0.04839452728629112, |
| "eval_precision": 0.9880194851769686, |
| "eval_recall": 0.98796, |
| "eval_runtime": 777.527, |
| "eval_samples_per_second": 128.613, |
| "eval_steps_per_second": 8.038, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.2832, |
| "grad_norm": 0.879464328289032, |
| "learning_rate": 1.4915056179775283e-05, |
| "loss": 0.0371, |
| "step": 40100 |
| }, |
| { |
| "epoch": 1.2864, |
| "grad_norm": 0.5318993926048279, |
| "learning_rate": 1.4902215088282506e-05, |
| "loss": 0.0638, |
| "step": 40200 |
| }, |
| { |
| "epoch": 1.2896, |
| "grad_norm": 0.024928994476795197, |
| "learning_rate": 1.4889373996789729e-05, |
| "loss": 0.039, |
| "step": 40300 |
| }, |
| { |
| "epoch": 1.2928, |
| "grad_norm": 15.540450096130371, |
| "learning_rate": 1.4876532905296952e-05, |
| "loss": 0.0392, |
| "step": 40400 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 3.986953020095825, |
| "learning_rate": 1.4863691813804175e-05, |
| "loss": 0.0518, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.2992, |
| "grad_norm": 0.03195634484291077, |
| "learning_rate": 1.4850850722311398e-05, |
| "loss": 0.0614, |
| "step": 40600 |
| }, |
| { |
| "epoch": 1.3024, |
| "grad_norm": 0.004710075911134481, |
| "learning_rate": 1.4838009630818621e-05, |
| "loss": 0.0384, |
| "step": 40700 |
| }, |
| { |
| "epoch": 1.3056, |
| "grad_norm": 0.08971494436264038, |
| "learning_rate": 1.4825168539325845e-05, |
| "loss": 0.051, |
| "step": 40800 |
| }, |
| { |
| "epoch": 1.3088, |
| "grad_norm": 0.00958671048283577, |
| "learning_rate": 1.4812327447833068e-05, |
| "loss": 0.042, |
| "step": 40900 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 26.961130142211914, |
| "learning_rate": 1.4799486356340289e-05, |
| "loss": 0.0512, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.3152, |
| "grad_norm": 0.12558290362358093, |
| "learning_rate": 1.4786645264847512e-05, |
| "loss": 0.0262, |
| "step": 41100 |
| }, |
| { |
| "epoch": 1.3184, |
| "grad_norm": 0.020398223772644997, |
| "learning_rate": 1.4773804173354735e-05, |
| "loss": 0.0504, |
| "step": 41200 |
| }, |
| { |
| "epoch": 1.3216, |
| "grad_norm": 0.04079282656311989, |
| "learning_rate": 1.4760963081861959e-05, |
| "loss": 0.0467, |
| "step": 41300 |
| }, |
| { |
| "epoch": 1.3248, |
| "grad_norm": 0.01801035739481449, |
| "learning_rate": 1.4748121990369182e-05, |
| "loss": 0.031, |
| "step": 41400 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 19.165552139282227, |
| "learning_rate": 1.4735280898876405e-05, |
| "loss": 0.0425, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1.3312, |
| "grad_norm": 0.06247144564986229, |
| "learning_rate": 1.4722439807383628e-05, |
| "loss": 0.0377, |
| "step": 41600 |
| }, |
| { |
| "epoch": 1.3344, |
| "grad_norm": 0.07584625482559204, |
| "learning_rate": 1.4709598715890851e-05, |
| "loss": 0.0318, |
| "step": 41700 |
| }, |
| { |
| "epoch": 1.3376000000000001, |
| "grad_norm": 0.659372866153717, |
| "learning_rate": 1.4696757624398074e-05, |
| "loss": 0.0392, |
| "step": 41800 |
| }, |
| { |
| "epoch": 1.3408, |
| "grad_norm": 0.027756713330745697, |
| "learning_rate": 1.4683916532905297e-05, |
| "loss": 0.0518, |
| "step": 41900 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 0.006904853507876396, |
| "learning_rate": 1.467107544141252e-05, |
| "loss": 0.0456, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.3472, |
| "grad_norm": 0.005585466045886278, |
| "learning_rate": 1.4658234349919744e-05, |
| "loss": 0.046, |
| "step": 42100 |
| }, |
| { |
| "epoch": 1.3504, |
| "grad_norm": 5.473335266113281, |
| "learning_rate": 1.4645393258426967e-05, |
| "loss": 0.0428, |
| "step": 42200 |
| }, |
| { |
| "epoch": 1.3536000000000001, |
| "grad_norm": 10.384184837341309, |
| "learning_rate": 1.4632552166934192e-05, |
| "loss": 0.0512, |
| "step": 42300 |
| }, |
| { |
| "epoch": 1.3568, |
| "grad_norm": 4.152897357940674, |
| "learning_rate": 1.4619711075441415e-05, |
| "loss": 0.0378, |
| "step": 42400 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.06695935130119324, |
| "learning_rate": 1.4606869983948638e-05, |
| "loss": 0.0411, |
| "step": 42500 |
| }, |
| { |
| "epoch": 1.3632, |
| "grad_norm": 21.025299072265625, |
| "learning_rate": 1.4594028892455861e-05, |
| "loss": 0.0373, |
| "step": 42600 |
| }, |
| { |
| "epoch": 1.3664, |
| "grad_norm": 13.606021881103516, |
| "learning_rate": 1.4581187800963084e-05, |
| "loss": 0.0454, |
| "step": 42700 |
| }, |
| { |
| "epoch": 1.3696, |
| "grad_norm": 0.17352361977100372, |
| "learning_rate": 1.4568346709470307e-05, |
| "loss": 0.0513, |
| "step": 42800 |
| }, |
| { |
| "epoch": 1.3728, |
| "grad_norm": 1.2343215942382812, |
| "learning_rate": 1.455550561797753e-05, |
| "loss": 0.0699, |
| "step": 42900 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 0.01240515150129795, |
| "learning_rate": 1.4542664526484754e-05, |
| "loss": 0.0402, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.3792, |
| "grad_norm": 5.457210063934326, |
| "learning_rate": 1.4529823434991977e-05, |
| "loss": 0.0519, |
| "step": 43100 |
| }, |
| { |
| "epoch": 1.3824, |
| "grad_norm": 0.8150522708892822, |
| "learning_rate": 1.45169823434992e-05, |
| "loss": 0.0605, |
| "step": 43200 |
| }, |
| { |
| "epoch": 1.3856, |
| "grad_norm": 0.046282608062028885, |
| "learning_rate": 1.4504141252006421e-05, |
| "loss": 0.0523, |
| "step": 43300 |
| }, |
| { |
| "epoch": 1.3888, |
| "grad_norm": 0.009331628680229187, |
| "learning_rate": 1.4491300160513644e-05, |
| "loss": 0.0453, |
| "step": 43400 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 0.15661238133907318, |
| "learning_rate": 1.4478459069020868e-05, |
| "loss": 0.0303, |
| "step": 43500 |
| }, |
| { |
| "epoch": 1.3952, |
| "grad_norm": 5.842204570770264, |
| "learning_rate": 1.446561797752809e-05, |
| "loss": 0.0369, |
| "step": 43600 |
| }, |
| { |
| "epoch": 1.3984, |
| "grad_norm": 32.753719329833984, |
| "learning_rate": 1.4452776886035314e-05, |
| "loss": 0.0423, |
| "step": 43700 |
| }, |
| { |
| "epoch": 1.4016, |
| "grad_norm": 0.0857323631644249, |
| "learning_rate": 1.4439935794542537e-05, |
| "loss": 0.0452, |
| "step": 43800 |
| }, |
| { |
| "epoch": 1.4048, |
| "grad_norm": 0.03770207613706589, |
| "learning_rate": 1.442709470304976e-05, |
| "loss": 0.0455, |
| "step": 43900 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 0.10206503421068192, |
| "learning_rate": 1.4414253611556983e-05, |
| "loss": 0.0423, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.408, |
| "eval_accuracy": 0.99009, |
| "eval_f1": 0.9900904784547742, |
| "eval_loss": 0.03986356034874916, |
| "eval_precision": 0.9900932283159651, |
| "eval_recall": 0.99009, |
| "eval_runtime": 775.1079, |
| "eval_samples_per_second": 129.014, |
| "eval_steps_per_second": 8.063, |
| "step": 44000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 156250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8683917813152307e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|