| { |
| "best_global_step": 28000, |
| "best_metric": 0.9878109998616666, |
| "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-28000", |
| "epoch": 0.896, |
| "eval_steps": 4000, |
| "global_step": 28000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.3923053741455078, |
| "learning_rate": 3.96e-06, |
| "loss": 0.685, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.665050983428955, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 0.6654, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 3.1655123233795166, |
| "learning_rate": 1.196e-05, |
| "loss": 0.6169, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 3.3807435035705566, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 0.4635, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.7963143587112427, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.2886, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 1.5362155437469482, |
| "learning_rate": 1.9987287319422154e-05, |
| "loss": 0.2605, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 3.649484395980835, |
| "learning_rate": 1.9974446227929375e-05, |
| "loss": 0.2194, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 1.402876377105713, |
| "learning_rate": 1.99616051364366e-05, |
| "loss": 0.2367, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 2.708932399749756, |
| "learning_rate": 1.994876404494382e-05, |
| "loss": 0.2061, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 2.4827463626861572, |
| "learning_rate": 1.9935922953451046e-05, |
| "loss": 0.1892, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 0.9402571320533752, |
| "learning_rate": 1.9923081861958268e-05, |
| "loss": 0.1819, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 0.8088085055351257, |
| "learning_rate": 1.9910240770465493e-05, |
| "loss": 0.1894, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 2.8913955688476562, |
| "learning_rate": 1.9897399678972714e-05, |
| "loss": 0.1733, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 1.9755029678344727, |
| "learning_rate": 1.988455858747994e-05, |
| "loss": 0.1748, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 2.659393310546875, |
| "learning_rate": 1.987171749598716e-05, |
| "loss": 0.1557, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 10.118029594421387, |
| "learning_rate": 1.9858876404494382e-05, |
| "loss": 0.1702, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 3.736616373062134, |
| "learning_rate": 1.9846035313001607e-05, |
| "loss": 0.1694, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 4.140033721923828, |
| "learning_rate": 1.9833194221508828e-05, |
| "loss": 0.1615, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 4.504345893859863, |
| "learning_rate": 1.9820353130016053e-05, |
| "loss": 0.1425, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 5.786899089813232, |
| "learning_rate": 1.9807512038523274e-05, |
| "loss": 0.1588, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 9.956130027770996, |
| "learning_rate": 1.97946709470305e-05, |
| "loss": 0.1399, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 11.86201286315918, |
| "learning_rate": 1.978182985553772e-05, |
| "loss": 0.173, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 0.6308254599571228, |
| "learning_rate": 1.9768988764044946e-05, |
| "loss": 0.1428, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 3.123718023300171, |
| "learning_rate": 1.9756147672552167e-05, |
| "loss": 0.1365, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.8000500202178955, |
| "learning_rate": 1.9743306581059392e-05, |
| "loss": 0.1528, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 2.9149184226989746, |
| "learning_rate": 1.9730465489566613e-05, |
| "loss": 0.1568, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 8.345555305480957, |
| "learning_rate": 1.9717624398073838e-05, |
| "loss": 0.139, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 1.785736322402954, |
| "learning_rate": 1.970478330658106e-05, |
| "loss": 0.1509, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 5.256857872009277, |
| "learning_rate": 1.9691942215088284e-05, |
| "loss": 0.1284, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 2.305225133895874, |
| "learning_rate": 1.967910112359551e-05, |
| "loss": 0.1249, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 1.1149404048919678, |
| "learning_rate": 1.966626003210273e-05, |
| "loss": 0.1223, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.24498392641544342, |
| "learning_rate": 1.9653418940609955e-05, |
| "loss": 0.1415, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 3.027209520339966, |
| "learning_rate": 1.9640577849117177e-05, |
| "loss": 0.133, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 10.783885955810547, |
| "learning_rate": 1.9627736757624402e-05, |
| "loss": 0.1309, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 7.116244316101074, |
| "learning_rate": 1.9614895666131623e-05, |
| "loss": 0.1056, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 8.193492889404297, |
| "learning_rate": 1.9602054574638848e-05, |
| "loss": 0.1201, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.9174596071243286, |
| "learning_rate": 1.958921348314607e-05, |
| "loss": 0.12, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 3.436502695083618, |
| "learning_rate": 1.957637239165329e-05, |
| "loss": 0.1257, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 9.131117820739746, |
| "learning_rate": 1.9563531300160516e-05, |
| "loss": 0.1139, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.3150777518749237, |
| "learning_rate": 1.9550690208667737e-05, |
| "loss": 0.1381, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.128, |
| "eval_accuracy": 0.9586, |
| "eval_f1": 0.9586218246722099, |
| "eval_loss": 0.1626722663640976, |
| "eval_precision": 0.9598553600795934, |
| "eval_recall": 0.9586, |
| "eval_runtime": 815.7025, |
| "eval_samples_per_second": 122.594, |
| "eval_steps_per_second": 7.662, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 8.65023422241211, |
| "learning_rate": 1.9537849117174962e-05, |
| "loss": 0.1026, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 8.46996784210205, |
| "learning_rate": 1.9525008025682183e-05, |
| "loss": 0.1251, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 6.439260959625244, |
| "learning_rate": 1.9512166934189408e-05, |
| "loss": 0.1338, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 1.012399435043335, |
| "learning_rate": 1.949932584269663e-05, |
| "loss": 0.1164, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.33647093176841736, |
| "learning_rate": 1.9486484751203855e-05, |
| "loss": 0.1156, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 7.39678955078125, |
| "learning_rate": 1.9473643659711076e-05, |
| "loss": 0.1013, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 4.556000709533691, |
| "learning_rate": 1.94608025682183e-05, |
| "loss": 0.1038, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.5087370276451111, |
| "learning_rate": 1.9447961476725522e-05, |
| "loss": 0.1319, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 4.385759353637695, |
| "learning_rate": 1.9435120385232747e-05, |
| "loss": 0.1229, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 5.46568489074707, |
| "learning_rate": 1.942227929373997e-05, |
| "loss": 0.1234, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 3.3565216064453125, |
| "learning_rate": 1.9409438202247193e-05, |
| "loss": 0.1007, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.17969129979610443, |
| "learning_rate": 1.9396597110754415e-05, |
| "loss": 0.108, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 8.49416732788086, |
| "learning_rate": 1.938375601926164e-05, |
| "loss": 0.1105, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 6.072606086730957, |
| "learning_rate": 1.937091492776886e-05, |
| "loss": 0.1082, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.2361280918121338, |
| "learning_rate": 1.9358073836276086e-05, |
| "loss": 0.0966, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 6.232377052307129, |
| "learning_rate": 1.9345232744783307e-05, |
| "loss": 0.1151, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.6630802154541016, |
| "learning_rate": 1.9332391653290532e-05, |
| "loss": 0.104, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.2744814157485962, |
| "learning_rate": 1.9319550561797754e-05, |
| "loss": 0.104, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.37808698415756226, |
| "learning_rate": 1.930670947030498e-05, |
| "loss": 0.1183, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 11.47230052947998, |
| "learning_rate": 1.9293868378812203e-05, |
| "loss": 0.1258, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 7.168028354644775, |
| "learning_rate": 1.928102728731942e-05, |
| "loss": 0.1117, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.11318526417016983, |
| "learning_rate": 1.9268186195826646e-05, |
| "loss": 0.1004, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 9.405433654785156, |
| "learning_rate": 1.9255345104333868e-05, |
| "loss": 0.1195, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.507453978061676, |
| "learning_rate": 1.9242504012841092e-05, |
| "loss": 0.0987, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.15791097283363342, |
| "learning_rate": 1.9229662921348314e-05, |
| "loss": 0.1156, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 5.216452121734619, |
| "learning_rate": 1.921682182985554e-05, |
| "loss": 0.1375, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.6168243885040283, |
| "learning_rate": 1.9203980738362764e-05, |
| "loss": 0.1086, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 2.949383497238159, |
| "learning_rate": 1.9191139646869985e-05, |
| "loss": 0.0957, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 9.519874572753906, |
| "learning_rate": 1.917829855537721e-05, |
| "loss": 0.0935, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 7.931914329528809, |
| "learning_rate": 1.916545746388443e-05, |
| "loss": 0.1335, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 8.498374938964844, |
| "learning_rate": 1.9152616372391656e-05, |
| "loss": 0.1025, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 5.054536819458008, |
| "learning_rate": 1.9139775280898878e-05, |
| "loss": 0.1165, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.17365020513534546, |
| "learning_rate": 1.9126934189406102e-05, |
| "loss": 0.0996, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 2.247058629989624, |
| "learning_rate": 1.9114093097913324e-05, |
| "loss": 0.1009, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 15.367574691772461, |
| "learning_rate": 1.910125200642055e-05, |
| "loss": 0.1082, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 6.72482967376709, |
| "learning_rate": 1.908841091492777e-05, |
| "loss": 0.1308, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.051803406327962875, |
| "learning_rate": 1.9075569823434995e-05, |
| "loss": 0.1031, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 9.355685234069824, |
| "learning_rate": 1.9062728731942216e-05, |
| "loss": 0.1012, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 2.047060012817383, |
| "learning_rate": 1.904988764044944e-05, |
| "loss": 0.1249, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 4.134668350219727, |
| "learning_rate": 1.9037046548956663e-05, |
| "loss": 0.0821, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.256, |
| "eval_accuracy": 0.97609, |
| "eval_f1": 0.9760920422665103, |
| "eval_loss": 0.10814645141363144, |
| "eval_precision": 0.9761013685233434, |
| "eval_recall": 0.97609, |
| "eval_runtime": 817.6077, |
| "eval_samples_per_second": 122.308, |
| "eval_steps_per_second": 7.644, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 6.723151206970215, |
| "learning_rate": 1.9024205457463887e-05, |
| "loss": 0.0799, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 1.805972933769226, |
| "learning_rate": 1.901136436597111e-05, |
| "loss": 0.1211, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 11.118502616882324, |
| "learning_rate": 1.8998523274478334e-05, |
| "loss": 0.0916, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 2.3953633308410645, |
| "learning_rate": 1.8985682182985555e-05, |
| "loss": 0.0878, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.10008874535560608, |
| "learning_rate": 1.8972841091492777e-05, |
| "loss": 0.0868, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.06934285163879395, |
| "learning_rate": 1.896e-05, |
| "loss": 0.1155, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.3781879246234894, |
| "learning_rate": 1.8947158908507223e-05, |
| "loss": 0.0988, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 9.6624174118042, |
| "learning_rate": 1.8934317817014448e-05, |
| "loss": 0.1072, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 0.09044591337442398, |
| "learning_rate": 1.892147672552167e-05, |
| "loss": 0.1085, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.0671633929014206, |
| "learning_rate": 1.8908635634028894e-05, |
| "loss": 0.1081, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 8.130873680114746, |
| "learning_rate": 1.8895794542536115e-05, |
| "loss": 0.0968, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 1.2537904977798462, |
| "learning_rate": 1.888295345104334e-05, |
| "loss": 0.0877, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 6.084417819976807, |
| "learning_rate": 1.887011235955056e-05, |
| "loss": 0.1022, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 6.140512943267822, |
| "learning_rate": 1.8857271268057787e-05, |
| "loss": 0.1016, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 1.7347182035446167, |
| "learning_rate": 1.8844430176565008e-05, |
| "loss": 0.092, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.6796423196792603, |
| "learning_rate": 1.8831589085072233e-05, |
| "loss": 0.1172, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 10.664779663085938, |
| "learning_rate": 1.8818747993579454e-05, |
| "loss": 0.0808, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.4076235294342041, |
| "learning_rate": 1.880590690208668e-05, |
| "loss": 0.0836, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 9.418440818786621, |
| "learning_rate": 1.8793065810593904e-05, |
| "loss": 0.0675, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.2078857421875, |
| "learning_rate": 1.8780224719101125e-05, |
| "loss": 0.0686, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.47173646092414856, |
| "learning_rate": 1.876738362760835e-05, |
| "loss": 0.1096, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 4.5297322273254395, |
| "learning_rate": 1.875454253611557e-05, |
| "loss": 0.098, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 5.099269866943359, |
| "learning_rate": 1.8741701444622796e-05, |
| "loss": 0.1063, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 2.588848114013672, |
| "learning_rate": 1.8728860353130018e-05, |
| "loss": 0.0989, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 4.629786968231201, |
| "learning_rate": 1.8716019261637243e-05, |
| "loss": 0.1018, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 11.187308311462402, |
| "learning_rate": 1.8703178170144464e-05, |
| "loss": 0.0864, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 2.476482391357422, |
| "learning_rate": 1.869033707865169e-05, |
| "loss": 0.0744, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 17.418149948120117, |
| "learning_rate": 1.867749598715891e-05, |
| "loss": 0.1189, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 11.753310203552246, |
| "learning_rate": 1.8664654895666132e-05, |
| "loss": 0.0832, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.41917338967323303, |
| "learning_rate": 1.8651813804173357e-05, |
| "loss": 0.1063, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 14.072111129760742, |
| "learning_rate": 1.8638972712680578e-05, |
| "loss": 0.1061, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 2.6141397953033447, |
| "learning_rate": 1.8626131621187803e-05, |
| "loss": 0.0934, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 3.1363914012908936, |
| "learning_rate": 1.8613290529695024e-05, |
| "loss": 0.0879, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 4.260811805725098, |
| "learning_rate": 1.860044943820225e-05, |
| "loss": 0.092, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.2677570879459381, |
| "learning_rate": 1.858760834670947e-05, |
| "loss": 0.1202, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.056061357259750366, |
| "learning_rate": 1.8574767255216696e-05, |
| "loss": 0.0773, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 7.95279598236084, |
| "learning_rate": 1.8561926163723917e-05, |
| "loss": 0.0749, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 7.4200873374938965, |
| "learning_rate": 1.8549085072231142e-05, |
| "loss": 0.1054, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.8819625973701477, |
| "learning_rate": 1.8536243980738363e-05, |
| "loss": 0.0998, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 6.32806396484375, |
| "learning_rate": 1.8523402889245588e-05, |
| "loss": 0.0667, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.384, |
| "eval_accuracy": 0.97856, |
| "eval_f1": 0.97856556986665, |
| "eval_loss": 0.1008467897772789, |
| "eval_precision": 0.9786554480535211, |
| "eval_recall": 0.97856, |
| "eval_runtime": 822.182, |
| "eval_samples_per_second": 121.628, |
| "eval_steps_per_second": 7.602, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 11.900330543518066, |
| "learning_rate": 1.851056179775281e-05, |
| "loss": 0.0958, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 18.242124557495117, |
| "learning_rate": 1.8497720706260034e-05, |
| "loss": 0.075, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.0790882408618927, |
| "learning_rate": 1.8484879614767256e-05, |
| "loss": 0.071, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 16.233280181884766, |
| "learning_rate": 1.847203852327448e-05, |
| "loss": 0.1109, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 12.048758506774902, |
| "learning_rate": 1.8459197431781702e-05, |
| "loss": 0.0703, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 9.086562156677246, |
| "learning_rate": 1.8446356340288927e-05, |
| "loss": 0.0706, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 9.871477127075195, |
| "learning_rate": 1.843351524879615e-05, |
| "loss": 0.0745, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.7136130928993225, |
| "learning_rate": 1.8420674157303373e-05, |
| "loss": 0.0544, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 10.892882347106934, |
| "learning_rate": 1.8407833065810598e-05, |
| "loss": 0.1109, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 1.3350844383239746, |
| "learning_rate": 1.839499197431782e-05, |
| "loss": 0.0898, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 11.215353012084961, |
| "learning_rate": 1.838215088282504e-05, |
| "loss": 0.0963, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.2309681475162506, |
| "learning_rate": 1.8369309791332262e-05, |
| "loss": 0.0785, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.0822586640715599, |
| "learning_rate": 1.8356468699839487e-05, |
| "loss": 0.0736, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.6296452283859253, |
| "learning_rate": 1.834362760834671e-05, |
| "loss": 0.1064, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 7.989764213562012, |
| "learning_rate": 1.8330786516853933e-05, |
| "loss": 0.0885, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.04016461223363876, |
| "learning_rate": 1.8317945425361158e-05, |
| "loss": 0.0574, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.03219222649931908, |
| "learning_rate": 1.830510433386838e-05, |
| "loss": 0.0742, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 11.071674346923828, |
| "learning_rate": 1.8292263242375605e-05, |
| "loss": 0.0968, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 22.920804977416992, |
| "learning_rate": 1.8279422150882826e-05, |
| "loss": 0.0782, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.05693759024143219, |
| "learning_rate": 1.826658105939005e-05, |
| "loss": 0.0538, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.08659256994724274, |
| "learning_rate": 1.8253739967897272e-05, |
| "loss": 0.0699, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 3.1322948932647705, |
| "learning_rate": 1.8240898876404497e-05, |
| "loss": 0.0841, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.025594089180231094, |
| "learning_rate": 1.822805778491172e-05, |
| "loss": 0.1108, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.5694848299026489, |
| "learning_rate": 1.8215216693418943e-05, |
| "loss": 0.0705, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 5.643801212310791, |
| "learning_rate": 1.8202375601926165e-05, |
| "loss": 0.0773, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 1.6325165033340454, |
| "learning_rate": 1.818953451043339e-05, |
| "loss": 0.1244, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.04731294512748718, |
| "learning_rate": 1.817669341894061e-05, |
| "loss": 0.0523, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 9.385772705078125, |
| "learning_rate": 1.8163852327447836e-05, |
| "loss": 0.0739, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.04752274602651596, |
| "learning_rate": 1.8151011235955057e-05, |
| "loss": 0.0636, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.4523526430130005, |
| "learning_rate": 1.8138170144462282e-05, |
| "loss": 0.0907, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.10660147666931152, |
| "learning_rate": 1.8125329052969504e-05, |
| "loss": 0.1093, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.024508710950613022, |
| "learning_rate": 1.811248796147673e-05, |
| "loss": 0.0562, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 4.832937240600586, |
| "learning_rate": 1.809964686998395e-05, |
| "loss": 0.0694, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 12.76455020904541, |
| "learning_rate": 1.808680577849117e-05, |
| "loss": 0.0525, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.244754359126091, |
| "learning_rate": 1.8073964686998396e-05, |
| "loss": 0.0632, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 19.315397262573242, |
| "learning_rate": 1.8061123595505618e-05, |
| "loss": 0.0794, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.05077612027525902, |
| "learning_rate": 1.8048282504012842e-05, |
| "loss": 0.0848, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.33186858892440796, |
| "learning_rate": 1.8035441412520064e-05, |
| "loss": 0.0894, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.26919984817504883, |
| "learning_rate": 1.802260032102729e-05, |
| "loss": 0.0801, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 3.566136121749878, |
| "learning_rate": 1.800975922953451e-05, |
| "loss": 0.0754, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.512, |
| "eval_accuracy": 0.98201, |
| "eval_f1": 0.9820134202589396, |
| "eval_loss": 0.07791993767023087, |
| "eval_precision": 0.9820579125315673, |
| "eval_recall": 0.98201, |
| "eval_runtime": 823.0886, |
| "eval_samples_per_second": 121.494, |
| "eval_steps_per_second": 7.593, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 8.349530220031738, |
| "learning_rate": 1.7996918138041735e-05, |
| "loss": 0.0724, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.05041235312819481, |
| "learning_rate": 1.7984077046548956e-05, |
| "loss": 0.0428, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.3858475685119629, |
| "learning_rate": 1.797123595505618e-05, |
| "loss": 0.0569, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 8.561657905578613, |
| "learning_rate": 1.7958394863563403e-05, |
| "loss": 0.0758, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.025413183495402336, |
| "learning_rate": 1.7945553772070628e-05, |
| "loss": 0.0548, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.010517638176679611, |
| "learning_rate": 1.7932712680577852e-05, |
| "loss": 0.0706, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.08805025368928909, |
| "learning_rate": 1.7919871589085074e-05, |
| "loss": 0.077, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 5.7931342124938965, |
| "learning_rate": 1.79070304975923e-05, |
| "loss": 0.0669, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.07078564912080765, |
| "learning_rate": 1.789418940609952e-05, |
| "loss": 0.1069, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.9886382818222046, |
| "learning_rate": 1.7881348314606745e-05, |
| "loss": 0.0501, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 6.3300065994262695, |
| "learning_rate": 1.7868507223113966e-05, |
| "loss": 0.0744, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.18114350736141205, |
| "learning_rate": 1.785566613162119e-05, |
| "loss": 0.0782, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.2821557819843292, |
| "learning_rate": 1.7842825040128413e-05, |
| "loss": 0.0477, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 17.2164306640625, |
| "learning_rate": 1.7829983948635637e-05, |
| "loss": 0.0522, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 11.891914367675781, |
| "learning_rate": 1.781714285714286e-05, |
| "loss": 0.0859, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 12.457894325256348, |
| "learning_rate": 1.7804301765650084e-05, |
| "loss": 0.0561, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 1.845371961593628, |
| "learning_rate": 1.7791460674157305e-05, |
| "loss": 0.0598, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 5.928323268890381, |
| "learning_rate": 1.7778619582664527e-05, |
| "loss": 0.0591, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.10898735374212265, |
| "learning_rate": 1.776577849117175e-05, |
| "loss": 0.068, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 8.659664154052734, |
| "learning_rate": 1.7752937399678973e-05, |
| "loss": 0.0672, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.1051400676369667, |
| "learning_rate": 1.7740096308186198e-05, |
| "loss": 0.0845, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 2.4647881984710693, |
| "learning_rate": 1.772725521669342e-05, |
| "loss": 0.067, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.04466241970658302, |
| "learning_rate": 1.7714414125200644e-05, |
| "loss": 0.0657, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.014231475070118904, |
| "learning_rate": 1.7701573033707865e-05, |
| "loss": 0.0666, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 10.516510009765625, |
| "learning_rate": 1.768873194221509e-05, |
| "loss": 0.0692, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 1.2347426414489746, |
| "learning_rate": 1.767589085072231e-05, |
| "loss": 0.057, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.09256485849618912, |
| "learning_rate": 1.7663049759229537e-05, |
| "loss": 0.0726, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 9.093379020690918, |
| "learning_rate": 1.7650208667736758e-05, |
| "loss": 0.0902, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 2.5893750190734863, |
| "learning_rate": 1.7637367576243983e-05, |
| "loss": 0.0611, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.708980143070221, |
| "learning_rate": 1.7624526484751204e-05, |
| "loss": 0.0726, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.35581639409065247, |
| "learning_rate": 1.761168539325843e-05, |
| "loss": 0.0681, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 1.3588510751724243, |
| "learning_rate": 1.759884430176565e-05, |
| "loss": 0.0662, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 9.142585754394531, |
| "learning_rate": 1.7586003210272875e-05, |
| "loss": 0.0559, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 16.729188919067383, |
| "learning_rate": 1.7573162118780097e-05, |
| "loss": 0.0754, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 11.582767486572266, |
| "learning_rate": 1.756032102728732e-05, |
| "loss": 0.0681, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.046063363552093506, |
| "learning_rate": 1.7547479935794543e-05, |
| "loss": 0.0603, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.294583261013031, |
| "learning_rate": 1.7534638844301768e-05, |
| "loss": 0.0518, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.014278042130172253, |
| "learning_rate": 1.7521797752808993e-05, |
| "loss": 0.0576, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.16866210103034973, |
| "learning_rate": 1.7508956661316214e-05, |
| "loss": 0.0701, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.832259654998779, |
| "learning_rate": 1.749611556982344e-05, |
| "loss": 0.0776, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_accuracy": 0.98464, |
| "eval_f1": 0.9846426496660109, |
| "eval_loss": 0.061699531972408295, |
| "eval_precision": 0.9846784488090538, |
| "eval_recall": 0.98464, |
| "eval_runtime": 814.0166, |
| "eval_samples_per_second": 122.848, |
| "eval_steps_per_second": 7.678, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 25.919387817382812, |
| "learning_rate": 1.7483274478330657e-05, |
| "loss": 0.0704, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 0.27127301692962646, |
| "learning_rate": 1.7470433386837882e-05, |
| "loss": 0.0749, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.1059252917766571, |
| "learning_rate": 1.7457592295345103e-05, |
| "loss": 0.0793, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.6019250154495239, |
| "learning_rate": 1.7444751203852328e-05, |
| "loss": 0.059, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.28291046619415283, |
| "learning_rate": 1.7431910112359553e-05, |
| "loss": 0.0569, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.15100154280662537, |
| "learning_rate": 1.7419069020866774e-05, |
| "loss": 0.0398, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.017900506034493446, |
| "learning_rate": 1.7406227929374e-05, |
| "loss": 0.0559, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.007751260884106159, |
| "learning_rate": 1.739338683788122e-05, |
| "loss": 0.0621, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 5.354798793792725, |
| "learning_rate": 1.7380545746388445e-05, |
| "loss": 0.085, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 2.312457799911499, |
| "learning_rate": 1.7367704654895667e-05, |
| "loss": 0.0654, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.10008107125759125, |
| "learning_rate": 1.7354863563402892e-05, |
| "loss": 0.0697, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.008539400063455105, |
| "learning_rate": 1.7342022471910113e-05, |
| "loss": 0.0687, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 1.0686814785003662, |
| "learning_rate": 1.7329181380417338e-05, |
| "loss": 0.0491, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 42.248897552490234, |
| "learning_rate": 1.731634028892456e-05, |
| "loss": 0.0464, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 17.70836067199707, |
| "learning_rate": 1.7303499197431784e-05, |
| "loss": 0.109, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 11.702173233032227, |
| "learning_rate": 1.7290658105939006e-05, |
| "loss": 0.0626, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.15207910537719727, |
| "learning_rate": 1.727781701444623e-05, |
| "loss": 0.0617, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.7698332667350769, |
| "learning_rate": 1.7264975922953452e-05, |
| "loss": 0.0508, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.012268565595149994, |
| "learning_rate": 1.7252134831460677e-05, |
| "loss": 0.0518, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.07914119213819504, |
| "learning_rate": 1.7239293739967898e-05, |
| "loss": 0.0699, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.5616400241851807, |
| "learning_rate": 1.7226452648475123e-05, |
| "loss": 0.0649, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 6.950782299041748, |
| "learning_rate": 1.7213611556982345e-05, |
| "loss": 0.0719, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.07157002389431, |
| "learning_rate": 1.720077046548957e-05, |
| "loss": 0.0403, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 2.784773588180542, |
| "learning_rate": 1.718792937399679e-05, |
| "loss": 0.0468, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.088102325797081, |
| "learning_rate": 1.7175088282504012e-05, |
| "loss": 0.0612, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.373806357383728, |
| "learning_rate": 1.7162247191011237e-05, |
| "loss": 0.0607, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 10.401127815246582, |
| "learning_rate": 1.714940609951846e-05, |
| "loss": 0.0572, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.006438109558075666, |
| "learning_rate": 1.7136565008025683e-05, |
| "loss": 0.0472, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.15433204174041748, |
| "learning_rate": 1.7123723916532905e-05, |
| "loss": 0.0578, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.29676365852355957, |
| "learning_rate": 1.711088282504013e-05, |
| "loss": 0.0355, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.007739920634776354, |
| "learning_rate": 1.709804173354735e-05, |
| "loss": 0.0545, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 16.565767288208008, |
| "learning_rate": 1.7085200642054576e-05, |
| "loss": 0.0662, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.013131607323884964, |
| "learning_rate": 1.7072359550561797e-05, |
| "loss": 0.0734, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 1.746962308883667, |
| "learning_rate": 1.7059518459069022e-05, |
| "loss": 0.0558, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 6.599545955657959, |
| "learning_rate": 1.7046677367576247e-05, |
| "loss": 0.0485, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 4.333959102630615, |
| "learning_rate": 1.703383627608347e-05, |
| "loss": 0.0554, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.16271114349365234, |
| "learning_rate": 1.7020995184590693e-05, |
| "loss": 0.0367, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 17.760648727416992, |
| "learning_rate": 1.7008154093097915e-05, |
| "loss": 0.0871, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.15831367671489716, |
| "learning_rate": 1.699531300160514e-05, |
| "loss": 0.0525, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 10.259693145751953, |
| "learning_rate": 1.698247191011236e-05, |
| "loss": 0.0643, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.768, |
| "eval_accuracy": 0.98307, |
| "eval_f1": 0.9830752640408086, |
| "eval_loss": 0.07612209022045135, |
| "eval_precision": 0.9832125198389433, |
| "eval_recall": 0.98307, |
| "eval_runtime": 774.4597, |
| "eval_samples_per_second": 129.122, |
| "eval_steps_per_second": 8.07, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.025170153006911278, |
| "learning_rate": 1.6969630818619586e-05, |
| "loss": 0.0476, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.005416714586317539, |
| "learning_rate": 1.6956789727126807e-05, |
| "loss": 0.0625, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 35.962677001953125, |
| "learning_rate": 1.6943948635634032e-05, |
| "loss": 0.0492, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.019019972532987595, |
| "learning_rate": 1.6931107544141254e-05, |
| "loss": 0.0567, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 4.536252021789551, |
| "learning_rate": 1.691826645264848e-05, |
| "loss": 0.0564, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 20.884357452392578, |
| "learning_rate": 1.69054253611557e-05, |
| "loss": 0.059, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 4.154411792755127, |
| "learning_rate": 1.689258426966292e-05, |
| "loss": 0.0567, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.23815973103046417, |
| "learning_rate": 1.6879743178170146e-05, |
| "loss": 0.0593, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.027695050463080406, |
| "learning_rate": 1.6866902086677368e-05, |
| "loss": 0.0545, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.052229318767786026, |
| "learning_rate": 1.6854060995184592e-05, |
| "loss": 0.0584, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.007615574169903994, |
| "learning_rate": 1.6841219903691814e-05, |
| "loss": 0.0492, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.34495148062705994, |
| "learning_rate": 1.682837881219904e-05, |
| "loss": 0.0512, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.4945124685764313, |
| "learning_rate": 1.681553772070626e-05, |
| "loss": 0.0659, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 3.508509397506714, |
| "learning_rate": 1.6802696629213485e-05, |
| "loss": 0.0431, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 9.751060485839844, |
| "learning_rate": 1.6789855537720706e-05, |
| "loss": 0.0609, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 13.404821395874023, |
| "learning_rate": 1.677701444622793e-05, |
| "loss": 0.0451, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.09565193206071854, |
| "learning_rate": 1.6764173354735153e-05, |
| "loss": 0.0545, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.010966203175485134, |
| "learning_rate": 1.6751332263242377e-05, |
| "loss": 0.0703, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.18196341395378113, |
| "learning_rate": 1.67384911717496e-05, |
| "loss": 0.0392, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 15.195699691772461, |
| "learning_rate": 1.6725650080256824e-05, |
| "loss": 0.0429, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.2465728521347046, |
| "learning_rate": 1.6712808988764045e-05, |
| "loss": 0.0546, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 15.004085540771484, |
| "learning_rate": 1.669996789727127e-05, |
| "loss": 0.0716, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.17839568853378296, |
| "learning_rate": 1.668712680577849e-05, |
| "loss": 0.0413, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 30.638254165649414, |
| "learning_rate": 1.6674285714285716e-05, |
| "loss": 0.0391, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 8.340238571166992, |
| "learning_rate": 1.6661444622792938e-05, |
| "loss": 0.0579, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.026733344420790672, |
| "learning_rate": 1.6648603531300163e-05, |
| "loss": 0.0647, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.013905318453907967, |
| "learning_rate": 1.6635762439807387e-05, |
| "loss": 0.0258, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.011157176457345486, |
| "learning_rate": 1.662292134831461e-05, |
| "loss": 0.0497, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.029731517657637596, |
| "learning_rate": 1.6610080256821834e-05, |
| "loss": 0.0567, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.3443889319896698, |
| "learning_rate": 1.6597239165329055e-05, |
| "loss": 0.0533, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.5098379254341125, |
| "learning_rate": 1.6584398073836277e-05, |
| "loss": 0.0532, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.045412395149469376, |
| "learning_rate": 1.6571556982343498e-05, |
| "loss": 0.0533, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 3.6077659130096436, |
| "learning_rate": 1.6558715890850723e-05, |
| "loss": 0.0432, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 27.717470169067383, |
| "learning_rate": 1.6545874799357948e-05, |
| "loss": 0.0599, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 8.342029571533203, |
| "learning_rate": 1.653303370786517e-05, |
| "loss": 0.066, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.09634260088205338, |
| "learning_rate": 1.6520192616372394e-05, |
| "loss": 0.0516, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.13491930067539215, |
| "learning_rate": 1.6507351524879615e-05, |
| "loss": 0.0539, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.23464186489582062, |
| "learning_rate": 1.649451043338684e-05, |
| "loss": 0.0415, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.46864306926727295, |
| "learning_rate": 1.648166934189406e-05, |
| "loss": 0.0535, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 6.844827175140381, |
| "learning_rate": 1.6468828250401286e-05, |
| "loss": 0.064, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.896, |
| "eval_accuracy": 0.98781, |
| "eval_f1": 0.9878109998616666, |
| "eval_loss": 0.04953546077013016, |
| "eval_precision": 0.9878187731391986, |
| "eval_recall": 0.98781, |
| "eval_runtime": 778.6432, |
| "eval_samples_per_second": 128.429, |
| "eval_steps_per_second": 8.027, |
| "step": 28000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 156250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1889930140587046e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|