{
  "best_global_step": 12000,
  "best_metric": 0.97856556986665,
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-12000",
  "epoch": 0.384,
  "eval_steps": 4000,
  "global_step": 12000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0032,
      "grad_norm": 1.3923053741455078,
      "learning_rate": 3.96e-06,
      "loss": 0.685,
      "step": 100
    },
    {
      "epoch": 0.0064,
      "grad_norm": 1.665050983428955,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.6654,
      "step": 200
    },
    {
      "epoch": 0.0096,
      "grad_norm": 3.1655123233795166,
      "learning_rate": 1.196e-05,
      "loss": 0.6169,
      "step": 300
    },
    {
      "epoch": 0.0128,
      "grad_norm": 3.3807435035705566,
      "learning_rate": 1.5960000000000003e-05,
      "loss": 0.4635,
      "step": 400
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.7963143587112427,
      "learning_rate": 1.9960000000000002e-05,
      "loss": 0.2886,
      "step": 500
    },
    {
      "epoch": 0.0192,
      "grad_norm": 1.5362155437469482,
      "learning_rate": 1.9987287319422154e-05,
      "loss": 0.2605,
      "step": 600
    },
    {
      "epoch": 0.0224,
      "grad_norm": 3.649484395980835,
      "learning_rate": 1.9974446227929375e-05,
      "loss": 0.2194,
      "step": 700
    },
    {
      "epoch": 0.0256,
      "grad_norm": 1.402876377105713,
      "learning_rate": 1.99616051364366e-05,
      "loss": 0.2367,
      "step": 800
    },
    {
      "epoch": 0.0288,
      "grad_norm": 2.708932399749756,
      "learning_rate": 1.994876404494382e-05,
      "loss": 0.2061,
      "step": 900
    },
    {
      "epoch": 0.032,
      "grad_norm": 2.4827463626861572,
      "learning_rate": 1.9935922953451046e-05,
      "loss": 0.1892,
      "step": 1000
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.9402571320533752,
      "learning_rate": 1.9923081861958268e-05,
      "loss": 0.1819,
      "step": 1100
    },
    {
      "epoch": 0.0384,
      "grad_norm": 0.8088085055351257,
      "learning_rate": 1.9910240770465493e-05,
      "loss": 0.1894,
      "step": 1200
    },
    {
      "epoch": 0.0416,
      "grad_norm": 2.8913955688476562,
      "learning_rate": 1.9897399678972714e-05,
      "loss": 0.1733,
      "step": 1300
    },
    {
      "epoch": 0.0448,
      "grad_norm": 1.9755029678344727,
      "learning_rate": 1.988455858747994e-05,
      "loss": 0.1748,
      "step": 1400
    },
    {
      "epoch": 0.048,
      "grad_norm": 2.659393310546875,
      "learning_rate": 1.987171749598716e-05,
      "loss": 0.1557,
      "step": 1500
    },
    {
      "epoch": 0.0512,
      "grad_norm": 10.118029594421387,
      "learning_rate": 1.9858876404494382e-05,
      "loss": 0.1702,
      "step": 1600
    },
    {
      "epoch": 0.0544,
      "grad_norm": 3.736616373062134,
      "learning_rate": 1.9846035313001607e-05,
      "loss": 0.1694,
      "step": 1700
    },
    {
      "epoch": 0.0576,
      "grad_norm": 4.140033721923828,
      "learning_rate": 1.9833194221508828e-05,
      "loss": 0.1615,
      "step": 1800
    },
    {
      "epoch": 0.0608,
      "grad_norm": 4.504345893859863,
      "learning_rate": 1.9820353130016053e-05,
      "loss": 0.1425,
      "step": 1900
    },
    {
      "epoch": 0.064,
      "grad_norm": 5.786899089813232,
      "learning_rate": 1.9807512038523274e-05,
      "loss": 0.1588,
      "step": 2000
    },
    {
      "epoch": 0.0672,
      "grad_norm": 9.956130027770996,
      "learning_rate": 1.97946709470305e-05,
      "loss": 0.1399,
      "step": 2100
    },
    {
      "epoch": 0.0704,
      "grad_norm": 11.86201286315918,
      "learning_rate": 1.978182985553772e-05,
      "loss": 0.173,
      "step": 2200
    },
    {
      "epoch": 0.0736,
      "grad_norm": 0.6308254599571228,
      "learning_rate": 1.9768988764044946e-05,
      "loss": 0.1428,
      "step": 2300
    },
    {
      "epoch": 0.0768,
      "grad_norm": 3.123718023300171,
      "learning_rate": 1.9756147672552167e-05,
      "loss": 0.1365,
      "step": 2400
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.8000500202178955,
      "learning_rate": 1.9743306581059392e-05,
      "loss": 0.1528,
      "step": 2500
    },
    {
      "epoch": 0.0832,
      "grad_norm": 2.9149184226989746,
      "learning_rate": 1.9730465489566613e-05,
      "loss": 0.1568,
      "step": 2600
    },
    {
      "epoch": 0.0864,
      "grad_norm": 8.345555305480957,
      "learning_rate": 1.9717624398073838e-05,
      "loss": 0.139,
      "step": 2700
    },
    {
      "epoch": 0.0896,
      "grad_norm": 1.785736322402954,
      "learning_rate": 1.970478330658106e-05,
      "loss": 0.1509,
      "step": 2800
    },
    {
      "epoch": 0.0928,
      "grad_norm": 5.256857872009277,
      "learning_rate": 1.9691942215088284e-05,
      "loss": 0.1284,
      "step": 2900
    },
    {
      "epoch": 0.096,
      "grad_norm": 2.305225133895874,
      "learning_rate": 1.967910112359551e-05,
      "loss": 0.1249,
      "step": 3000
    },
    {
      "epoch": 0.0992,
      "grad_norm": 1.1149404048919678,
      "learning_rate": 1.966626003210273e-05,
      "loss": 0.1223,
      "step": 3100
    },
    {
      "epoch": 0.1024,
      "grad_norm": 0.24498392641544342,
      "learning_rate": 1.9653418940609955e-05,
      "loss": 0.1415,
      "step": 3200
    },
    {
      "epoch": 0.1056,
      "grad_norm": 3.027209520339966,
      "learning_rate": 1.9640577849117177e-05,
      "loss": 0.133,
      "step": 3300
    },
    {
      "epoch": 0.1088,
      "grad_norm": 10.783885955810547,
      "learning_rate": 1.9627736757624402e-05,
      "loss": 0.1309,
      "step": 3400
    },
    {
      "epoch": 0.112,
      "grad_norm": 7.116244316101074,
      "learning_rate": 1.9614895666131623e-05,
      "loss": 0.1056,
      "step": 3500
    },
    {
      "epoch": 0.1152,
      "grad_norm": 8.193492889404297,
      "learning_rate": 1.9602054574638848e-05,
      "loss": 0.1201,
      "step": 3600
    },
    {
      "epoch": 0.1184,
      "grad_norm": 0.9174596071243286,
      "learning_rate": 1.958921348314607e-05,
      "loss": 0.12,
      "step": 3700
    },
    {
      "epoch": 0.1216,
      "grad_norm": 3.436502695083618,
      "learning_rate": 1.957637239165329e-05,
      "loss": 0.1257,
      "step": 3800
    },
    {
      "epoch": 0.1248,
      "grad_norm": 9.131117820739746,
      "learning_rate": 1.9563531300160516e-05,
      "loss": 0.1139,
      "step": 3900
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.3150777518749237,
      "learning_rate": 1.9550690208667737e-05,
      "loss": 0.1381,
      "step": 4000
    },
    {
      "epoch": 0.128,
      "eval_accuracy": 0.9586,
      "eval_f1": 0.9586218246722099,
      "eval_loss": 0.1626722663640976,
      "eval_precision": 0.9598553600795934,
      "eval_recall": 0.9586,
      "eval_runtime": 815.7025,
      "eval_samples_per_second": 122.594,
      "eval_steps_per_second": 7.662,
      "step": 4000
    },
    {
      "epoch": 0.1312,
      "grad_norm": 8.65023422241211,
      "learning_rate": 1.9537849117174962e-05,
      "loss": 0.1026,
      "step": 4100
    },
    {
      "epoch": 0.1344,
      "grad_norm": 8.46996784210205,
      "learning_rate": 1.9525008025682183e-05,
      "loss": 0.1251,
      "step": 4200
    },
    {
      "epoch": 0.1376,
      "grad_norm": 6.439260959625244,
      "learning_rate": 1.9512166934189408e-05,
      "loss": 0.1338,
      "step": 4300
    },
    {
      "epoch": 0.1408,
      "grad_norm": 1.012399435043335,
      "learning_rate": 1.949932584269663e-05,
      "loss": 0.1164,
      "step": 4400
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.33647093176841736,
      "learning_rate": 1.9486484751203855e-05,
      "loss": 0.1156,
      "step": 4500
    },
    {
      "epoch": 0.1472,
      "grad_norm": 7.39678955078125,
      "learning_rate": 1.9473643659711076e-05,
      "loss": 0.1013,
      "step": 4600
    },
    {
      "epoch": 0.1504,
      "grad_norm": 4.556000709533691,
      "learning_rate": 1.94608025682183e-05,
      "loss": 0.1038,
      "step": 4700
    },
    {
      "epoch": 0.1536,
      "grad_norm": 0.5087370276451111,
      "learning_rate": 1.9447961476725522e-05,
      "loss": 0.1319,
      "step": 4800
    },
    {
      "epoch": 0.1568,
      "grad_norm": 4.385759353637695,
      "learning_rate": 1.9435120385232747e-05,
      "loss": 0.1229,
      "step": 4900
    },
    {
      "epoch": 0.16,
      "grad_norm": 5.46568489074707,
      "learning_rate": 1.942227929373997e-05,
      "loss": 0.1234,
      "step": 5000
    },
    {
      "epoch": 0.1632,
      "grad_norm": 3.3565216064453125,
      "learning_rate": 1.9409438202247193e-05,
      "loss": 0.1007,
      "step": 5100
    },
    {
      "epoch": 0.1664,
      "grad_norm": 0.17969129979610443,
      "learning_rate": 1.9396597110754415e-05,
      "loss": 0.108,
      "step": 5200
    },
    {
      "epoch": 0.1696,
      "grad_norm": 8.49416732788086,
      "learning_rate": 1.938375601926164e-05,
      "loss": 0.1105,
      "step": 5300
    },
    {
      "epoch": 0.1728,
      "grad_norm": 6.072606086730957,
      "learning_rate": 1.937091492776886e-05,
      "loss": 0.1082,
      "step": 5400
    },
    {
      "epoch": 0.176,
      "grad_norm": 1.2361280918121338,
      "learning_rate": 1.9358073836276086e-05,
      "loss": 0.0966,
      "step": 5500
    },
    {
      "epoch": 0.1792,
      "grad_norm": 6.232377052307129,
      "learning_rate": 1.9345232744783307e-05,
      "loss": 0.1151,
      "step": 5600
    },
    {
      "epoch": 0.1824,
      "grad_norm": 0.6630802154541016,
      "learning_rate": 1.9332391653290532e-05,
      "loss": 0.104,
      "step": 5700
    },
    {
      "epoch": 0.1856,
      "grad_norm": 0.2744814157485962,
      "learning_rate": 1.9319550561797754e-05,
      "loss": 0.104,
      "step": 5800
    },
    {
      "epoch": 0.1888,
      "grad_norm": 0.37808698415756226,
      "learning_rate": 1.930670947030498e-05,
      "loss": 0.1183,
      "step": 5900
    },
    {
      "epoch": 0.192,
      "grad_norm": 11.47230052947998,
      "learning_rate": 1.9293868378812203e-05,
      "loss": 0.1258,
      "step": 6000
    },
    {
      "epoch": 0.1952,
      "grad_norm": 7.168028354644775,
      "learning_rate": 1.928102728731942e-05,
      "loss": 0.1117,
      "step": 6100
    },
    {
      "epoch": 0.1984,
      "grad_norm": 0.11318526417016983,
      "learning_rate": 1.9268186195826646e-05,
      "loss": 0.1004,
      "step": 6200
    },
    {
      "epoch": 0.2016,
      "grad_norm": 9.405433654785156,
      "learning_rate": 1.9255345104333868e-05,
      "loss": 0.1195,
      "step": 6300
    },
    {
      "epoch": 0.2048,
      "grad_norm": 0.507453978061676,
      "learning_rate": 1.9242504012841092e-05,
      "loss": 0.0987,
      "step": 6400
    },
    {
      "epoch": 0.208,
      "grad_norm": 0.15791097283363342,
      "learning_rate": 1.9229662921348314e-05,
      "loss": 0.1156,
      "step": 6500
    },
    {
      "epoch": 0.2112,
      "grad_norm": 5.216452121734619,
      "learning_rate": 1.921682182985554e-05,
      "loss": 0.1375,
      "step": 6600
    },
    {
      "epoch": 0.2144,
      "grad_norm": 0.6168243885040283,
      "learning_rate": 1.9203980738362764e-05,
      "loss": 0.1086,
      "step": 6700
    },
    {
      "epoch": 0.2176,
      "grad_norm": 2.949383497238159,
      "learning_rate": 1.9191139646869985e-05,
      "loss": 0.0957,
      "step": 6800
    },
    {
      "epoch": 0.2208,
      "grad_norm": 9.519874572753906,
      "learning_rate": 1.917829855537721e-05,
      "loss": 0.0935,
      "step": 6900
    },
    {
      "epoch": 0.224,
      "grad_norm": 7.931914329528809,
      "learning_rate": 1.916545746388443e-05,
      "loss": 0.1335,
      "step": 7000
    },
    {
      "epoch": 0.2272,
      "grad_norm": 8.498374938964844,
      "learning_rate": 1.9152616372391656e-05,
      "loss": 0.1025,
      "step": 7100
    },
    {
      "epoch": 0.2304,
      "grad_norm": 5.054536819458008,
      "learning_rate": 1.9139775280898878e-05,
      "loss": 0.1165,
      "step": 7200
    },
    {
      "epoch": 0.2336,
      "grad_norm": 0.17365020513534546,
      "learning_rate": 1.9126934189406102e-05,
      "loss": 0.0996,
      "step": 7300
    },
    {
      "epoch": 0.2368,
      "grad_norm": 2.247058629989624,
      "learning_rate": 1.9114093097913324e-05,
      "loss": 0.1009,
      "step": 7400
    },
    {
      "epoch": 0.24,
      "grad_norm": 15.367574691772461,
      "learning_rate": 1.910125200642055e-05,
      "loss": 0.1082,
      "step": 7500
    },
    {
      "epoch": 0.2432,
      "grad_norm": 6.72482967376709,
      "learning_rate": 1.908841091492777e-05,
      "loss": 0.1308,
      "step": 7600
    },
    {
      "epoch": 0.2464,
      "grad_norm": 0.051803406327962875,
      "learning_rate": 1.9075569823434995e-05,
      "loss": 0.1031,
      "step": 7700
    },
    {
      "epoch": 0.2496,
      "grad_norm": 9.355685234069824,
      "learning_rate": 1.9062728731942216e-05,
      "loss": 0.1012,
      "step": 7800
    },
    {
      "epoch": 0.2528,
      "grad_norm": 2.047060012817383,
      "learning_rate": 1.904988764044944e-05,
      "loss": 0.1249,
      "step": 7900
    },
    {
      "epoch": 0.256,
      "grad_norm": 4.134668350219727,
      "learning_rate": 1.9037046548956663e-05,
      "loss": 0.0821,
      "step": 8000
    },
    {
      "epoch": 0.256,
      "eval_accuracy": 0.97609,
      "eval_f1": 0.9760920422665103,
      "eval_loss": 0.10814645141363144,
      "eval_precision": 0.9761013685233434,
      "eval_recall": 0.97609,
      "eval_runtime": 817.6077,
      "eval_samples_per_second": 122.308,
      "eval_steps_per_second": 7.644,
      "step": 8000
    },
    {
      "epoch": 0.2592,
      "grad_norm": 6.723151206970215,
      "learning_rate": 1.9024205457463887e-05,
      "loss": 0.0799,
      "step": 8100
    },
    {
      "epoch": 0.2624,
      "grad_norm": 1.805972933769226,
      "learning_rate": 1.901136436597111e-05,
      "loss": 0.1211,
      "step": 8200
    },
    {
      "epoch": 0.2656,
      "grad_norm": 11.118502616882324,
      "learning_rate": 1.8998523274478334e-05,
      "loss": 0.0916,
      "step": 8300
    },
    {
      "epoch": 0.2688,
      "grad_norm": 2.3953633308410645,
      "learning_rate": 1.8985682182985555e-05,
      "loss": 0.0878,
      "step": 8400
    },
    {
      "epoch": 0.272,
      "grad_norm": 0.10008874535560608,
      "learning_rate": 1.8972841091492777e-05,
      "loss": 0.0868,
      "step": 8500
    },
    {
      "epoch": 0.2752,
      "grad_norm": 0.06934285163879395,
      "learning_rate": 1.896e-05,
      "loss": 0.1155,
      "step": 8600
    },
    {
      "epoch": 0.2784,
      "grad_norm": 0.3781879246234894,
      "learning_rate": 1.8947158908507223e-05,
      "loss": 0.0988,
      "step": 8700
    },
    {
      "epoch": 0.2816,
      "grad_norm": 9.6624174118042,
      "learning_rate": 1.8934317817014448e-05,
      "loss": 0.1072,
      "step": 8800
    },
    {
      "epoch": 0.2848,
      "grad_norm": 0.09044591337442398,
      "learning_rate": 1.892147672552167e-05,
      "loss": 0.1085,
      "step": 8900
    },
    {
      "epoch": 0.288,
      "grad_norm": 0.0671633929014206,
      "learning_rate": 1.8908635634028894e-05,
      "loss": 0.1081,
      "step": 9000
    },
    {
      "epoch": 0.2912,
      "grad_norm": 8.130873680114746,
      "learning_rate": 1.8895794542536115e-05,
      "loss": 0.0968,
      "step": 9100
    },
    {
      "epoch": 0.2944,
      "grad_norm": 1.2537904977798462,
      "learning_rate": 1.888295345104334e-05,
      "loss": 0.0877,
      "step": 9200
    },
    {
      "epoch": 0.2976,
      "grad_norm": 6.084417819976807,
      "learning_rate": 1.887011235955056e-05,
      "loss": 0.1022,
      "step": 9300
    },
    {
      "epoch": 0.3008,
      "grad_norm": 6.140512943267822,
      "learning_rate": 1.8857271268057787e-05,
      "loss": 0.1016,
      "step": 9400
    },
    {
      "epoch": 0.304,
      "grad_norm": 1.7347182035446167,
      "learning_rate": 1.8844430176565008e-05,
      "loss": 0.092,
      "step": 9500
    },
    {
      "epoch": 0.3072,
      "grad_norm": 0.6796423196792603,
      "learning_rate": 1.8831589085072233e-05,
      "loss": 0.1172,
      "step": 9600
    },
    {
      "epoch": 0.3104,
      "grad_norm": 10.664779663085938,
      "learning_rate": 1.8818747993579454e-05,
      "loss": 0.0808,
      "step": 9700
    },
    {
      "epoch": 0.3136,
      "grad_norm": 0.4076235294342041,
      "learning_rate": 1.880590690208668e-05,
      "loss": 0.0836,
      "step": 9800
    },
    {
      "epoch": 0.3168,
      "grad_norm": 9.418440818786621,
      "learning_rate": 1.8793065810593904e-05,
      "loss": 0.0675,
      "step": 9900
    },
    {
      "epoch": 0.32,
      "grad_norm": 9.2078857421875,
      "learning_rate": 1.8780224719101125e-05,
      "loss": 0.0686,
      "step": 10000
    },
    {
      "epoch": 0.3232,
      "grad_norm": 0.47173646092414856,
      "learning_rate": 1.876738362760835e-05,
      "loss": 0.1096,
      "step": 10100
    },
    {
      "epoch": 0.3264,
      "grad_norm": 4.5297322273254395,
      "learning_rate": 1.875454253611557e-05,
      "loss": 0.098,
      "step": 10200
    },
    {
      "epoch": 0.3296,
      "grad_norm": 5.099269866943359,
      "learning_rate": 1.8741701444622796e-05,
      "loss": 0.1063,
      "step": 10300
    },
    {
      "epoch": 0.3328,
      "grad_norm": 2.588848114013672,
      "learning_rate": 1.8728860353130018e-05,
      "loss": 0.0989,
      "step": 10400
    },
    {
      "epoch": 0.336,
      "grad_norm": 4.629786968231201,
      "learning_rate": 1.8716019261637243e-05,
      "loss": 0.1018,
      "step": 10500
    },
    {
      "epoch": 0.3392,
      "grad_norm": 11.187308311462402,
      "learning_rate": 1.8703178170144464e-05,
      "loss": 0.0864,
      "step": 10600
    },
    {
      "epoch": 0.3424,
      "grad_norm": 2.476482391357422,
      "learning_rate": 1.869033707865169e-05,
      "loss": 0.0744,
      "step": 10700
    },
    {
      "epoch": 0.3456,
      "grad_norm": 17.418149948120117,
      "learning_rate": 1.867749598715891e-05,
      "loss": 0.1189,
      "step": 10800
    },
    {
      "epoch": 0.3488,
      "grad_norm": 11.753310203552246,
      "learning_rate": 1.8664654895666132e-05,
      "loss": 0.0832,
      "step": 10900
    },
    {
      "epoch": 0.352,
      "grad_norm": 0.41917338967323303,
      "learning_rate": 1.8651813804173357e-05,
      "loss": 0.1063,
      "step": 11000
    },
    {
      "epoch": 0.3552,
      "grad_norm": 14.072111129760742,
      "learning_rate": 1.8638972712680578e-05,
      "loss": 0.1061,
      "step": 11100
    },
    {
      "epoch": 0.3584,
      "grad_norm": 2.6141397953033447,
      "learning_rate": 1.8626131621187803e-05,
      "loss": 0.0934,
      "step": 11200
    },
    {
      "epoch": 0.3616,
      "grad_norm": 3.1363914012908936,
      "learning_rate": 1.8613290529695024e-05,
      "loss": 0.0879,
      "step": 11300
    },
    {
      "epoch": 0.3648,
      "grad_norm": 4.260811805725098,
      "learning_rate": 1.860044943820225e-05,
      "loss": 0.092,
      "step": 11400
    },
    {
      "epoch": 0.368,
      "grad_norm": 0.2677570879459381,
      "learning_rate": 1.858760834670947e-05,
      "loss": 0.1202,
      "step": 11500
    },
    {
      "epoch": 0.3712,
      "grad_norm": 0.056061357259750366,
      "learning_rate": 1.8574767255216696e-05,
      "loss": 0.0773,
      "step": 11600
    },
    {
      "epoch": 0.3744,
      "grad_norm": 7.95279598236084,
      "learning_rate": 1.8561926163723917e-05,
      "loss": 0.0749,
      "step": 11700
    },
    {
      "epoch": 0.3776,
      "grad_norm": 7.4200873374938965,
      "learning_rate": 1.8549085072231142e-05,
      "loss": 0.1054,
      "step": 11800
    },
    {
      "epoch": 0.3808,
      "grad_norm": 0.8819625973701477,
      "learning_rate": 1.8536243980738363e-05,
      "loss": 0.0998,
      "step": 11900
    },
    {
      "epoch": 0.384,
      "grad_norm": 6.32806396484375,
      "learning_rate": 1.8523402889245588e-05,
      "loss": 0.0667,
      "step": 12000
    },
    {
      "epoch": 0.384,
      "eval_accuracy": 0.97856,
      "eval_f1": 0.97856556986665,
      "eval_loss": 0.1008467897772789,
      "eval_precision": 0.9786554480535211,
      "eval_recall": 0.97856,
      "eval_runtime": 822.182,
      "eval_samples_per_second": 121.628,
      "eval_steps_per_second": 7.602,
      "step": 12000
    }
  ],
  "logging_steps": 100,
  "max_steps": 156250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 4000,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.096626165108723e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}