{ "best_global_step": 12000, "best_metric": 0.97856556986665, "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-12000", "epoch": 0.384, "eval_steps": 4000, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0032, "grad_norm": 1.3923053741455078, "learning_rate": 3.96e-06, "loss": 0.685, "step": 100 }, { "epoch": 0.0064, "grad_norm": 1.665050983428955, "learning_rate": 7.960000000000002e-06, "loss": 0.6654, "step": 200 }, { "epoch": 0.0096, "grad_norm": 3.1655123233795166, "learning_rate": 1.196e-05, "loss": 0.6169, "step": 300 }, { "epoch": 0.0128, "grad_norm": 3.3807435035705566, "learning_rate": 1.5960000000000003e-05, "loss": 0.4635, "step": 400 }, { "epoch": 0.016, "grad_norm": 0.7963143587112427, "learning_rate": 1.9960000000000002e-05, "loss": 0.2886, "step": 500 }, { "epoch": 0.0192, "grad_norm": 1.5362155437469482, "learning_rate": 1.9987287319422154e-05, "loss": 0.2605, "step": 600 }, { "epoch": 0.0224, "grad_norm": 3.649484395980835, "learning_rate": 1.9974446227929375e-05, "loss": 0.2194, "step": 700 }, { "epoch": 0.0256, "grad_norm": 1.402876377105713, "learning_rate": 1.99616051364366e-05, "loss": 0.2367, "step": 800 }, { "epoch": 0.0288, "grad_norm": 2.708932399749756, "learning_rate": 1.994876404494382e-05, "loss": 0.2061, "step": 900 }, { "epoch": 0.032, "grad_norm": 2.4827463626861572, "learning_rate": 1.9935922953451046e-05, "loss": 0.1892, "step": 1000 }, { "epoch": 0.0352, "grad_norm": 0.9402571320533752, "learning_rate": 1.9923081861958268e-05, "loss": 0.1819, "step": 1100 }, { "epoch": 0.0384, "grad_norm": 0.8088085055351257, "learning_rate": 1.9910240770465493e-05, "loss": 0.1894, "step": 1200 }, { "epoch": 0.0416, "grad_norm": 2.8913955688476562, "learning_rate": 1.9897399678972714e-05, "loss": 0.1733, "step": 1300 }, { "epoch": 0.0448, "grad_norm": 1.9755029678344727, "learning_rate": 1.988455858747994e-05, "loss": 0.1748, "step": 1400 }, { "epoch": 0.048, "grad_norm": 2.659393310546875, "learning_rate": 1.987171749598716e-05, "loss": 0.1557, "step": 1500 }, { "epoch": 0.0512, "grad_norm": 10.118029594421387, "learning_rate": 1.9858876404494382e-05, "loss": 0.1702, "step": 1600 }, { "epoch": 0.0544, "grad_norm": 3.736616373062134, "learning_rate": 1.9846035313001607e-05, "loss": 0.1694, "step": 1700 }, { "epoch": 0.0576, "grad_norm": 4.140033721923828, "learning_rate": 1.9833194221508828e-05, "loss": 0.1615, "step": 1800 }, { "epoch": 0.0608, "grad_norm": 4.504345893859863, "learning_rate": 1.9820353130016053e-05, "loss": 0.1425, "step": 1900 }, { "epoch": 0.064, "grad_norm": 5.786899089813232, "learning_rate": 1.9807512038523274e-05, "loss": 0.1588, "step": 2000 }, { "epoch": 0.0672, "grad_norm": 9.956130027770996, "learning_rate": 1.97946709470305e-05, "loss": 0.1399, "step": 2100 }, { "epoch": 0.0704, "grad_norm": 11.86201286315918, "learning_rate": 1.978182985553772e-05, "loss": 0.173, "step": 2200 }, { "epoch": 0.0736, "grad_norm": 0.6308254599571228, "learning_rate": 1.9768988764044946e-05, "loss": 0.1428, "step": 2300 }, { "epoch": 0.0768, "grad_norm": 3.123718023300171, "learning_rate": 1.9756147672552167e-05, "loss": 0.1365, "step": 2400 }, { "epoch": 0.08, "grad_norm": 3.8000500202178955, "learning_rate": 1.9743306581059392e-05, "loss": 0.1528, "step": 2500 }, { "epoch": 0.0832, "grad_norm": 2.9149184226989746, "learning_rate": 1.9730465489566613e-05, "loss": 0.1568, "step": 2600 }, { "epoch": 0.0864, "grad_norm": 8.345555305480957, "learning_rate": 1.9717624398073838e-05, "loss": 0.139, "step": 2700 }, { "epoch": 0.0896, "grad_norm": 1.785736322402954, "learning_rate": 1.970478330658106e-05, "loss": 0.1509, "step": 2800 }, { "epoch": 0.0928, "grad_norm": 5.256857872009277, "learning_rate": 1.9691942215088284e-05, "loss": 0.1284, "step": 2900 }, { "epoch": 0.096, "grad_norm": 2.305225133895874, "learning_rate": 1.967910112359551e-05, "loss": 0.1249, "step": 3000 }, { "epoch": 0.0992, "grad_norm": 1.1149404048919678, "learning_rate": 1.966626003210273e-05, "loss": 0.1223, "step": 3100 }, { "epoch": 0.1024, "grad_norm": 0.24498392641544342, "learning_rate": 1.9653418940609955e-05, "loss": 0.1415, "step": 3200 }, { "epoch": 0.1056, "grad_norm": 3.027209520339966, "learning_rate": 1.9640577849117177e-05, "loss": 0.133, "step": 3300 }, { "epoch": 0.1088, "grad_norm": 10.783885955810547, "learning_rate": 1.9627736757624402e-05, "loss": 0.1309, "step": 3400 }, { "epoch": 0.112, "grad_norm": 7.116244316101074, "learning_rate": 1.9614895666131623e-05, "loss": 0.1056, "step": 3500 }, { "epoch": 0.1152, "grad_norm": 8.193492889404297, "learning_rate": 1.9602054574638848e-05, "loss": 0.1201, "step": 3600 }, { "epoch": 0.1184, "grad_norm": 0.9174596071243286, "learning_rate": 1.958921348314607e-05, "loss": 0.12, "step": 3700 }, { "epoch": 0.1216, "grad_norm": 3.436502695083618, "learning_rate": 1.957637239165329e-05, "loss": 0.1257, "step": 3800 }, { "epoch": 0.1248, "grad_norm": 9.131117820739746, "learning_rate": 1.9563531300160516e-05, "loss": 0.1139, "step": 3900 }, { "epoch": 0.128, "grad_norm": 0.3150777518749237, "learning_rate": 1.9550690208667737e-05, "loss": 0.1381, "step": 4000 }, { "epoch": 0.128, "eval_accuracy": 0.9586, "eval_f1": 0.9586218246722099, "eval_loss": 0.1626722663640976, "eval_precision": 0.9598553600795934, "eval_recall": 0.9586, "eval_runtime": 815.7025, "eval_samples_per_second": 122.594, "eval_steps_per_second": 7.662, "step": 4000 }, { "epoch": 0.1312, "grad_norm": 8.65023422241211, "learning_rate": 1.9537849117174962e-05, "loss": 0.1026, "step": 4100 }, { "epoch": 0.1344, "grad_norm": 8.46996784210205, "learning_rate": 1.9525008025682183e-05, "loss": 0.1251, "step": 4200 }, { "epoch": 0.1376, "grad_norm": 6.439260959625244, "learning_rate": 1.9512166934189408e-05, "loss": 0.1338, "step": 4300 }, { "epoch": 0.1408, "grad_norm": 1.012399435043335, "learning_rate": 1.949932584269663e-05, "loss": 0.1164, "step": 4400 }, { "epoch": 0.144, "grad_norm": 0.33647093176841736, "learning_rate": 1.9486484751203855e-05, "loss": 0.1156, "step": 4500 }, { "epoch": 0.1472, "grad_norm": 7.39678955078125, "learning_rate": 1.9473643659711076e-05, "loss": 0.1013, "step": 4600 }, { "epoch": 0.1504, "grad_norm": 4.556000709533691, "learning_rate": 1.94608025682183e-05, "loss": 0.1038, "step": 4700 }, { "epoch": 0.1536, "grad_norm": 0.5087370276451111, "learning_rate": 1.9447961476725522e-05, "loss": 0.1319, "step": 4800 }, { "epoch": 0.1568, "grad_norm": 4.385759353637695, "learning_rate": 1.9435120385232747e-05, "loss": 0.1229, "step": 4900 }, { "epoch": 0.16, "grad_norm": 5.46568489074707, "learning_rate": 1.942227929373997e-05, "loss": 0.1234, "step": 5000 }, { "epoch": 0.1632, "grad_norm": 3.3565216064453125, "learning_rate": 1.9409438202247193e-05, "loss": 0.1007, "step": 5100 }, { "epoch": 0.1664, "grad_norm": 0.17969129979610443, "learning_rate": 1.9396597110754415e-05, "loss": 0.108, "step": 5200 }, { "epoch": 0.1696, "grad_norm": 8.49416732788086, "learning_rate": 1.938375601926164e-05, "loss": 0.1105, "step": 5300 }, { "epoch": 0.1728, "grad_norm": 6.072606086730957, "learning_rate": 1.937091492776886e-05, "loss": 0.1082, "step": 5400 }, { "epoch": 0.176, "grad_norm": 1.2361280918121338, "learning_rate": 1.9358073836276086e-05, "loss": 0.0966, "step": 5500 }, { "epoch": 0.1792, "grad_norm": 6.232377052307129, "learning_rate": 1.9345232744783307e-05, "loss": 0.1151, "step": 5600 }, { "epoch": 0.1824, "grad_norm": 0.6630802154541016, "learning_rate": 1.9332391653290532e-05, "loss": 0.104, "step": 5700 }, { "epoch": 0.1856, "grad_norm": 0.2744814157485962, "learning_rate": 1.9319550561797754e-05, "loss": 0.104, "step": 5800 }, { "epoch": 0.1888, "grad_norm": 0.37808698415756226, "learning_rate": 1.930670947030498e-05, "loss": 0.1183, "step": 5900 }, { "epoch": 0.192, "grad_norm": 11.47230052947998, "learning_rate": 1.9293868378812203e-05, "loss": 0.1258, "step": 6000 }, { "epoch": 0.1952, "grad_norm": 7.168028354644775, "learning_rate": 1.928102728731942e-05, "loss": 0.1117, "step": 6100 }, { "epoch": 0.1984, "grad_norm": 0.11318526417016983, "learning_rate": 1.9268186195826646e-05, "loss": 0.1004, "step": 6200 }, { "epoch": 0.2016, "grad_norm": 9.405433654785156, "learning_rate": 1.9255345104333868e-05, "loss": 0.1195, "step": 6300 }, { "epoch": 0.2048, "grad_norm": 0.507453978061676, "learning_rate": 1.9242504012841092e-05, "loss": 0.0987, "step": 6400 }, { "epoch": 0.208, "grad_norm": 0.15791097283363342, "learning_rate": 1.9229662921348314e-05, "loss": 0.1156, "step": 6500 }, { "epoch": 0.2112, "grad_norm": 5.216452121734619, "learning_rate": 1.921682182985554e-05, "loss": 0.1375, "step": 6600 }, { "epoch": 0.2144, "grad_norm": 0.6168243885040283, "learning_rate": 1.9203980738362764e-05, "loss": 0.1086, "step": 6700 }, { "epoch": 0.2176, "grad_norm": 2.949383497238159, "learning_rate": 1.9191139646869985e-05, "loss": 0.0957, "step": 6800 }, { "epoch": 0.2208, "grad_norm": 9.519874572753906, "learning_rate": 1.917829855537721e-05, "loss": 0.0935, "step": 6900 }, { "epoch": 0.224, "grad_norm": 7.931914329528809, "learning_rate": 1.916545746388443e-05, "loss": 0.1335, "step": 7000 }, { "epoch": 0.2272, "grad_norm": 8.498374938964844, "learning_rate": 1.9152616372391656e-05, "loss": 0.1025, "step": 7100 }, { "epoch": 0.2304, "grad_norm": 5.054536819458008, "learning_rate": 1.9139775280898878e-05, "loss": 0.1165, "step": 7200 }, { "epoch": 0.2336, "grad_norm": 0.17365020513534546, "learning_rate": 1.9126934189406102e-05, "loss": 0.0996, "step": 7300 }, { "epoch": 0.2368, "grad_norm": 2.247058629989624, "learning_rate": 1.9114093097913324e-05, "loss": 0.1009, "step": 7400 }, { "epoch": 0.24, "grad_norm": 15.367574691772461, "learning_rate": 1.910125200642055e-05, "loss": 0.1082, "step": 7500 }, { "epoch": 0.2432, "grad_norm": 6.72482967376709, "learning_rate": 1.908841091492777e-05, "loss": 0.1308, "step": 7600 }, { "epoch": 0.2464, "grad_norm": 0.051803406327962875, "learning_rate": 1.9075569823434995e-05, "loss": 0.1031, "step": 7700 }, { "epoch": 0.2496, "grad_norm": 9.355685234069824, "learning_rate": 1.9062728731942216e-05, "loss": 0.1012, "step": 7800 }, { "epoch": 0.2528, "grad_norm": 2.047060012817383, "learning_rate": 1.904988764044944e-05, "loss": 0.1249, "step": 7900 }, { "epoch": 0.256, "grad_norm": 4.134668350219727, "learning_rate": 1.9037046548956663e-05, "loss": 0.0821, "step": 8000 }, { "epoch": 0.256, "eval_accuracy": 0.97609, "eval_f1": 0.9760920422665103, "eval_loss": 0.10814645141363144, "eval_precision": 0.9761013685233434, "eval_recall": 0.97609, "eval_runtime": 817.6077, "eval_samples_per_second": 122.308, "eval_steps_per_second": 7.644, "step": 8000 }, { "epoch": 0.2592, "grad_norm": 6.723151206970215, "learning_rate": 1.9024205457463887e-05, "loss": 0.0799, "step": 8100 }, { "epoch": 0.2624, "grad_norm": 1.805972933769226, "learning_rate": 1.901136436597111e-05, "loss": 0.1211, "step": 8200 }, { "epoch": 0.2656, "grad_norm": 11.118502616882324, "learning_rate": 1.8998523274478334e-05, "loss": 0.0916, "step": 8300 }, { "epoch": 0.2688, "grad_norm": 2.3953633308410645, "learning_rate": 1.8985682182985555e-05, "loss": 0.0878, "step": 8400 }, { "epoch": 0.272, "grad_norm": 0.10008874535560608, "learning_rate": 1.8972841091492777e-05, "loss": 0.0868, "step": 8500 }, { "epoch": 0.2752, "grad_norm": 0.06934285163879395, "learning_rate": 1.896e-05, "loss": 0.1155, "step": 8600 }, { "epoch": 0.2784, "grad_norm": 0.3781879246234894, "learning_rate": 1.8947158908507223e-05, "loss": 0.0988, "step": 8700 }, { "epoch": 0.2816, "grad_norm": 9.6624174118042, "learning_rate": 1.8934317817014448e-05, "loss": 0.1072, "step": 8800 }, { "epoch": 0.2848, "grad_norm": 0.09044591337442398, "learning_rate": 1.892147672552167e-05, "loss": 0.1085, "step": 8900 }, { "epoch": 0.288, "grad_norm": 0.0671633929014206, "learning_rate": 1.8908635634028894e-05, "loss": 0.1081, "step": 9000 }, { "epoch": 0.2912, "grad_norm": 8.130873680114746, "learning_rate": 1.8895794542536115e-05, "loss": 0.0968, "step": 9100 }, { "epoch": 0.2944, "grad_norm": 1.2537904977798462, "learning_rate": 1.888295345104334e-05, "loss": 0.0877, "step": 9200 }, { "epoch": 0.2976, "grad_norm": 6.084417819976807, "learning_rate": 1.887011235955056e-05, "loss": 0.1022, "step": 9300 }, { "epoch": 0.3008, "grad_norm": 6.140512943267822, "learning_rate": 1.8857271268057787e-05, "loss": 0.1016, "step": 9400 }, { "epoch": 0.304, "grad_norm": 1.7347182035446167, "learning_rate": 1.8844430176565008e-05, "loss": 0.092, "step": 9500 }, { "epoch": 0.3072, "grad_norm": 0.6796423196792603, "learning_rate": 1.8831589085072233e-05, "loss": 0.1172, "step": 9600 }, { "epoch": 0.3104, "grad_norm": 10.664779663085938, "learning_rate": 1.8818747993579454e-05, "loss": 0.0808, "step": 9700 }, { "epoch": 0.3136, "grad_norm": 0.4076235294342041, "learning_rate": 1.880590690208668e-05, "loss": 0.0836, "step": 9800 }, { "epoch": 0.3168, "grad_norm": 9.418440818786621, "learning_rate": 1.8793065810593904e-05, "loss": 0.0675, "step": 9900 }, { "epoch": 0.32, "grad_norm": 9.2078857421875, "learning_rate": 1.8780224719101125e-05, "loss": 0.0686, "step": 10000 }, { "epoch": 0.3232, "grad_norm": 0.47173646092414856, "learning_rate": 1.876738362760835e-05, "loss": 0.1096, "step": 10100 }, { "epoch": 0.3264, "grad_norm": 4.5297322273254395, "learning_rate": 1.875454253611557e-05, "loss": 0.098, "step": 10200 }, { "epoch": 0.3296, "grad_norm": 5.099269866943359, "learning_rate": 1.8741701444622796e-05, "loss": 0.1063, "step": 10300 }, { "epoch": 0.3328, "grad_norm": 2.588848114013672, "learning_rate": 1.8728860353130018e-05, "loss": 0.0989, "step": 10400 }, { "epoch": 0.336, "grad_norm": 4.629786968231201, "learning_rate": 1.8716019261637243e-05, "loss": 0.1018, "step": 10500 }, { "epoch": 0.3392, "grad_norm": 11.187308311462402, "learning_rate": 1.8703178170144464e-05, "loss": 0.0864, "step": 10600 }, { "epoch": 0.3424, "grad_norm": 2.476482391357422, "learning_rate": 1.869033707865169e-05, "loss": 0.0744, "step": 10700 }, { "epoch": 0.3456, "grad_norm": 17.418149948120117, "learning_rate": 1.867749598715891e-05, "loss": 0.1189, "step": 10800 }, { "epoch": 0.3488, "grad_norm": 11.753310203552246, "learning_rate": 1.8664654895666132e-05, "loss": 0.0832, "step": 10900 }, { "epoch": 0.352, "grad_norm": 0.41917338967323303, "learning_rate": 1.8651813804173357e-05, "loss": 0.1063, "step": 11000 }, { "epoch": 0.3552, "grad_norm": 14.072111129760742, "learning_rate": 1.8638972712680578e-05, "loss": 0.1061, "step": 11100 }, { "epoch": 0.3584, "grad_norm": 2.6141397953033447, "learning_rate": 1.8626131621187803e-05, "loss": 0.0934, "step": 11200 }, { "epoch": 0.3616, "grad_norm": 3.1363914012908936, "learning_rate": 1.8613290529695024e-05, "loss": 0.0879, "step": 11300 }, { "epoch": 0.3648, "grad_norm": 4.260811805725098, "learning_rate": 1.860044943820225e-05, "loss": 0.092, "step": 11400 }, { "epoch": 0.368, "grad_norm": 0.2677570879459381, "learning_rate": 1.858760834670947e-05, "loss": 0.1202, "step": 11500 }, { "epoch": 0.3712, "grad_norm": 0.056061357259750366, "learning_rate": 1.8574767255216696e-05, "loss": 0.0773, "step": 11600 }, { "epoch": 0.3744, "grad_norm": 7.95279598236084, "learning_rate": 1.8561926163723917e-05, "loss": 0.0749, "step": 11700 }, { "epoch": 0.3776, "grad_norm": 7.4200873374938965, "learning_rate": 1.8549085072231142e-05, "loss": 0.1054, "step": 11800 }, { "epoch": 0.3808, "grad_norm": 0.8819625973701477, "learning_rate": 1.8536243980738363e-05, "loss": 0.0998, "step": 11900 }, { "epoch": 0.384, "grad_norm": 6.32806396484375, "learning_rate": 1.8523402889245588e-05, "loss": 0.0667, "step": 12000 }, { "epoch": 0.384, "eval_accuracy": 0.97856, "eval_f1": 0.97856556986665, "eval_loss": 0.1008467897772789, "eval_precision": 0.9786554480535211, "eval_recall": 0.97856, "eval_runtime": 822.182, "eval_samples_per_second": 121.628, "eval_steps_per_second": 7.602, "step": 12000 } ], "logging_steps": 100, "max_steps": 156250, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 4000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.096626165108723e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }