{
"best_metric": 0.0025361417792737484,
"best_model_checkpoint": "./codebert-vuln-tokcls/checkpoint-2875",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 2875,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017391304347826087,
"grad_norm": 0.41397327184677124,
"learning_rate": 4.9710144927536237e-05,
"loss": 0.1154,
"step": 50
},
{
"epoch": 0.034782608695652174,
"grad_norm": 0.555982768535614,
"learning_rate": 4.9420289855072464e-05,
"loss": 0.0386,
"step": 100
},
{
"epoch": 0.05217391304347826,
"grad_norm": 0.3676406145095825,
"learning_rate": 4.91304347826087e-05,
"loss": 0.0315,
"step": 150
},
{
"epoch": 0.06956521739130435,
"grad_norm": 0.574974536895752,
"learning_rate": 4.884057971014493e-05,
"loss": 0.028,
"step": 200
},
{
"epoch": 0.08695652173913043,
"grad_norm": 0.41208797693252563,
"learning_rate": 4.855072463768116e-05,
"loss": 0.0252,
"step": 250
},
{
"epoch": 0.10434782608695652,
"grad_norm": 0.34254035353660583,
"learning_rate": 4.8260869565217394e-05,
"loss": 0.0231,
"step": 300
},
{
"epoch": 0.12173913043478261,
"grad_norm": 0.17080576717853546,
"learning_rate": 4.797101449275362e-05,
"loss": 0.0232,
"step": 350
},
{
"epoch": 0.1391304347826087,
"grad_norm": 0.1468990445137024,
"learning_rate": 4.7681159420289855e-05,
"loss": 0.0194,
"step": 400
},
{
"epoch": 0.1565217391304348,
"grad_norm": 0.20004335045814514,
"learning_rate": 4.739130434782609e-05,
"loss": 0.0198,
"step": 450
},
{
"epoch": 0.17391304347826086,
"grad_norm": 0.6127797961235046,
"learning_rate": 4.710144927536232e-05,
"loss": 0.0167,
"step": 500
},
{
"epoch": 0.19130434782608696,
"grad_norm": 0.2098260074853897,
"learning_rate": 4.681159420289855e-05,
"loss": 0.0195,
"step": 550
},
{
"epoch": 0.20869565217391303,
"grad_norm": 0.26583606004714966,
"learning_rate": 4.6521739130434785e-05,
"loss": 0.0158,
"step": 600
},
{
"epoch": 0.22608695652173913,
"grad_norm": 0.30250421166419983,
"learning_rate": 4.623188405797101e-05,
"loss": 0.0163,
"step": 650
},
{
"epoch": 0.24347826086956523,
"grad_norm": 0.2696796655654907,
"learning_rate": 4.594202898550725e-05,
"loss": 0.0131,
"step": 700
},
{
"epoch": 0.2608695652173913,
"grad_norm": 0.4439266324043274,
"learning_rate": 4.565217391304348e-05,
"loss": 0.0136,
"step": 750
},
{
"epoch": 0.2782608695652174,
"grad_norm": 0.4392126798629761,
"learning_rate": 4.5362318840579715e-05,
"loss": 0.0137,
"step": 800
},
{
"epoch": 0.2956521739130435,
"grad_norm": 0.36367908120155334,
"learning_rate": 4.507246376811595e-05,
"loss": 0.0116,
"step": 850
},
{
"epoch": 0.3130434782608696,
"grad_norm": 0.3119848072528839,
"learning_rate": 4.478260869565218e-05,
"loss": 0.0129,
"step": 900
},
{
"epoch": 0.33043478260869563,
"grad_norm": 0.3177811801433563,
"learning_rate": 4.449275362318841e-05,
"loss": 0.0095,
"step": 950
},
{
"epoch": 0.34782608695652173,
"grad_norm": 0.9409816265106201,
"learning_rate": 4.4202898550724645e-05,
"loss": 0.0087,
"step": 1000
},
{
"epoch": 0.3652173913043478,
"grad_norm": 0.19778360426425934,
"learning_rate": 4.391304347826087e-05,
"loss": 0.011,
"step": 1050
},
{
"epoch": 0.3826086956521739,
"grad_norm": 0.21794016659259796,
"learning_rate": 4.362318840579711e-05,
"loss": 0.009,
"step": 1100
},
{
"epoch": 0.4,
"grad_norm": 0.3412404954433441,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.0096,
"step": 1150
},
{
"epoch": 0.41739130434782606,
"grad_norm": 0.27317434549331665,
"learning_rate": 4.304347826086957e-05,
"loss": 0.0081,
"step": 1200
},
{
"epoch": 0.43478260869565216,
"grad_norm": 0.0843023881316185,
"learning_rate": 4.27536231884058e-05,
"loss": 0.0082,
"step": 1250
},
{
"epoch": 0.45217391304347826,
"grad_norm": 0.17682024836540222,
"learning_rate": 4.246376811594203e-05,
"loss": 0.0077,
"step": 1300
},
{
"epoch": 0.46956521739130436,
"grad_norm": 0.41122502088546753,
"learning_rate": 4.2173913043478264e-05,
"loss": 0.0059,
"step": 1350
},
{
"epoch": 0.48695652173913045,
"grad_norm": 0.2303285449743271,
"learning_rate": 4.18840579710145e-05,
"loss": 0.0075,
"step": 1400
},
{
"epoch": 0.5043478260869565,
"grad_norm": 0.1975090056657791,
"learning_rate": 4.1594202898550726e-05,
"loss": 0.0068,
"step": 1450
},
{
"epoch": 0.5217391304347826,
"grad_norm": 0.30486345291137695,
"learning_rate": 4.130434782608696e-05,
"loss": 0.0056,
"step": 1500
},
{
"epoch": 0.5391304347826087,
"grad_norm": 0.08862517029047012,
"learning_rate": 4.101449275362319e-05,
"loss": 0.0054,
"step": 1550
},
{
"epoch": 0.5565217391304348,
"grad_norm": 0.3306777775287628,
"learning_rate": 4.072463768115942e-05,
"loss": 0.0055,
"step": 1600
},
{
"epoch": 0.5739130434782609,
"grad_norm": 0.15247593820095062,
"learning_rate": 4.0434782608695655e-05,
"loss": 0.0045,
"step": 1650
},
{
"epoch": 0.591304347826087,
"grad_norm": 0.2155187875032425,
"learning_rate": 4.014492753623188e-05,
"loss": 0.0074,
"step": 1700
},
{
"epoch": 0.6086956521739131,
"grad_norm": 0.14913377165794373,
"learning_rate": 3.985507246376812e-05,
"loss": 0.0051,
"step": 1750
},
{
"epoch": 0.6260869565217392,
"grad_norm": 0.10386908054351807,
"learning_rate": 3.956521739130435e-05,
"loss": 0.0058,
"step": 1800
},
{
"epoch": 0.6434782608695652,
"grad_norm": 0.25304195284843445,
"learning_rate": 3.927536231884058e-05,
"loss": 0.0063,
"step": 1850
},
{
"epoch": 0.6608695652173913,
"grad_norm": 0.18636035919189453,
"learning_rate": 3.898550724637681e-05,
"loss": 0.0054,
"step": 1900
},
{
"epoch": 0.6782608695652174,
"grad_norm": 0.26325523853302,
"learning_rate": 3.869565217391305e-05,
"loss": 0.004,
"step": 1950
},
{
"epoch": 0.6956521739130435,
"grad_norm": 0.21789908409118652,
"learning_rate": 3.8405797101449274e-05,
"loss": 0.0045,
"step": 2000
},
{
"epoch": 0.7130434782608696,
"grad_norm": 0.3563387393951416,
"learning_rate": 3.811594202898551e-05,
"loss": 0.0048,
"step": 2050
},
{
"epoch": 0.7304347826086957,
"grad_norm": 0.09919373691082001,
"learning_rate": 3.7826086956521736e-05,
"loss": 0.0049,
"step": 2100
},
{
"epoch": 0.7478260869565218,
"grad_norm": 0.7440153360366821,
"learning_rate": 3.753623188405797e-05,
"loss": 0.0054,
"step": 2150
},
{
"epoch": 0.7652173913043478,
"grad_norm": 0.14773143827915192,
"learning_rate": 3.7246376811594204e-05,
"loss": 0.0039,
"step": 2200
},
{
"epoch": 0.782608695652174,
"grad_norm": 0.09251661598682404,
"learning_rate": 3.695652173913043e-05,
"loss": 0.0039,
"step": 2250
},
{
"epoch": 0.8,
"grad_norm": 0.02156330831348896,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.004,
"step": 2300
},
{
"epoch": 0.8173913043478261,
"grad_norm": 0.30019891262054443,
"learning_rate": 3.63768115942029e-05,
"loss": 0.0038,
"step": 2350
},
{
"epoch": 0.8347826086956521,
"grad_norm": 0.08380095660686493,
"learning_rate": 3.6086956521739134e-05,
"loss": 0.0047,
"step": 2400
},
{
"epoch": 0.8521739130434782,
"grad_norm": 0.17295339703559875,
"learning_rate": 3.579710144927537e-05,
"loss": 0.0037,
"step": 2450
},
{
"epoch": 0.8695652173913043,
"grad_norm": 0.02843887358903885,
"learning_rate": 3.5507246376811596e-05,
"loss": 0.0033,
"step": 2500
},
{
"epoch": 0.8869565217391304,
"grad_norm": 0.35363340377807617,
"learning_rate": 3.521739130434783e-05,
"loss": 0.0042,
"step": 2550
},
{
"epoch": 0.9043478260869565,
"grad_norm": 0.6101410984992981,
"learning_rate": 3.4927536231884064e-05,
"loss": 0.0035,
"step": 2600
},
{
"epoch": 0.9217391304347826,
"grad_norm": 0.043613508343696594,
"learning_rate": 3.463768115942029e-05,
"loss": 0.0041,
"step": 2650
},
{
"epoch": 0.9391304347826087,
"grad_norm": 0.12494798004627228,
"learning_rate": 3.4347826086956526e-05,
"loss": 0.0038,
"step": 2700
},
{
"epoch": 0.9565217391304348,
"grad_norm": 0.03580565005540848,
"learning_rate": 3.405797101449276e-05,
"loss": 0.0044,
"step": 2750
},
{
"epoch": 0.9739130434782609,
"grad_norm": 0.12163197249174118,
"learning_rate": 3.376811594202899e-05,
"loss": 0.0029,
"step": 2800
},
{
"epoch": 0.991304347826087,
"grad_norm": 0.09125994145870209,
"learning_rate": 3.347826086956522e-05,
"loss": 0.0035,
"step": 2850
},
{
"epoch": 1.0,
"eval_loss": 0.0025361417792737484,
"eval_runtime": 96.0353,
"eval_samples_per_second": 29.937,
"eval_steps_per_second": 3.749,
"step": 2875
}
],
"logging_steps": 50,
"max_steps": 8625,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 6009564108171264.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}