phi2-lora-malicious-classifier / eval_metrics.json
hurtmongoose's picture
Phi-2 LoRA fine-tuned for 3-class adversarial prompt detection
98c6413 verified
{
"eval_loss": 0.462687611579895,
"eval_accuracy": 0.8475894245723172,
"eval_precision_weighted": 0.8428169632185296,
"eval_recall_weighted": 0.8475894245723172,
"eval_f1_weighted": 0.8440311242475405,
"eval_MCC": 0.751470722356998,
"eval_balanced_accuracy": 0.7994490993426729,
"eval_per_class": {
"jailbreaking": {
"TP": 259,
"FP": 97,
"FN": 130,
"TN": 1443,
"FNR": 0.3341902313624679,
"FPR": 0.06298701298701298,
"Specificity": 0.937012987012987
},
"prompt injection": {
"TP": 434,
"FP": 97,
"FN": 136,
"TN": 1262,
"FNR": 0.23859649122807017,
"FPR": 0.07137601177336277,
"Specificity": 0.9286239882266373
},
"unharmful": {
"TP": 942,
"FP": 100,
"FN": 28,
"TN": 859,
"FNR": 0.0288659793814433,
"FPR": 0.10427528675703858,
"Specificity": 0.8957247132429614
}
},
"eval_runtime": 68.6103,
"eval_samples_per_second": 28.115,
"eval_steps_per_second": 3.527,
"epoch": 10.0
}