jainsatyam26's picture
Upload guard-safety-classifier model
30c7d87 verified
{
"model_name": "microsoft/deberta-v3-small",
"max_len": 128,
"batch_size": 64,
"epochs": 1,
"lr": 2e-05,
"weight_decay": 0.01,
"warmup_steps": 500,
"grad_clip": 1.0,
"seed": 42,
"w_is_safe": 1.0,
"w_category": 1.0,
"w_categories": 0.5,
"save_steps": 200,
"eval_steps": 500,
"num_categories": 26,
"num_multi_labels": 28,
"category_classes": [
"animal_abuse",
"benign",
"child_abuse",
"code_vulnerabilities",
"controversial_topics_politics",
"cwe_compliance",
"dangerous_expert_advice",
"discrimination_stereotype_injustice",
"drug_abuse_weapons_banned_substance",
"financial_crime_property_crime_theft",
"fraud_deception_misinformation",
"gender_bias",
"hate_speech_offensive_language",
"jailbreak_prompt_injection",
"malware_hacking_cyberattack",
"misinformation_regarding_ethics_laws_and_safety",
"mitre_compliance",
"non_violent_unethical_behavior",
"orientation_bias",
"privacy_violation",
"race_bias",
"religious_bias",
"self_harm",
"sexually_explicit_adult_content",
"terrorism_organized_crime",
"violence_aiding_and_abetting_incitement"
],
"multi_label_classes": [
" ",
",",
"_",
"a",
"b",
"c",
"d",
"e",
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z"
],
"best_val_loss": 1.0249000663187966,
"test_metrics": {
"loss": 1.0232949212993905,
"is_safe_acc": 0.9276446754604681,
"category_f1": 0.5036962280648937,
"categories_f1": 0.9067776039136755
}
}