{ "model": "cortyx_v2_final.pt", "base": "microsoft/deberta-v3-small", "version": "v2.0", "epochs": 10, "best_f1_macro": 0.6129, "labels": [ "safe", "mild_toxicity", "severe_toxicity", "harassment", "hate_speech", "threat", "insult", "profanity", "sexual_content", "violence", "self_harm", "extremism", "illegal_instruction", "jailbreak_attempt", "prompt_injection", "obfuscated_toxicity", "misinformation_risk" ], "thresholds": { "safe": 0.5, "mild_toxicity": 0.7, "severe_toxicity": 0.4, "harassment": 0.5, "hate_speech": 0.45, "threat": 0.4, "insult": 0.55, "profanity": 0.6, "sexual_content": 0.45, "violence": 0.4, "self_harm": 0.35, "extremism": 0.4, "illegal_instruction": 0.45, "jailbreak_attempt": 0.45, "prompt_injection": 0.45, "obfuscated_toxicity": 0.5, "misinformation_risk": 0.5 }, "datasets": [ "QuantaSparkLabs/cortyx-safety-dataset", "google/civil_comments", "lmsys/toxic-chat", "lmsys/lmsys-chat-1m", "cardiffnlp/tweet_eval" ], "training_samples": 1844, "val_samples": 462 }