Spaces:
Runtime error
Runtime error
Update infer.py
Browse files
infer.py
CHANGED
|
@@ -75,16 +75,23 @@ class PromptGuardAnomalyDetector(AbstractAnomalyDetector):
|
|
| 75 |
threshold = threshold or self._threshold
|
| 76 |
anomalies = self.classifier(embeddings)
|
| 77 |
print(anomalies)
|
|
|
|
| 78 |
# [{'label': 'JAILBREAK', 'score': 0.9999452829360962}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
if anomalies:
|
| 80 |
known_attack_vectors = [
|
| 81 |
KnownAttackVector(
|
| 82 |
-
known_prompt=anomaly
|
| 83 |
similarity_percentage=anomaly["score"],
|
| 84 |
source="meta-llama/Llama-Prompt-Guard-2-86M",
|
| 85 |
)
|
| 86 |
for anomaly in anomalies
|
| 87 |
-
if anomaly["score"] >= threshold
|
| 88 |
]
|
| 89 |
return AnomalyResult(anomaly=True, reason=known_attack_vectors)
|
| 90 |
return AnomalyResult(anomaly=False)
|
|
|
|
| 75 |
threshold = threshold or self._threshold
|
| 76 |
anomalies = self.classifier(embeddings)
|
| 77 |
print(anomalies)
|
| 78 |
+
# promptguard 1
|
| 79 |
# [{'label': 'JAILBREAK', 'score': 0.9999452829360962}]
|
| 80 |
+
|
| 81 |
+
# promptguard 2
|
| 82 |
+
# [{'label': 'LABEL_0', 'score': 0.9999452829360962}]
|
| 83 |
+
# [{'label': 'LABEL_1', 'score': 0.9999452829360962}]
|
| 84 |
+
# "LABEL_0" (Negative classification, benign)
|
| 85 |
+
# "LABEL_1" (Positive classification, malicious)
|
| 86 |
if anomalies:
|
| 87 |
known_attack_vectors = [
|
| 88 |
KnownAttackVector(
|
| 89 |
+
known_prompt="PromptGuard detected anomaly",
|
| 90 |
similarity_percentage=anomaly["score"],
|
| 91 |
source="meta-llama/Llama-Prompt-Guard-2-86M",
|
| 92 |
)
|
| 93 |
for anomaly in anomalies
|
| 94 |
+
if anomaly["score"] >= threshold and anomaly["label"] == "LABEL_1" # LABEL_0 is negative == benign
|
| 95 |
]
|
| 96 |
return AnomalyResult(anomaly=True, reason=known_attack_vectors)
|
| 97 |
return AnomalyResult(anomaly=False)
|