Spaces:
Sleeping
Sleeping
fix(api): treat 'myslf' / 'my self' / 'me' as self-directed in pre-filter
Browse files
The reflexive lookahead only matched the canonical spelling "myself",
so typos ("myslf", "mysef"), the space-separated form ("my self"), and
the bare 1st-person object ("kill me") slipped through to \S+ and got
flagged as Directed Aggression — exactly wrong for self-harm input.
Broadens the lookahead with \s* between "my" and "self" (and likewise in
the other reflexives), an enumerated set of common typos, and "me\b". The
trailing \b on "me" keeps names that merely start with "me" (Megan,
Melissa) from being excluded, so threats against them still match.
- app.py +10 -1
- tests/test_explicit_threat_regex.py +13 -0
app.py
CHANGED
|
@@ -103,7 +103,16 @@ STAGE2_LABELS = ["Anxiety", "Bipolar", "Depression", "Personality Disorder", "St
|
|
| 103 |
_VIOLENT_VERBS = (
|
| 104 |
r"kill|murder|hurt|harm|beat|stab|shoot|attack|strangle|choke|smash|bash|destroy|punch"
|
| 105 |
)
|
| 106 |
-
_REFLEXIVES =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
# Idiomatic nouns following "the/this/that" that flip the verb to a non-violent sense.
|
| 108 |
_IDIOM_NOUNS_AFTER_DET = (
|
| 109 |
r"mood|vibe|game|lights?|engine|noise|breeze|shit|messenger|heat|traffic|odds|"
|
|
|
|
| 103 |
_VIOLENT_VERBS = (
|
| 104 |
r"kill|murder|hurt|harm|beat|stab|shoot|attack|strangle|choke|smash|bash|destroy|punch"
|
| 105 |
)
|
| 106 |
+
_REFLEXIVES = (
|
| 107 |
+
# \s* tolerates the space-separated form ("my self", "your self").
|
| 108 |
+
r"my\s*self|your\s*self|him\s*self|her\s*self|it\s*self|"
|
| 109 |
+
r"them\s*selves|our\s*selves|your\s*selves|"
|
| 110 |
+
# Common typos of "myself" + bare 1st-person object pronoun.
|
| 111 |
+
# "kill me" within a 1st-person-intent frame is self-directed in
|
| 112 |
+
# practice (paranoid "they wanna kill me" still matches the modal
|
| 113 |
+
# frame, but that's better routed to the model than flagged as DA).
|
| 114 |
+
r"myslf|mysef|meself|me"
|
| 115 |
+
)
|
| 116 |
# Idiomatic nouns following "the/this/that" that flip the verb to a non-violent sense.
|
| 117 |
_IDIOM_NOUNS_AFTER_DET = (
|
| 118 |
r"mood|vibe|game|lights?|engine|noise|breeze|shit|messenger|heat|traffic|odds|"
|
tests/test_explicit_threat_regex.py
CHANGED
|
@@ -38,6 +38,9 @@ SHOULD_MATCH = [
|
|
| 38 |
# Slang targets.
|
| 39 |
"i finna kill that mf",
|
| 40 |
"gonna beat that fool",
|
|
|
|
|
|
|
|
|
|
| 41 |
]
|
| 42 |
|
| 43 |
SHOULD_NOT_MATCH = [
|
|
@@ -47,6 +50,16 @@ SHOULD_NOT_MATCH = [
|
|
| 47 |
"gonna kill myself tonight",
|
| 48 |
"need to hurt myself",
|
| 49 |
"I will harm myself",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# No modal verb prefix.
|
| 51 |
"killing me softly with this song",
|
| 52 |
"kill the noise", # bare imperative, no modal
|
|
|
|
| 38 |
# Slang targets.
|
| 39 |
"i finna kill that mf",
|
| 40 |
"gonna beat that fool",
|
| 41 |
+
# Names starting with "me" must still match (no \b inside the name).
|
| 42 |
+
"I wanna kill Megan",
|
| 43 |
+
"gonna hurt Melissa",
|
| 44 |
]
|
| 45 |
|
| 46 |
SHOULD_NOT_MATCH = [
|
|
|
|
| 50 |
"gonna kill myself tonight",
|
| 51 |
"need to hurt myself",
|
| 52 |
"I will harm myself",
|
| 53 |
+
# Self-directed: typos, space-separated reflexive, and bare "me".
|
| 54 |
+
"i wanna kill myslf",
|
| 55 |
+
"I want to kill mysef",
|
| 56 |
+
"I'll kill meself",
|
| 57 |
+
"I wanna kill my self",
|
| 58 |
+
"need to hurt my self",
|
| 59 |
+
"wanna harm my self",
|
| 60 |
+
"I wanna kill me",
|
| 61 |
+
"I'm gonna kill me",
|
| 62 |
+
"need to hurt me",
|
| 63 |
# No modal verb prefix.
|
| 64 |
"killing me softly with this song",
|
| 65 |
"kill the noise", # bare imperative, no modal
|