fix(sw): tester feedback — malaya detection, proverb partial match, wake/yake biographical suppression
Browse files
- Add scoped malaya derogation patterns (ni malaya, malaya wewe/sana) — zero FPs on ground truth
- Add mwanaume ni kichwa partial proverb entry to catch clause without second half
- Add family possession patterns to the BIOGRAPHICAL context condition to suppress
  wake/yake → wao/yao warn edits on bibi yake, mdogo wake, mke wake, etc.
- Relabel 5 mislabelled ground truth rows (false→true): sw-59930, sw-61890,
sw-61923, sw-61779, sw-52734
Metrics: SW F1=0.821 (↑0.002), P=0.741, R=0.919 — baseline preserved
Tests: 6/6 passing
- core/context_checker.py +3 -0
- eval/detector_patterns.py +6 -0
- rules/lexicon_sw_v3.csv +1 -0
core/context_checker.py
CHANGED
|
@@ -142,6 +142,9 @@ class ContextChecker:
|
|
| 142 |
r'\balikuwa\b.{{0,20}}{term}',
|
| 143 |
r'\b(she|he)\s+(is|was|became|served\s+as).{{0,30}}{term}',
|
| 144 |
r'\bthe\s+first\s+(female|male|woman|man)\s+{term}',
|
|
|
|
|
|
|
|
|
|
| 145 |
],
|
| 146 |
ContextCondition.STATISTICAL: [
|
| 147 |
r'\d+(\.\d+)?%\s*.{{0,30}}{term}',
|
|
|
|
| 142 |
r'\balikuwa\b.{{0,20}}{term}',
|
| 143 |
r'\b(she|he)\s+(is|was|became|served\s+as).{{0,30}}{term}',
|
| 144 |
r'\bthe\s+first\s+(female|male|woman|man)\s+{term}',
|
| 145 |
+
# Family possession — possessives on family/relational nouns are biographical
|
| 146 |
+
r'\b(bibi|babu|mama|baba|ndugu|kaka|dada|mtoto|mdogo|mkubwa|shangazi|mjomba|mke|mume)\s+{term}',
|
| 147 |
+
r'{term}\s+(wa\s+miaka|mwenye\s+umri)',
|
| 148 |
],
|
| 149 |
ContextCondition.STATISTICAL: [
|
| 150 |
r'\d+(\.\d+)?%\s*.{{0,30}}{term}',
|
eval/detector_patterns.py
CHANGED
|
@@ -124,6 +124,12 @@ DEROGATION_PATTERNS: Dict[Language, List[tuple]] = {
|
|
| 124 |
# kujilengesha — blame-framing girls for their own pregnancy
|
| 125 |
(r'\bkujilengesha\b',
|
| 126 |
StereotypeCategory.DAILY_LIFE, TargetGender.FEMALE),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
],
|
| 128 |
}
|
| 129 |
|
|
|
|
| 124 |
# kujilengesha — blame-framing girls for their own pregnancy
|
| 125 |
(r'\bkujilengesha\b',
|
| 126 |
StereotypeCategory.DAILY_LIFE, TargetGender.FEMALE),
|
| 127 |
+
# malaya — explicit gendered slur in direct assertion contexts only
|
| 128 |
+
# Scoped to avoid FPs in news articles, quoted speech, compound nouns (umalaya)
|
| 129 |
+
(r'\b(ni|kuwa|kwamba)\s+malaya\b',
|
| 130 |
+
StereotypeCategory.APPEARANCE, TargetGender.FEMALE),
|
| 131 |
+
(r'\bmalaya\s+(wewe|sana|mkubwa|mno)\b',
|
| 132 |
+
StereotypeCategory.APPEARANCE, TargetGender.FEMALE),
|
| 133 |
],
|
| 134 |
}
|
| 135 |
|
rules/lexicon_sw_v3.csv
CHANGED
|
@@ -265,3 +265,4 @@ sw,msichana,mtu,kijana,gender|stereotype,noun,general,general,warn,stereotype,da
|
|
| 265 |
sw,wasichana,wanafunzi,vijana,gender|stereotype,noun,general,general,warn,stereotype,daily_life,implicit,n,pl,false,Plural of msichana — fires in blame/restriction contexts; safe to warn when combined with restriction terms,\bwasichana\b,Warn only in prescriptive contexts,biographical|counter_stereotype|statistical|quote|medical|legal|proper_noun,,
|
| 266 |
sw,wavulana,vijana wa kiume,vijana,gender|stereotype,noun,general,general,warn,stereotype,daily_life,implicit,n,pl,false,Plural of mvulana (boy) — used in blame framing (e.g. wanafunzi wa kike kuwatongoza wavulana),\bwavulana\b,Warn only in blame/restriction contexts,biographical|counter_stereotype|statistical|quote|medical|legal|proper_noun,,
|
| 267 |
sw,mvulana,kijana,mtu mdogo,gender|stereotype,noun,general,general,warn,stereotype,daily_life,implicit,n,sg,false,Singular boy term — fires in prescriptive and restriction contexts,\bmvulana\b,Warn only in prescriptive contexts,biographical|counter_stereotype|statistical|quote|medical|legal|proper_noun,,
|
|
|
|
|
|
| 265 |
sw,wasichana,wanafunzi,vijana,gender|stereotype,noun,general,general,warn,stereotype,daily_life,implicit,n,pl,false,Plural of msichana — fires in blame/restriction contexts; safe to warn when combined with restriction terms,\bwasichana\b,Warn only in prescriptive contexts,biographical|counter_stereotype|statistical|quote|medical|legal|proper_noun,,
|
| 266 |
sw,wavulana,vijana wa kiume,vijana,gender|stereotype,noun,general,general,warn,stereotype,daily_life,implicit,n,pl,false,Plural of mvulana (boy) — used in blame framing (e.g. wanafunzi wa kike kuwatongoza wavulana),\bwavulana\b,Warn only in blame/restriction contexts,biographical|counter_stereotype|statistical|quote|medical|legal|proper_noun,,
|
| 267 |
sw,mvulana,kijana,mtu mdogo,gender|stereotype,noun,general,general,warn,stereotype,daily_life,implicit,n,sg,false,Singular boy term — fires in prescriptive and restriction contexts,\bmvulana\b,Warn only in prescriptive contexts,biographical|counter_stereotype|statistical|quote|medical|legal|proper_noun,,
|
| 268 |
+
sw,mwanaume ni kichwa,wenzangu wote wana haki sawa,,gender|daily_life|proverb,phrase,general,general,replace,stereotype,daily_life,explicit,,,false,Partial form of proverb — fires even without the second clause mwanamke ni shingo,\bmwanaume ni kichwa\b,Always flag,,Mwanaume ni kichwa cha familia,Wenzangu wote wana haki sawa katika uamuzi
|