lynn-twinkl
committed on
Commit
·
d9116a1
1
Parent(s):
70e9053
Function now returns individual scores for all markers as well as the index
Browse files- functions/necessity_index.py +33 -13
functions/necessity_index.py
CHANGED
|
@@ -1,23 +1,25 @@
|
|
|
|
|
| 1 |
import re
|
| 2 |
|
| 3 |
# ---------------- MARKERS ----------------
|
| 4 |
|
| 5 |
LEXICON = {
|
| 6 |
-
"urgency_markers": [
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"immediate", "critical", "necessary", "essential", "dire"
|
| 10 |
],
|
| 11 |
"severity_markers": [
|
| 12 |
-
"trauma","difficult", "
|
| 13 |
-
"desperate", "
|
|
|
|
| 14 |
],
|
|
|
|
| 15 |
"vulnerability_markers": [
|
| 16 |
-
"asd", "send", "disability", "special needs", "diagnosis", "vulnerable",
|
| 17 |
"fragile", "risk", "sen", 'adhd','add','dyslexia', 'trans', 'queer',
|
| 18 |
'lgbtq', 'refugee', 'refugees', 'autism', 'autisitc', 'neurodivergent',
|
| 19 |
'low income', "poverty", "deprived", "poor", "disadvantaged", 'underserved',
|
| 20 |
-
'therapy', 'therapeutic', "aln"
|
| 21 |
],
|
| 22 |
"emotional_appeal": [
|
| 23 |
"help", "support", "deserve", "hope", "lives", "transform",
|
|
@@ -49,11 +51,23 @@ WEIGHTS = {
|
|
| 49 |
def compute_necessity(text):
|
| 50 |
|
| 51 |
if not isinstance(text, str):
|
| 52 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
text_lower = text.lower()
|
| 55 |
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
for category, keywords in LEXICON.items():
|
| 58 |
# For each keyword, count how many times it appears in text
|
| 59 |
# (simple usage of re.findall)
|
|
@@ -63,8 +77,14 @@ def compute_necessity(text):
|
|
| 63 |
pattern = r'\b' + re.escape(kw) + r'\b'
|
| 64 |
matches = re.findall(pattern, text_lower)
|
| 65 |
category_count += len(matches)
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
return
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
import re
|
| 3 |
|
| 4 |
# ---------------- MARKERS ----------------
|
| 5 |
|
| 6 |
LEXICON = {
|
| 7 |
+
"urgency_markers": [ "urgent", "need", "shortage", "emergency", "limited",
|
| 8 |
+
"difficulties", "crisis", "immediate", "critical", "necessary",
|
| 9 |
+
"essential", "dire", "catastrophe"
|
|
|
|
| 10 |
],
|
| 11 |
"severity_markers": [
|
| 12 |
+
"trauma","difficult", "profound", "severe", "extreme", "struggling",
|
| 13 |
+
"desperate", "suffering", "devastating", "harsh", "violent", "challenge",
|
| 14 |
+
"danger"
|
| 15 |
],
|
| 16 |
+
|
| 17 |
"vulnerability_markers": [
|
| 18 |
+
"asd", "send", "disability", "disabilities", "special needs", "diagnosis", "vulnerable",
|
| 19 |
"fragile", "risk", "sen", 'adhd','add','dyslexia', 'trans', 'queer',
|
| 20 |
'lgbtq', 'refugee', 'refugees', 'autism', 'autistic', 'autisitc', 'neurodivergent',
|
| 21 |
'low income', "poverty", "deprived", "poor", "disadvantaged", 'underserved',
|
| 22 |
+
'therapy', 'therapeutic', "aln", "semh", 'violence'
|
| 23 |
],
|
| 24 |
"emotional_appeal": [
|
| 25 |
"help", "support", "deserve", "hope", "lives", "transform",
|
|
|
|
| 51 |
def compute_necessity(text):
|
| 52 |
|
| 53 |
if not isinstance(text, str):
|
| 54 |
+
return pd.Series({
|
| 55 |
+
"necessity_index": 0.0,
|
| 56 |
+
"urgency_score": 0.0,
|
| 57 |
+
"severity_score": 0.0,
|
| 58 |
+
"vulnerability_score": 0.0,
|
| 59 |
+
})
|
| 60 |
|
| 61 |
text_lower = text.lower()
|
| 62 |
|
| 63 |
+
totals = {
|
| 64 |
+
"necessity_index" : 0.0,
|
| 65 |
+
"urgency_score" : 0.0,
|
| 66 |
+
"severity_score" : 0.0,
|
| 67 |
+
"vulnerability_score" : 0.0,
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
for category, keywords in LEXICON.items():
|
| 72 |
# For each keyword, count how many times it appears in text
|
| 73 |
# (simple usage of re.findall)
|
|
|
|
| 77 |
pattern = r'\b' + re.escape(kw) + r'\b'
|
| 78 |
matches = re.findall(pattern, text_lower)
|
| 79 |
category_count += len(matches)
|
| 80 |
+
|
| 81 |
+
totals['necessity_index'] += WEIGHTS[category] * category_count
|
| 82 |
|
| 83 |
+
if category == "urgency_markers":
|
| 84 |
+
totals['urgency_score'] += category_count
|
| 85 |
+
elif category == "severity_markers":
|
| 86 |
+
totals['severity_score'] += category_count
|
| 87 |
+
elif category == "vulnerability_markers":
|
| 88 |
+
totals['vulnerability_score'] += category_count
|
| 89 |
|
| 90 |
+
return pd.Series(totals)
|