# Necessity-index scoring utilities: lexicon definitions, per-category
# weights, and scoring/scaling/bucketing helpers.
import pandas as pd
import numpy as np
import re
# ---------------- MARKERS ----------------
# Keyword lexicon: category name -> list of lowercase phrases counted in
# the (lowercased) input text. Multi-word phrases are allowed; matching
# elsewhere is whole-word, so singular/plural variants are listed separately
# (e.g. "refugee" does not match "refugees").
LEXICON = {
    "urgency_markers": [
        "urgent", "need", "shortage", "emergency", "limited",
        "difficulties", "crisis", "immediate", "critical", "necessary",
        "essential", "dire", "catastrophe",
    ],
    "severity_markers": [
        "trauma", "difficult", "profound", "severe", "extreme", "struggling",
        "desperate", "suffering", "devastating", "harsh", "violent", "challenge",
        "danger",
    ],
    "vulnerability_markers": [
        "asd", "send", "disability", "disabilities", "special needs", "diagnosis", "vulnerable",
        "fragile", "risk", "sen", "adhd", "add", "dyslexia", "trans", "queer",
        # fixed typo: was "autisitc", which could never match the word "autistic"
        "lgbtq", "refugee", "refugees", "autism", "autistic", "neurodivergent",
        "low income", "poverty", "deprived", "poor", "disadvantaged", "underserved",
        "therapy", "therapeutic", "aln", "semh", "violence", "mental health", "depressed",
        "anxious", "anxiety", "ill", "sick", "down syndrome", "epilepsy",
    ],
    "emotional_appeal": [
        "help", "support", "deserve", "hope", "lives", "transform",
        "improve", "amazing", "difference", "dream", "opportunity",
        "empower", "nurture", "change", "impact", "grateful", "please",
        "!", "passion", "passionate", "committed", "life-changing",
        "thank you", "thankful", "love",
    ],
    "superlatives": [
        "most", "every", "all", "huge", "massive", "dramatically",
        "significantly", "really", "very", "extremely", "entirely",
        "absolutely", "completely", "totally", "utterly",
    ],
}
# --------------- WEIGHTS --------------
# Per-category multipliers applied to raw keyword counts when accumulating
# the overall necessity index (every LEXICON category must have an entry).
WEIGHTS = {
    "urgency_markers": 3,
    "severity_markers": 2.5,
    "vulnerability_markers": 3,
    "emotional_appeal": 2,
    "superlatives": 1.0,
}
# ------------ FUNCTION ----------------
# Maps marker categories to the per-category score key they feed
# (emotional_appeal / superlatives contribute only to the overall index).
_CATEGORY_SCORE_KEY = {
    "urgency_markers": "urgency_score",
    "severity_markers": "severity_score",
    "vulnerability_markers": "vulnerability_score",
}

def compute_necessity(text):
    """Score *text* against the LEXICON keyword categories.

    Counts whole-word occurrences of each keyword in the lowercased text,
    then weights counts by WEIGHTS into an overall ``necessity_index``.
    Urgency / severity / vulnerability raw counts are also reported.

    Parameters
    ----------
    text : str or any
        Free text to score; non-string input yields an all-zero result.

    Returns
    -------
    pd.Series
        Keys: necessity_index, urgency_score, severity_score,
        vulnerability_score.
    """
    totals = {
        "necessity_index": 0.0,
        "urgency_score": 0.0,
        "severity_score": 0.0,
        "vulnerability_score": 0.0,
    }
    # Guard: NaN / None / numeric cells scored as zero rather than crashing.
    if not isinstance(text, str):
        return pd.Series(totals)
    text_lower = text.lower()
    for category, keywords in LEXICON.items():
        category_count = 0
        for kw in keywords:
            # Attach \b only where the keyword's edge is a word character.
            # BUG FIX: the old unconditional r'\b'+kw+r'\b' made purely
            # punctuation keywords (e.g. "!") unmatchable, since \b needs a
            # word character on one side of the boundary.
            left = r"\b" if re.match(r"\w", kw) else ""
            right = r"\b" if re.search(r"\w\Z", kw) else ""
            category_count += len(re.findall(left + re.escape(kw) + right, text_lower))
        totals["necessity_index"] += WEIGHTS[category] * category_count
        score_key = _CATEGORY_SCORE_KEY.get(category)
        if score_key is not None:
            totals[score_key] += category_count
    return pd.Series(totals)
## -------- SCALING FUNCTION --------
def index_scaler(values):
    """Min-max scale *values* onto [0, 1], returned as a list.

    A constant sequence maps every element to 0.5 (no spread to scale by),
    and an empty input returns [] instead of raising (np.min/np.max fail on
    empty arrays — this was a crash in the original).
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return []
    lo = arr.min()
    hi = arr.max()
    if hi == lo:
        return [0.5] * arr.size
    # Vectorized equivalent of the per-element (x - min) / (max - min).
    return ((arr - lo) / (hi - lo)).tolist()
## -------- CATEGORIZE FUNCTION -------
def qcut_labels(series, bins=(0, .25, .75, .95, 1), labels=('low', 'medium', 'high', 'priority')):
    """Bucket *series* into quantile-based categories.

    *bins* are cumulative quantile edges passed to ``pd.qcut`` and *labels*
    names the resulting len(bins)-1 buckets. Returns a categorical Series.
    """
    quantile_edges = list(bins)
    bucket_names = list(labels)
    return pd.qcut(series, q=quantile_edges, labels=bucket_names)