File size: 3,582 Bytes
d9116a1
d15bf17
ef6706a
 
 
 
 
a87c3ff
 
d9116a1
 
ef6706a
 
d9116a1
 
 
ef6706a
d9116a1
ef6706a
d9116a1
ef6706a
 
 
a87c3ff
d0ecd99
ef6706a
 
 
 
 
a87c3ff
cf1fac1
ef6706a
 
 
 
 
 
 
 
 
 
 
 
a87c3ff
 
ef6706a
e411c92
ef6706a
 
 
 
 
 
 
 
 
d9116a1
 
 
 
 
 
ef6706a
 
 
d9116a1
 
 
 
 
 
 
 
ef6706a
 
 
 
 
 
 
 
 
d9116a1
 
ef6706a
d9116a1
 
 
 
cf1fac1
d9116a1
ef6706a
d9116a1
d15bf17
 
 
 
 
 
 
 
 
9168705
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import pandas as pd
import numpy as np
import re

# ---------------- MARKERS ----------------

# Keyword lists consumed by compute_necessity(). Matching there is
# case-insensitive, so all entries should be lower-case. Multi-word
# phrases (e.g. "special needs") are allowed.
LEXICON = {
    "urgency_markers": [
        "urgent", "need", "shortage", "emergency", "limited", 
        "difficulties", "crisis", "immediate", "critical", "necessary",
        "essential", "dire", "catastrophe"
    ],
    "severity_markers": [
        "trauma","difficult", "profound", "severe", "extreme", "struggling",
        "desperate", "suffering", "devastating", "harsh", "violent", "challenge",
        "danger"
    ],

    "vulnerability_markers": [
        "asd", "send", "disability", "disabilities", "special needs", "diagnosis", "vulnerable",
        "fragile", "risk", "sen", 'adhd','add','dyslexia', 'trans', 'queer',
        'lgbtq', 'refugee', 'refugees',
        # 'autistic' added: the original list only had the misspelling
        # 'autisitc', which never matches the real word under whole-word
        # matching. The misspelled form is retained for backward
        # compatibility with any text containing that typo.
        'autism', 'autistic', 'autisitc', 'neurodivergent',
        'low income', "poverty", "deprived", "poor", "disadvantaged", 'underserved',
        'therapy', 'therapeutic', "aln", "semh", 'violence', 'mental health', 'depressed',
        'anxious', 'anxiety', 'ill', 'sick','down syndrome', 'epilepsy',
    ],
    "emotional_appeal": [
        "help", "support", "deserve", "hope", "lives", "transform",
        "improve", "amazing", "difference", "dream", "opportunity",
        "empower", "nurture", "change", "impact", "grateful", "please",
        "!", 'passion', 'passionate', 'committed', 'life-changing',
        'thank you', 'thankful', 'love'
    ],
    "superlatives": [
        "most", "every", "all", "huge", "massive", "dramatically",
        "significantly", "really", "very", "extremely", "entirely",
        "absolutely", "completely", "totally", "utterly"
    ]
}


# --------------- WEIGHTS --------------

# Relative importance of each LEXICON category when computing the
# overall necessity index (higher weight == stronger signal). Keys must
# match LEXICON's keys exactly.
WEIGHTS = {
    "urgency_markers": 3,
    "severity_markers": 2.5,
    "vulnerability_markers": 3,
    "emotional_appeal": 2,
    "superlatives": 1.0,
}


# ------------ FUNCTION ----------------

def compute_necessity(text):
    """Score *text* for "necessity" language using LEXICON and WEIGHTS.

    Counts case-insensitive, whole-word occurrences of every LEXICON
    keyword in *text* and combines the per-category counts with WEIGHTS.

    Parameters
    ----------
    text : str
        Free text to score. Non-string input (e.g. NaN from a DataFrame
        column) yields all-zero scores.

    Returns
    -------
    pd.Series
        ``necessity_index``   weighted sum of counts over all categories;
        ``urgency_score``, ``severity_score``, ``vulnerability_score``
        raw (unweighted) counts for those three categories.
        ``emotional_appeal`` and ``superlatives`` contribute to the
        index only.
    """
    totals = {
        "necessity_index": 0.0,
        "urgency_score": 0.0,
        "severity_score": 0.0,
        "vulnerability_score": 0.0,
    }
    # Gracefully handle NaN / None / non-string cells.
    if not isinstance(text, str):
        return pd.Series(totals)

    text_lower = text.lower()

    # Categories that are also reported as standalone raw counts.
    raw_score_key = {
        "urgency_markers": "urgency_score",
        "severity_markers": "severity_score",
        "vulnerability_markers": "vulnerability_score",
    }

    for category, keywords in LEXICON.items():
        category_count = 0
        for kw in keywords:
            if re.search(r"\w", kw):
                # Word-like keyword (possibly multi-word): anchor on word
                # boundaries so e.g. "risk" does not count inside "brisk".
                pattern = r"\b" + re.escape(kw) + r"\b"
                category_count += len(re.findall(pattern, text_lower))
            else:
                # Pure-punctuation markers such as "!" carry no word
                # boundaries, so a \b-anchored regex never matches them
                # in ordinary prose ("help! " fails on the trailing \b).
                # Count literal occurrences instead.
                category_count += text_lower.count(kw)

        totals["necessity_index"] += WEIGHTS[category] * category_count
        if category in raw_score_key:
            totals[raw_score_key[category]] += category_count

    return pd.Series(totals)


## -------- SCALING FUNCTION --------


def index_scaler(values):
    """Min-max scale *values* onto the [0, 1] interval.

    Parameters
    ----------
    values : array-like of numbers

    Returns
    -------
    list[float]
        Scaled values. If every input is equal (zero range) each element
        maps to the midpoint 0.5 to avoid division by zero; an empty
        input returns an empty list (the original implementation raised
        on empty input because np.min/np.max reject empty arrays).
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return []
    x_min = arr.min()
    x_max = arr.max()
    if x_max == x_min:
        return [0.5] * int(arr.size)
    # Vectorized scaling instead of a per-element Python loop.
    return ((arr - x_min) / (x_max - x_min)).tolist()


## -------- CATEGORIZE FUNCTION -------

def qcut_labels(series, bins=(0, .25, .75, .95, 1), labels=('low', 'medium', 'high', 'priority')):
    """Assign quantile-based category labels to *series*.

    With the default edges the data is carved into four buckets:
    bottom 25% -> 'low', middle 50% -> 'medium', next 20% -> 'high',
    top 5% -> 'priority'.

    Returns a pandas Categorical series aligned with *series*.
    """
    categorised = pd.qcut(series, q=bins, labels=labels)
    return categorised