lynn-twinkl committed on
Commit
d9116a1
·
1 Parent(s): 70e9053

Function now returns individual scores for all markers as well as the index

Browse files
Files changed (1) hide show
  1. functions/necessity_index.py +33 -13
functions/necessity_index.py CHANGED
@@ -1,23 +1,25 @@
 
1
  import re
2
 
3
  # ---------------- MARKERS ----------------
4
 
5
  LEXICON = {
6
- "urgency_markers": [
7
- "urgent", "need", "shortage", "struggling", "challenge",
8
- "emergency", "tight", "limited", "difficulties", "crisis",
9
- "immediate", "critical", "necessary", "essential", "dire"
10
  ],
11
  "severity_markers": [
12
- "trauma","difficult", "violence", "profound", "severe", "extreme",
13
- "desperate", "worst", "suffering", "devastating", "harsh",
 
14
  ],
 
15
  "vulnerability_markers": [
16
- "asd", "send", "disability", "special needs", "diagnosis", "vulnerable",
17
  "fragile", "risk", "sen", 'adhd','add','dyslexia', 'trans', 'queer',
18
  'lgbtq', 'refugee', 'refugees', 'autism', 'autisitc', 'neurodivergent',
19
  'low income', "poverty", "deprived", "poor", "disadvantaged", 'underserved',
20
- 'therapy', 'therapeutic', "aln"
21
  ],
22
  "emotional_appeal": [
23
  "help", "support", "deserve", "hope", "lives", "transform",
@@ -49,11 +51,23 @@ WEIGHTS = {
49
  def compute_necessity(text):
50
 
51
  if not isinstance(text, str):
52
- return 0.0
 
 
 
 
 
53
 
54
  text_lower = text.lower()
55
 
56
- score = 0.0
 
 
 
 
 
 
 
57
  for category, keywords in LEXICON.items():
58
  # For each keyword, count how many times it appears in text
59
  # (simple usage of re.findall)
@@ -63,8 +77,14 @@ def compute_necessity(text):
63
  pattern = r'\b' + re.escape(kw) + r'\b'
64
  matches = re.findall(pattern, text_lower)
65
  category_count += len(matches)
 
 
66
 
67
- # Weight the occurrences by the category weighting
68
- score += WEIGHTS[category] * category_count
 
 
 
 
69
 
70
- return score
 
1
+ import pandas as pd
2
  import re
3
 
4
  # ---------------- MARKERS ----------------
5
 
6
  LEXICON = {
7
+ "urgency_markers": [ "urgent", "need", "shortage", "emergency", "limited",
8
+ "difficulties", "crisis", "immediate", "critical", "necessary",
9
+ "essential", "dire", "catastrophe"
 
10
  ],
11
  "severity_markers": [
12
+ "trauma","difficult", "profound", "severe", "extreme", "struggling",
13
+ "desperate", "suffering", "devastating", "harsh", "violent", "challenge",
14
+ "danger"
15
  ],
16
+
17
  "vulnerability_markers": [
18
+ "asd", "send", "disability", "disabilities", "special needs", "diagnosis", "vulnerable",
19
  "fragile", "risk", "sen", 'adhd','add','dyslexia', 'trans', 'queer',
20
  'lgbtq', 'refugee', 'refugees', 'autism', 'autisitc', 'neurodivergent',
21
  'low income', "poverty", "deprived", "poor", "disadvantaged", 'underserved',
22
+ 'therapy', 'therapeutic', "aln", "semh", 'violence'
23
  ],
24
  "emotional_appeal": [
25
  "help", "support", "deserve", "hope", "lives", "transform",
 
def compute_necessity(text):
    """Score *text* for necessity-related language.

    Counts whole-word occurrences of each keyword in every LEXICON
    category, accumulating a weighted overall index (using WEIGHTS) plus
    raw per-category counts for the urgency, severity, and vulnerability
    marker categories.

    Parameters
    ----------
    text : str
        The text to score. Non-string input (e.g. NaN in a DataFrame
        column) yields all-zero scores.

    Returns
    -------
    pandas.Series
        With keys "necessity_index" (weighted total over all
        categories), "urgency_score", "severity_score", and
        "vulnerability_score" (raw keyword counts).
    """
    # Maps LEXICON category names onto their output-score keys.
    # Categories without an entry (e.g. emotional_appeal) contribute
    # only to the weighted necessity_index.
    # BUG FIX: the original if/elif chain compared against
    # "vulnerability__markers" (double underscore), which never matches
    # the LEXICON key, so vulnerability_score was always 0.0.
    score_key = {
        "urgency_markers": "urgency_score",
        "severity_markers": "severity_score",
        "vulnerability_markers": "vulnerability_score",
    }

    totals = {
        "necessity_index": 0.0,
        "urgency_score": 0.0,
        "severity_score": 0.0,
        "vulnerability_score": 0.0,
    }

    # Guard clause: non-string input scores zero across the board.
    if not isinstance(text, str):
        return pd.Series(totals)

    text_lower = text.lower()

    for category, keywords in LEXICON.items():
        # Count whole-word occurrences of every keyword in this
        # category (\b anchors avoid substring hits, e.g. "need" inside
        # "needless" is still matched as a word, but "end" is not
        # matched inside "needed").
        category_count = 0
        for kw in keywords:
            pattern = r'\b' + re.escape(kw) + r'\b'
            category_count += len(re.findall(pattern, text_lower))

        # Weight the occurrences by the category weighting.
        totals["necessity_index"] += WEIGHTS[category] * category_count

        if category in score_key:
            totals[score_key[category]] += category_count

    return pd.Series(totals)