Spaces:

TwinklData
/

Community_Collections_App

Sleeping

Community_Collections_App / src /necessity_index.py

lynn-twinkl

Changed name to src for consistency

19fcede 9 months ago

3.58 kB

	import pandas as pd
	import numpy as np
	import re

	# ---------------- MARKERS ----------------

	# Keyword lists grouped by marker category. compute_necessity() counts how
	# often each keyword occurs in a lower-cased text, so every entry here is
	# written in lower case. Category names must match the keys of WEIGHTS.
	LEXICON = {
	    "urgency_markers": [
	        "urgent", "need", "shortage", "emergency", "limited",
	        "difficulties", "crisis", "immediate", "critical", "necessary",
	        "essential", "dire", "catastrophe",
	    ],
	    "severity_markers": [
	        "trauma", "difficult", "profound", "severe", "extreme", "struggling",
	        "desperate", "suffering", "devastating", "harsh", "violent",
	        "challenge", "danger",
	    ],
	    "vulnerability_markers": [
	        "asd", "send", "disability", "disabilities", "special needs",
	        "diagnosis", "vulnerable", "fragile", "risk", "sen", "adhd", "add",
	        "dyslexia", "trans", "queer", "lgbtq", "refugee", "refugees",
	        "autism",
	        # "autistic" was previously only present in the misspelled form
	        # below, so the correct spelling was never counted. The misspelled
	        # form is retained so text that matched before still matches.
	        "autistic", "autisitc",
	        "neurodivergent", "low income", "poverty", "deprived", "poor",
	        "disadvantaged", "underserved", "therapy", "therapeutic", "aln",
	        "semh", "violence", "mental health", "depressed", "anxious",
	        "anxiety", "ill", "sick", "down syndrome", "epilepsy",
	    ],
	    "emotional_appeal": [
	        "help", "support", "deserve", "hope", "lives", "transform",
	        "improve", "amazing", "difference", "dream", "opportunity",
	        "empower", "nurture", "change", "impact", "grateful", "please",
	        "!", "passion", "passionate", "committed", "life-changing",
	        "thank you", "thankful", "love",
	    ],
	    "superlatives": [
	        "most", "every", "all", "huge", "massive", "dramatically",
	        "significantly", "really", "very", "extremely", "entirely",
	        "absolutely", "completely", "totally", "utterly",
	    ],
	}


	# --------------- WEIGHTS --------------

	# Multiplier applied to each category's keyword hit count when
	# compute_necessity() accumulates the overall necessity_index.
	# Keys mirror the category names used in LEXICON.
	WEIGHTS = dict(
	    urgency_markers=3,
	    severity_markers=2.5,
	    vulnerability_markers=3,
	    emotional_appeal=2,
	    superlatives=1.0,
	)


	# ------------ FUNCTION ----------------

	def compute_necessity(text):
	    """Score ``text`` against the keyword lists in LEXICON.

	    The text is lower-cased, every LEXICON keyword is counted as written,
	    and the per-category hit counts are combined into a weighted total
	    using WEIGHTS.

	    Args:
	        text: The text to score. Anything that is not a ``str`` (for
	            example ``None`` or a float NaN) is scored as all zeros.

	    Returns:
	        A ``pd.Series`` with four float entries, in this order:
	        ``necessity_index`` (sum over every category of weight * hit
	        count), then ``urgency_score``, ``severity_score`` and
	        ``vulnerability_score`` (the raw hit counts of those three
	        categories). The other categories only feed ``necessity_index``.

	    Raises:
	        KeyError: If a LEXICON category has no entry in WEIGHTS.
	    """
	    totals = {
	        "necessity_index": 0.0,
	        "urgency_score": 0.0,
	        "severity_score": 0.0,
	        "vulnerability_score": 0.0,
	    }

	    if not isinstance(text, str):
	        return pd.Series(totals)

	    text_lower = text.lower()

	    # Categories whose raw hit count is also reported under its own key.
	    score_columns = {
	        "urgency_markers": "urgency_score",
	        "severity_markers": "severity_score",
	        "vulnerability_markers": "vulnerability_score",
	    }

	    for category, keywords in LEXICON.items():
	        category_count = 0
	        for kw in keywords:
	            if not kw:
	                # An empty keyword would match at every position.
	                continue
	            # \b only makes sense next to a word character. Wrapping a
	            # punctuation keyword such as "!" in \b...\b would require word
	            # characters on both sides of it, so "help!" would never match.
	            # Anchor only the edges of the keyword that are word characters.
	            prefix = r"\b" if re.fullmatch(r"\w", kw[0]) else ""
	            suffix = r"\b" if re.fullmatch(r"\w", kw[-1]) else ""
	            pattern = prefix + re.escape(kw) + suffix
	            category_count += len(re.findall(pattern, text_lower))

	        totals["necessity_index"] += WEIGHTS[category] * category_count

	        column = score_columns.get(category)
	        if column is not None:
	            totals[column] += category_count

	    return pd.Series(totals)


	## -------- SCALING FUNCTION --------


	def index_scaler(values):
	    """Min-max scale ``values`` into the range [0, 1].

	    Args:
	        values: A sequence of numbers (list, NumPy array, pandas Series, ...).

	    Returns:
	        A list of floats, one per input value, computed as
	        ``(x - min) / (max - min)``. When every value is identical the
	        range is zero, so each element is mapped to ``0.5`` instead of
	        dividing by zero. An empty input yields an empty list.
	    """
	    arr = np.asarray(values, dtype=float)

	    # np.min / np.max raise ValueError on empty input; there is nothing to
	    # scale in that case, so return an empty result instead.
	    if arr.size == 0:
	        return []

	    x_min = arr.min()
	    x_max = arr.max()

	    if x_max == x_min:
	        return np.full(arr.shape, 0.5).tolist()

	    return ((arr - x_min) / (x_max - x_min)).tolist()


	## -------- CATEGORIZE FUNCTION -------

	def qcut_labels(series, bins=(0, 0.25, 0.75, 0.95, 1), labels=('low', 'medium', 'high', 'priority')):
	    """Assign each value in ``series`` a label based on its quantile bin.

	    Args:
	        series: The values to categorise; passed straight to ``pd.qcut``.
	        bins: Quantile edges handed to ``pd.qcut`` as ``q``. With the
	            defaults the cut points are the 25th, 75th and 95th percentiles.
	        labels: One label per bin, in ascending order.

	    Returns:
	        Whatever ``pd.qcut`` returns for these arguments.

	    Note:
	        ``pd.qcut`` is called with its default handling of duplicate bin
	        edges, so data with many tied values that produce repeated
	        quantile edges will raise ``ValueError``.
	    """
	    qcut_options = {"q": bins, "labels": labels}
	    return pd.qcut(series, **qcut_options)