import pandas as pd
import numpy as np
import re

# ---------------- MARKERS ----------------

LEXICON = {
    "urgency_markers": [
        "urgent", "need", "shortage", "emergency", "limited", 
        "difficulties", "crisis", "immediate", "critical", "necessary",
        "essential", "dire", "catastrophe"
    ],
    "severity_markers": [
        "trauma","difficult", "profound", "severe", "extreme", "struggling",
        "desperate", "suffering", "devastating", "harsh", "violent", "challenge",
        "danger"
    ],

    "vulnerability_markers": [
        "asd", "send", "disability", "disabilities", "special needs", "diagnosis", "vulnerable",
        "fragile", "risk", "sen", 'adhd','add','dyslexia', 'trans', 'queer',
        'lgbtq', 'refugee', 'refugees', 'autism', 'autisitc', 'neurodivergent',
        'low income', "poverty", "deprived", "poor", "disadvantaged", 'underserved',
        'therapy', 'therapeutic', "aln", "semh", 'violence', 'mental health', 'depressed',
        'anxious', 'anxiety', 'ill', 'sick','down syndrome', 'epilepsy',
    ],
    "emotional_appeal": [
        "help", "support", "deserve", "hope", "lives", "transform",
        "improve", "amazing", "difference", "dream", "opportunity",
        "empower", "nurture", "change", "impact", "grateful", "please",
        "!", 'passion', 'passionate', 'committed', 'life-changing',
        'thank you', 'thankful', 'love'
    ],
    "superlatives": [
        "most", "every", "all", "huge", "massive", "dramatically",
        "significantly", "really", "very", "extremely", "entirely",
        "absolutely", "completely", "totally", "utterly"
    ]
}


# --------------- WEIGHTS --------------

# Per-category multipliers applied to the raw keyword counts when building
# the overall index; the keys mirror the categories of LEXICON.
WEIGHTS = dict(
    urgency_markers=3,
    severity_markers=2.5,
    vulnerability_markers=3,
    emotional_appeal=2,
    superlatives=1.0,
)


# ------------ FUNCTION ----------------

def _keyword_pattern(keyword):
    """Return a regex source string matching *keyword* as a whole token."""
    # ``\b`` is only meaningful next to a word character. Around a keyword
    # that starts or ends with punctuation (e.g. "!") it would require an
    # adjacent letter/digit, so ordinary text such as "help!" never matched.
    # Add the boundary only on the sides where the keyword edge is a word char.
    left = r'\b' if re.match(r'\w', keyword) else ''
    right = r'\b' if re.search(r'\w\Z', keyword) else ''
    return left + re.escape(keyword) + right


def compute_necessity(text, lexicon=None, weights=None):
    """Score *text* by counting lexicon keywords, case-insensitively.

    Args:
        text: The text to score. Any non-string value (None, NaN, numbers)
            yields an all-zero result.
        lexicon: Optional mapping of category name -> iterable of lowercase
            keywords. Defaults to the module-level ``LEXICON``.
        weights: Optional mapping of category name -> numeric weight.
            Defaults to the module-level ``WEIGHTS``.

    Returns:
        pandas.Series with four float entries, in this order:
        ``necessity_index`` (sum over all categories of weight * count),
        ``urgency_score``, ``severity_score`` and ``vulnerability_score``
        (raw counts for the ``urgency_markers``, ``severity_markers`` and
        ``vulnerability_markers`` categories respectively).

    Raises:
        KeyError: If a lexicon category has no entry in the weights mapping.
    """
    totals = {
        "necessity_index": 0.0,
        "urgency_score": 0.0,
        "severity_score": 0.0,
        "vulnerability_score": 0.0,
    }

    if not isinstance(text, str):
        return pd.Series(totals)

    if lexicon is None:
        lexicon = LEXICON
    if weights is None:
        weights = WEIGHTS

    # Categories whose raw counts are reported as their own column; every
    # other category contributes to the weighted index only.
    score_columns = {
        "urgency_markers": "urgency_score",
        "severity_markers": "severity_score",
        "vulnerability_markers": "vulnerability_score",
    }

    text_lower = text.lower()

    for category, keywords in lexicon.items():
        # Skip empty keywords: an empty pattern matches at every position.
        category_count = sum(
            len(re.findall(_keyword_pattern(kw), text_lower))
            for kw in keywords
            if kw
        )

        totals["necessity_index"] += weights[category] * category_count

        column = score_columns.get(category)
        if column is not None:
            totals[column] += category_count

    return pd.Series(totals)


## -------- SCALING FUNCTION --------


def index_scaler(values):
    """Min-max scale *values* into the range [0, 1].

    Args:
        values: A one-dimensional sequence of numbers (list, NumPy array or
            pandas Series). ``None``/NaN entries are treated as missing.

    Returns:
        A list of floats, one per input element. The minimum maps to 0.0 and
        the maximum to 1.0. If all non-missing values are equal, each of them
        maps to 0.5. Missing entries stay NaN and do not affect the scaling
        of the others. An empty input returns an empty list.
    """
    arr = np.asarray(values, dtype=float)

    # np.min/np.max raise on empty input; there is simply nothing to scale.
    if arr.size == 0:
        return []

    missing = np.isnan(arr)
    if missing.all():
        return arr.tolist()

    # Take the range from present values only, so that a single NaN does not
    # turn every scaled value into NaN.
    present = arr[~missing]
    x_min = present.min()
    x_max = present.max()

    if x_max == x_min:
        # Zero range: avoid dividing by zero and place values at the midpoint.
        return np.where(missing, np.nan, 0.5).tolist()

    return ((arr - x_min) / (x_max - x_min)).tolist()


## -------- CATEGORIZE FUNCTION -------

def qcut_labels(series, bins=(0,.25,.75,.95,1), labels=('low','medium','high','priority')):
    """Bucket *series* into labelled quantile bands using ``pandas.qcut``.

    Args:
        series: The numeric values to categorise.
        bins: Quantile cut points passed to ``pd.qcut`` as ``q``. The default
            splits at the 25th, 75th and 95th percentiles.
        labels: One label per band, i.e. one fewer than the number of cut
            points.

    Returns:
        Whatever ``pd.qcut`` returns for the given input, with the supplied
        labels as categories.

    NOTE(review): ``pd.qcut`` is called with its default handling of
    duplicate edges, so heavily tied data (e.g. many zero scores) may raise
    ``ValueError`` when quantile edges coincide -- confirm against callers.
    """
    quantile_edges = bins
    band_names = labels
    return pd.qcut(series, quantile_edges, labels=band_names)