Spaces:

Ace-119
/

StressDetect

Sleeping

File size: 10,016 Bytes

fb3e070

"""
utils/sentiment.py
==================
Lightweight keyword-based positive-sentiment detector.

Used as a post-processing correction layer during inference to prevent
clearly positive / happy text from being mislabelled as stressed.

The detector counts positive and negative sentiment indicators in the
input text and returns a dampening factor in [0, 1].  When multiplied
by the raw model stress probability, the result ensures that sentences
like *"I am happy"* or *"I love her and she loves me back"* receive
stress scores well below 10 %.

It also detects *negated* stress phrases such as *"not stressed"* or
*"don't feel anxious"* and applies a moderate dampening factor so that
explicit denials of stress are not treated as stress indicators.

Design
------
- Only *dampens* — never *inflates* — the model score.
- Requires a positive signal or an explicitly negated stress phrase,
  **and** the absence of genuine negative / stress indicators, before
  applying any correction.
- Keeps a generous margin so that ambiguous or mixed-sentiment text is
  left to the model.
- Negated-stress detection replaces matched phrases with a placeholder
  before the negative-hit scan so that "not stressed" does not block
  dampening the way genuine stress words would.
"""

from __future__ import annotations

import re


# ---------------------------------------------------------------------------
# Positive-sentiment indicators (case-insensitive, word-boundary)
# ---------------------------------------------------------------------------

# Single-word positive indicators.  The whole alternation sits in one
# capturing group between ``\b`` anchors, so only complete words match
# ("content" matches, "contents" does not) and ``findall`` returns one
# entry per hit.
# NOTE(review): some entries are ambiguous out of context (e.g. "content"
# as a noun, "great" as a plain intensifier) — confirm this is acceptable.
_POSITIVE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        r"\b("
        # Happiness, joy and general praise adjectives.
        r"happy|happiness|joy|joyful|joyous|elated|bliss|blissful"
        r"|cheerful|delighted|delightful|ecstatic|euphoric"
        r"|glad|pleased|wonderful|amazing|awesome|fantastic|fabulous"
        r"|terrific|magnificent|marvelous|superb|splendid"
        r"|great|excellent|incredible|brilliant|outstanding|perfect"
        r"|phenomenal|glorious|enjoyable|pleasant|overjoyed|jubilant"
        # Affection.
        r"|love|loved|loving|adore|adored|cherish|cherished"
        # Gratitude and good fortune.
        r"|grateful|thankful|blessed|fortunate|lucky"
        # Optimism and excitement.
        r"|optimistic|hopeful|enthusiastic|excited|thrilled"
        # Contentment.
        r"|content|contented|satisfied|fulfilled|peaceful"
        # Pride, confidence and motivation.
        r"|proud|confident|empowered|inspired|motivated"
        # Calmness.
        r"|relaxed|calm|serene|tranquil|comfortable"
        # Smiling / laughing (each inflection is listed explicitly).
        r"|smile|smiling|smiled|laugh|laughing|laughed|grin|grinning|grinned"
        # Celebration.
        r"|celebrate|celebrating|celebrated|celebration"
        r"|beautiful|gorgeous"
        r")\b",
        re.IGNORECASE,
    ),
]

# Multi-word positive phrases.  Words inside a phrase are separated by
# ``\s+`` so any run of whitespace is accepted.  A phrase hit can overlap a
# single-word hit from ``_POSITIVE_PATTERNS`` (e.g. "so happy" also contains
# "happy"); the functions below add both counts together.
_POSITIVE_PHRASES: list[re.Pattern[str]] = [
    re.compile(
        r"\b("
        # "feel / feels / feeling" + positive adjective.
        r"feel(?:s|ing)?\s+(?:great|good|amazing|wonderful|fantastic|awesome|fine|nice|blessed|happy|excellent|incredible|brilliant)"
        # Expressions of love.
        r"|love\s+(?:my|this|her|him|them|it|life)"
        r"|loves?\s+me"
        # Positive adjective + time / mood noun.
        r"|(?:good|great|wonderful|amazing|fantastic|awesome|excellent)\s+(?:day|time|mood|news|life|morning|evening|night)"
        # "having (a) great ..." / "having a blast"; the article is optional.
        r"|having\s+(?:a\s+)?(?:great|good|wonderful|amazing|fantastic|blast|ball)"
        r"|(?:in|into)\s+(?:a\s+)?(?:great|good|wonderful|amazing|fantastic|awesome|excellent)\s+mood"
        # Intensified positive feeling.
        r"|so\s+(?:happy|glad|grateful|thankful|excited|thrilled|proud|pleased)"
        r"|life\s+is\s+(?:good|great|beautiful|wonderful|amazing)"
        # The apostrophe may be straight ('), curly (U+2019) or missing.
        r"|couldn['\u2019]?t\s+be\s+(?:happier|better)"
        # Idioms.
        r"|on\s+top\s+of\s+the\s+world"
        r"|over\s+the\s+moon"
        r"|best\s+(?:day|time|thing)"
        r")\b",
        re.IGNORECASE,
    ),
]

# ---------------------------------------------------------------------------
# Negated-stress patterns — stress keywords explicitly preceded by a negator.
# These are detected BEFORE the negative-hit scan and replaced with a
# placeholder so that "not stressed" is treated as a positive/neutral
# signal rather than a stress indicator.
# ---------------------------------------------------------------------------

# Shape of the pattern: <negator> <up to three filler words> <stress keyword>.
#
# The filler window is deliberately restricted.  Without the restriction the
# pattern also swallowed genuine stress statements in which the negator
# belongs to a different verb or to an intensifier idiom, for example
# "can't stop worrying", "can't take the stress", "haven't slept and feel
# stressed" or "never been so stressed".  Because every match is masked
# before the negative-hit scan, those sentences were then dampened as if the
# speaker had denied being stressed.
_NEGATED_STRESS_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        # Negator; the apostrophe may be straight, curly (U+2019) or missing.
        r"\b(?:not|no\s+longer|don['\u2019]?t|doesn['\u2019]?t|isn['\u2019]?t"
        r"|aren['\u2019]?t|wasn['\u2019]?t|weren['\u2019]?t|haven['\u2019]?t"
        r"|hasn['\u2019]?t|hadn['\u2019]?t|won['\u2019]?t|wouldn['\u2019]?t"
        r"|can['\u2019]?t|less|barely|hardly|never)\s+"
        # Up to three filler words, none of which may be:
        #   * a clause breaker (and / but / because),
        #   * a verb that makes the negator part of a stress statement
        #     ("can't stop ...", "can't cope ...", "haven't slept ..."),
        #   * the start of an intensifier idiom ("been so", "felt more").
        r"(?:(?!"
        r"(?:and|but|because|stop|help|take|handle|cope|stand|bear"
        r"|sleep|slept|breathe|keep|know)\b"
        r"|(?:been|felt)\s+(?:so|more|this|as)\b"
        r")\w+\s+){0,3}"
        # Stress keyword being negated.
        r"(?:stress(?:ed|ful|ing)?|anxious|anxiety|worried|worrying|worry"
        r"|depress(?:ed|ion|ing)?|overwhelm(?:ed|ing)?|panic(?:king)?"
        r"|scared|afraid|fear(?:ful)?|exhausted|nervous|tense)\b",
        re.IGNORECASE,
    ),
]

# ---------------------------------------------------------------------------
# Negative / stress indicators — if ANY of these are present we leave
# the model score alone (even when positive words also appear).
# ---------------------------------------------------------------------------

# One alternation wrapped in a single capturing group between ``\b`` anchors.
# NOTE(review): because of the trailing ``\b``, inflections that are not
# listed explicitly do not match (e.g. "worries", "fears") — confirm whether
# they should be added.
_NEGATIVE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        r"\b("
        # Stress, anxiety, depression and sadness.
        r"stress(?:ed|ful|ing)?|anxious|anxiety|worried|worrying|worry"
        r"|depress(?:ed|ion|ing)?|sad|sadness|miserable|unhappy"
        # Anger and frustration.
        r"|angry|anger|furious|frustrated|frustration|irritated"
        # Fear and panic.
        r"|scared|afraid|fear(?:ful)?|terrified|panic(?:king)?"
        # Overwhelm and exhaustion; "burnt?\s*out" covers "burnout",
        # "burn out" and "burnt out".
        r"|overwhelm(?:ed|ing)?|exhausted|burnt?\s*out"
        # Hopelessness.
        r"|hopeless|helpless|desperate|despair"
        # Loneliness.
        r"|lonely|isolated|alone|abandoned"
        # Hatred and resentment.
        r"|hate|hating|loathe|detest|resent"
        # Crying.
        r"|cry(?:ing)?|sob(?:bing)?|tears|weep(?:ing)?"
        # Suffering and pain.
        r"|suffer(?:ing)?|pain(?:ful)?|hurt(?:ing)?"
        # Failure.
        r"|fail(?:ed|ing|ure)?|ruin(?:ed)?"
        # Inability phrases; the apostrophe may be straight, curly or missing.
        r"|can['\u2019]?t\s+(?:take|handle|cope|stand|bear|sleep|breathe)"
        r"|don['\u2019]?t\s+(?:know\s+what\s+to\s+do|want\s+to)"
        # Giving up / breaking down.
        r"|give\s+up|giving\s+up"
        r"|breakdown|break\s+down|falling\s+apart"
        # Sleep disturbances.
        r"|insomnia|nightmare|nightmares"
        # Financial and job trouble.
        r"|debt|bankrupt|fired|layoff"
        # Suicide and self-harm ("self-harm", "self harm" or "selfharm").
        r"|suicide|suicidal|self[- ]?harm"
        # Deadlines.
        r"|deadline[s]?|overdue"
        # Sleep deprivation.
        r"|sleep\s+depriv(?:ed|ation)|sleepless(?:ness)?"
        r"|haven['\u2019]?t\s+slept|not\s+slept|no\s+sleep|no\s+rest"
        # Workload and exhaustion idioms.
        r"|piling\s+up|pile\s+up|buried\s+(?:in|under)"
        r"|falling\s+behind|can['\u2019]?t\s+keep\s+up"
        r"|wearing\s+(?:me\s+)?down|breaking\s+down|worn\s+out"
        r"|drowning\s+in|stretched\s+thin|at\s+my\s+(?:limit|breaking\s+point)"
        r"|running\s+on\s+(?:empty|no\s+sleep|fumes)"
        r")\b",
        re.IGNORECASE,
    ),
]


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


# Regex prefix "<negator> <up to two filler words>".  Prepended to a positive
# pattern it recognises negated positives such as "not happy", "don't feel
# great" or "isn't a good day".  Fillers that normally keep the sentence
# positive ("can't stop smiling", "can't wait", "not only happy", "never been
# so happy") or that start a new clause ("not tired but excited") are excluded
# so that those sentences are still dampened.
_NEGATOR_PREFIX = (
    r"\b(?:not|no\s+longer|don['\u2019]?t|doesn['\u2019]?t|isn['\u2019]?t"
    r"|aren['\u2019]?t|wasn['\u2019]?t|weren['\u2019]?t|haven['\u2019]?t"
    r"|hasn['\u2019]?t|hadn['\u2019]?t|won['\u2019]?t|wouldn['\u2019]?t"
    r"|can['\u2019]?t|less|barely|hardly|never)\s+"
    r"(?:(?!"
    r"(?:and|but|because|only|just|stop|help|wait|believe)\b"
    r"|(?:been|felt)\s+(?:so|more|this|as)\b"
    r")\w+\s+){0,2}"
)


def _negated_variant(pattern: re.Pattern[str]) -> re.Pattern[str]:
    """Return *pattern* prefixed with a negator, keeping its flags.

    ``re.compile`` keeps an internal cache of recently compiled patterns, so
    calling this on every request does not recompile the expression each time.
    """
    return re.compile(
        _NEGATOR_PREFIX + "(?:" + pattern.pattern + ")", pattern.flags
    )


def compute_sentiment_dampening(text: str) -> float:
    """Return a dampening factor in ``[0.0, 1.0]`` for the stress score.

    * ``1.0`` → no change (leave the model score as-is).
    * Values ``< 1.0`` → reduce (dampen) the stress score.

    The factor is only lowered when no genuine negative/stress indicator
    and no *negated positive* (e.g. "not happy", "don't feel great")
    remains, and either:

    1. Negated stress phrases are found (e.g. "not stressed"), OR
    2. At least one positive indicator is found.

    Parameters
    ----------
    text : str
        The raw user input.

    Returns
    -------
    float
        Multiplicative dampening factor for the stress probability.
    """
    if not text or not text.strip():
        return 1.0

    text_lower = text.lower()

    # ── Negation detection ──
    # Mask negated stress phrases with a neutral placeholder so they are not
    # counted as genuine stress indicators below.  ``subn`` performs the
    # replacement and reports the number of matches in a single pass.
    processed = text_lower
    negation_hits = 0
    for pat in _NEGATED_STRESS_PATTERNS:
        processed, replaced = pat.subn("__negated__", processed)
        negation_hits += replaced

    # Any genuine (non-negated) stress indicator → leave the model score alone.
    if any(pat.search(processed) for pat in _NEGATIVE_PATTERNS):
        return 1.0

    positive_patterns = (*_POSITIVE_PATTERNS, *_POSITIVE_PHRASES)

    # A negated positive ("not happy") is a negative statement, not a positive
    # one.  Treat it like a genuine negative indicator and defer to the model
    # instead of dampening.  The check runs on the masked text so that the
    # negator of an already-handled phrase ("not stressed and happy") is not
    # applied a second time.
    if any(_negated_variant(pat).search(processed) for pat in positive_patterns):
        return 1.0

    # Count positive hits (single words + phrases) on the original text.
    positive_hits = sum(len(pat.findall(text_lower)) for pat in positive_patterns)

    # ── Negation-based dampening ──
    # Stress keywords are explicitly negated and no genuine stress indicators
    # remain.
    if negation_hits > 0:
        if positive_hits >= 1:
            # Negated stress + positive words → strong dampening.
            return 0.06
        # Negated stress alone → moderate dampening.
        return 0.35 if negation_hits == 1 else 0.22

    if positive_hits == 0:
        return 1.0

    # ── Standard positive dampening ──
    # 1 positive hit   → factor 0.08  (score reduced to ~8 % of raw)
    # 2 positive hits  → factor 0.05
    # 3+ positive hits → factor 0.03
    if positive_hits >= 3:
        return 0.03
    if positive_hits >= 2:
        return 0.05
    return 0.08


def get_sentiment_score(text: str) -> float:
    """Return a sentiment score in ``[0.0, 1.0]`` for a piece of text.

    * ``0.0`` → strongly positive (no stress signals).
    * ``1.0`` → strongly negative / stressful.
    * ``0.5`` → neutral (no clear signal either way).

    The score is the share of negative hits among all positive and negative
    keyword hits.  Explicitly negated stress phrases (e.g. "not stressed")
    are masked before negative hits are counted, so a denial of stress is
    not scored as stress.  A text whose only signal is such a denial is
    reported as neutral (``0.5``).

    Parameters
    ----------
    text : str
        The raw user input.

    Returns
    -------
    float
        ``negative_hits / (positive_hits + negative_hits)``, or ``0.5`` when
        the text is empty or contains no hits at all.
    """
    if not text or not text.strip():
        return 0.5

    text_lower = text.lower()

    # Mask negated stress phrases so that "not stressed" does not count as a
    # negative hit.
    processed = text_lower
    for pat in _NEGATED_STRESS_PATTERNS:
        processed = pat.sub("__negated__", processed)

    negative_hits = sum(len(pat.findall(processed)) for pat in _NEGATIVE_PATTERNS)

    # Positive hits (single words + phrases) are counted on the original text.
    positive_hits = sum(
        len(pat.findall(text_lower))
        for pat in (*_POSITIVE_PATTERNS, *_POSITIVE_PHRASES)
    )

    total = positive_hits + negative_hits
    if total == 0:
        return 0.5

    # Map to [0, 1]: more negative → closer to 1.0
    return negative_hits / total