# StressDetect / utils/sentiment.py
# Author: Ace-119
# Commit fb3e070: Add sentiment analysis, text preprocessing, and RL reward signal
"""
utils/sentiment.py
==================
Lightweight keyword-based positive-sentiment detector.
Used as a post-processing correction layer during inference to prevent
clearly positive / happy text from being mislabelled as stressed.
The detector counts positive and negative sentiment indicators in the
input text and returns a dampening factor in [0, 1]. When multiplied
by the raw model stress probability, the result ensures that sentences
like *"I am happy"* or *"I love her and she loves me back"* receive
stress scores well below 10 %.
It also detects *negated* stress phrases such as *"not stressed"* or
*"don't feel anxious"* and applies a moderate dampening factor so that
explicit denials of stress are not treated as stress indicators.
Design
------
- Only *dampens* — never *inflates* — the model score.
- Requires a strong positive signal **and** the absence of negative /
  stress indicators before applying any correction.
- Keeps a generous margin so that ambiguous or mixed-sentiment text is
  left to the model.
- Negated-stress detection replaces matched phrases with a placeholder
  before the negative-hit scan so that "not stressed" does not block
  dampening the way genuine stress words would.
"""
from __future__ import annotations
import re
# ---------------------------------------------------------------------------
# Positive-sentiment indicators (case-insensitive, word-boundary)
# ---------------------------------------------------------------------------
# Single-word positive indicators.
#
# One compiled regex: a single capturing group holding a flat alternation of
# words, wrapped in ``\b ... \b`` so only whole words match ("joy" does not
# match inside "enjoyable"; that word is listed separately).  Compiled with
# ``re.IGNORECASE``.  Because there is exactly one capturing group,
# ``findall`` yields one string per matched word, so ``len(findall(...))`` is
# the hit count.
#
# NOTE(review): several entries are ambiguous out of context (e.g. "content"
# as a noun, "great" in "a great deal of", "calm" in "can't calm down") and
# will still register as positive hits — confirm this is acceptable.
_POSITIVE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        r"\b("
        # Happiness / joy
        r"happy|happiness|joy|joyful|joyous|elated|bliss|blissful"
        r"|cheerful|delighted|delightful|ecstatic|euphoric"
        # General praise / superlatives
        r"|glad|pleased|wonderful|amazing|awesome|fantastic|fabulous"
        r"|terrific|magnificent|marvelous|superb|splendid"
        r"|great|excellent|incredible|brilliant|outstanding|perfect"
        r"|phenomenal|glorious|enjoyable|pleasant|overjoyed|jubilant"
        # Affection
        r"|love|loved|loving|adore|adored|cherish|cherished"
        # Gratitude / good fortune
        r"|grateful|thankful|blessed|fortunate|lucky"
        # Optimism / excitement
        r"|optimistic|hopeful|enthusiastic|excited|thrilled"
        # Contentment
        r"|content|contented|satisfied|fulfilled|peaceful"
        # Confidence / motivation
        r"|proud|confident|empowered|inspired|motivated"
        # Calmness
        r"|relaxed|calm|serene|tranquil|comfortable"
        # Visible expressions of happiness
        r"|smile|smiling|smiled|laugh|laughing|laughed|grin|grinning|grinned"
        r"|celebrate|celebrating|celebrated|celebration"
        # Aesthetic praise
        r"|beautiful|gorgeous"
        r")\b",
        re.IGNORECASE,
    ),
]
# Multi-word positive phrases.
#
# Same shape as the single-word table: one case-insensitive regex with a
# single capturing group bounded by ``\b``.  Words inside a phrase may be
# separated by any run of whitespace (``\s+``).  Apostrophes accept both the
# ASCII ``'`` and the typographic U+2019, and are optional (so "couldnt"
# matches as well).
#
# Several phrases contain words that also appear in the single-word positive
# table (e.g. "so happy" contains "happy"), so text matching such a phrase
# registers a hit in both tables.
_POSITIVE_PHRASES: list[re.Pattern[str]] = [
    re.compile(
        r"\b("
        # "feel great", "feels good", "feeling wonderful", ...
        r"feel(?:s|ing)?\s+(?:great|good|amazing|wonderful|fantastic|awesome|fine|nice|blessed|happy|excellent|incredible|brilliant)"
        # "love my ...", "love life", "loves me", "love me"
        r"|love\s+(?:my|this|her|him|them|it|life)"
        r"|loves?\s+me"
        # "good day", "great news", "wonderful evening", ...
        r"|(?:good|great|wonderful|amazing|fantastic|awesome|excellent)\s+(?:day|time|mood|news|life|morning|evening|night)"
        # "having a great ...", "having a blast", "having a ball"
        r"|having\s+(?:a\s+)?(?:great|good|wonderful|amazing|fantastic|blast|ball)"
        # "in a good mood", "into a great mood"
        r"|(?:in|into)\s+(?:a\s+)?(?:great|good|wonderful|amazing|fantastic|awesome|excellent)\s+mood"
        # "so happy", "so grateful", ...
        r"|so\s+(?:happy|glad|grateful|thankful|excited|thrilled|proud|pleased)"
        # "life is good", "life is beautiful", ...
        r"|life\s+is\s+(?:good|great|beautiful|wonderful|amazing)"
        # "couldn't be happier" / "couldn't be better"
        r"|couldn['\u2019]?t\s+be\s+(?:happier|better)"
        # Idioms
        r"|on\s+top\s+of\s+the\s+world"
        r"|over\s+the\s+moon"
        r"|best\s+(?:day|time|thing)"
        r")\b",
        re.IGNORECASE,
    ),
]
# ---------------------------------------------------------------------------
# Negated-stress patterns — stress keywords explicitly preceded by a negator.
# These are detected BEFORE the negative-hit scan and replaced with a
# placeholder so that "not stressed" is treated as a positive/neutral
# signal rather than a stress indicator.
# ---------------------------------------------------------------------------
# Negated stress phrases: a negator, up to three filler words, then a stress
# keyword ("not stressed", "don't feel anxious", "no longer really worried").
#
# The filler words are restricted so that constructions which *intensify*
# rather than deny stress are NOT treated as negations:
#
# * "never been more / so / this / that / as stressed"  -> affirmation
# * "not only stressed", "not just anxious", "can't be more stressed"
#
# For every negator the filler may not be "more", "only" or "just"; after
# "never" it additionally may not be "so", "this", "that" or "as" (whereas
# "not so stressed" / "not that worried" remain genuine softenings and still
# match).  Anything rejected here simply falls through to the ordinary
# negative-indicator scan, which leaves the model score untouched.
#
# The pattern deliberately contains NO capturing groups, so ``findall``
# returns whole matches and ``len(findall(...))`` / ``subn`` give the number
# of negated phrases.  Apostrophes accept ASCII ``'`` or U+2019 and are
# optional ("dont" matches like "don't").
_NEGATED_STRESS_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        r"\b(?:"
        # "never" + filler words, excluding comparative/intensifier fillers.
        r"never\s+(?:(?!(?:more|only|just|so|this|that|as)\b)\w+\s+){0,3}"
        # Every other negator + filler words.
        r"|(?:not|no\s+longer|don['\u2019]?t|doesn['\u2019]?t|didn['\u2019]?t"
        r"|isn['\u2019]?t|aren['\u2019]?t|wasn['\u2019]?t|weren['\u2019]?t"
        r"|haven['\u2019]?t|hasn['\u2019]?t|hadn['\u2019]?t|won['\u2019]?t"
        r"|wouldn['\u2019]?t|can['\u2019]?t|less|barely|hardly)\s+"
        r"(?:(?!(?:more|only|just)\b)\w+\s+){0,3}"
        r")"
        # Stress keyword that is being negated.
        r"(?:stress(?:ed|ful|ing)?|anxious|anxiety|worried|worrying|worry"
        r"|depress(?:ed|ion|ing)?|overwhelm(?:ed|ing)?|panic(?:king)?"
        r"|scared|afraid|fear(?:ful)?|exhausted|nervous|tense)\b",
        re.IGNORECASE,
    ),
]
# ---------------------------------------------------------------------------
# Negative / stress indicators — if ANY of these are present we leave
# the model score alone (even when positive words also appear).
# ---------------------------------------------------------------------------
# Genuine negative / stress indicators (single words and short phrases).
#
# One case-insensitive regex with a single capturing group bounded by
# ``\b``, so ``len(findall(...))`` is the number of hits.  Apostrophes accept
# ASCII ``'`` or U+2019 and are optional.
#
# "nervous" and "tense" are included so that every stress keyword that can be
# negated by the negated-stress patterns ("not nervous", "isn't tense") is
# also recognised as a stress indicator when it appears un-negated.
_NEGATIVE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        r"\b("
        # Core stress / anxiety / low-mood vocabulary
        r"stress(?:ed|ful|ing)?|anxious|anxiety|worried|worrying|worry"
        r"|nervous|tense"
        r"|depress(?:ed|ion|ing)?|sad|sadness|miserable|unhappy"
        r"|angry|anger|furious|frustrated|frustration|irritated"
        r"|scared|afraid|fear(?:ful)?|terrified|panic(?:king)?"
        r"|overwhelm(?:ed|ing)?|exhausted|burnt?\s*out"
        r"|hopeless|helpless|desperate|despair"
        r"|lonely|isolated|alone|abandoned"
        r"|hate|hating|loathe|detest|resent"
        r"|cry(?:ing)?|sob(?:bing)?|tears|weep(?:ing)?"
        r"|suffer(?:ing)?|pain(?:ful)?|hurt(?:ing)?"
        r"|fail(?:ed|ing|ure)?|ruin(?:ed)?"
        # Inability-to-cope phrases
        r"|can['\u2019]?t\s+(?:take|handle|cope|stand|bear|sleep|breathe)"
        r"|don['\u2019]?t\s+(?:know\s+what\s+to\s+do|want\s+to)"
        r"|give\s+up|giving\s+up"
        r"|breakdown|break\s+down|falling\s+apart"
        # Sleep, money, work and crisis vocabulary
        r"|insomnia|nightmare|nightmares"
        r"|debt|bankrupt|fired|layoff"
        r"|suicide|suicidal|self[- ]?harm"
        r"|deadline[s]?|overdue"
        r"|sleep\s+depriv(?:ed|ation)|sleepless(?:ness)?"
        r"|haven['\u2019]?t\s+slept|not\s+slept|no\s+sleep|no\s+rest"
        # Overload idioms
        r"|piling\s+up|pile\s+up|buried\s+(?:in|under)"
        r"|falling\s+behind|can['\u2019]?t\s+keep\s+up"
        r"|wearing\s+(?:me\s+)?down|breaking\s+down|worn\s+out"
        r"|drowning\s+in|stretched\s+thin|at\s+my\s+(?:limit|breaking\s+point)"
        r"|running\s+on\s+(?:empty|no\s+sleep|fumes)"
        r")\b",
        re.IGNORECASE,
    ),
]
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
# Matches when the text immediately BEFORE a positive hit ends with a negator
# followed by at most two plain words ("not", "not very", "don't feel",
# "never been so").  Punctuation between the negator and the hit breaks the
# window, so "not stressed, I feel great" does not negate "great".
_NEGATOR_BEFORE_HIT: re.Pattern[str] = re.compile(
    r"\b(?:not|no\s+longer|never|hardly|barely|less"
    r"|(?:do|does|did|is|are|was|were|have|has|had|wo|would|ca)n['\u2019]?t)"
    r"\s+(?:\w+\s+){0,2}$",
    re.IGNORECASE,
)


def _count_unnegated_hits(patterns: list[re.Pattern[str]], text: str) -> int:
    """Count matches of *patterns* in *text* that are not directly negated."""
    return sum(
        1
        for pat in patterns
        for match in pat.finditer(text)
        if _NEGATOR_BEFORE_HIT.search(text[: match.start()]) is None
    )


def compute_sentiment_dampening(text: str) -> float:
    """Return a dampening factor in ``[0.0, 1.0]`` for the stress score.

    * ``1.0`` → no change (leave the model score as-is).
    * Values ``< 1.0`` → reduce (dampen) the stress score.

    The factor is only lowered when no genuine negative/stress indicator
    remains in the text **and** either:

    1. Negated stress phrases are found (e.g. "not stressed"), OR
    2. At least one positive indicator is found.

    Positive indicators that are themselves negated ("not happy",
    "don't feel good") are not counted.  The negation check is a simple
    look-behind heuristic; when it misfires (e.g. "can't stop smiling") the
    only consequence is weaker or no dampening, i.e. the model score is
    trusted more — the function never inflates a score.

    Parameters
    ----------
    text : str
        The raw user input.

    Returns
    -------
    float
        Multiplicative dampening factor for the stress probability.
    """
    if not text or not text.strip():
        return 1.0

    text_lower = text.lower()

    # -- Negation detection --
    # Replace negated stress phrases with a neutral placeholder so they are
    # not counted as genuine stress indicators below.  ``subn`` substitutes
    # and counts in a single pass.
    processed = text_lower
    negation_hits = 0
    for pat in _NEGATED_STRESS_PATTERNS:
        processed, replaced = pat.subn("__negated__", processed)
        negation_hits += replaced

    # Any genuine (non-negated) stress indicator → leave the model score
    # unchanged, even when positive words also appear.
    if any(pat.search(processed) for pat in _NEGATIVE_PATTERNS):
        return 1.0

    # Count positive hits (single words + phrases) on the original text,
    # skipping hits that are directly preceded by a negator.
    positive_hits = _count_unnegated_hits(
        _POSITIVE_PATTERNS, text_lower
    ) + _count_unnegated_hits(_POSITIVE_PHRASES, text_lower)

    # -- Negation-based dampening --
    if negation_hits > 0:
        if positive_hits >= 1:
            # Negated stress + positive words → strong dampening.
            return 0.06
        # Negated stress alone → moderate dampening.
        return 0.35 if negation_hits == 1 else 0.22

    if positive_hits == 0:
        return 1.0

    # -- Standard positive dampening --
    #   1 positive hit   → factor 0.08 (score capped at ~8 % of raw)
    #   2 positive hits  → factor 0.05
    #   3+ positive hits → factor 0.03
    if positive_hits >= 3:
        return 0.03
    if positive_hits >= 2:
        return 0.05
    return 0.08
def get_sentiment_score(text: str) -> float:
    """Return a sentiment score in ``[0.0, 1.0]`` for a piece of text.

    * ``0.0`` → strongly positive (no stress signals).
    * ``1.0`` → strongly negative / stressful.
    * ``0.5`` → neutral (no clear signal either way).

    The score is ``negative_hits / (negative_hits + positive_hits)`` using
    the same keyword lists as :func:`compute_sentiment_dampening`.  As in
    that function, negated stress phrases ("not stressed", "don't feel
    anxious") are blanked out before the negative scan so that an explicit
    denial of stress is not scored as stress; on its own such a phrase
    yields the neutral ``0.5``.

    Parameters
    ----------
    text : str
        The raw user input.

    Returns
    -------
    float
        Share of negative hits among all hits, or ``0.5`` when the text is
        empty/blank or contains no hits at all.
    """
    if not text or not text.strip():
        return 0.5

    text_lower = text.lower()

    # Neutralise negated stress phrases before looking for stress words.
    processed = text_lower
    for pat in _NEGATED_STRESS_PATTERNS:
        processed = pat.sub("__negated__", processed)

    negative_hits = sum(len(pat.findall(processed)) for pat in _NEGATIVE_PATTERNS)

    # Positive hits are counted on the original (lower-cased) text.
    positive_hits = sum(
        len(pat.findall(text_lower))
        for pat in (*_POSITIVE_PATTERNS, *_POSITIVE_PHRASES)
    )

    total = positive_hits + negative_hits
    if total == 0:
        return 0.5

    # Map to [0, 1]: more negative → closer to 1.0
    return negative_hits / total