Spaces:
Sleeping
Sleeping
| """ | |
| utils/sentiment.py | |
| ================== | |
| Lightweight keyword-based positive-sentiment detector. | |
| Used as a post-processing correction layer during inference to prevent | |
| clearly positive / happy text from being mislabelled as stressed. | |
| The detector counts positive and negative sentiment indicators in the | |
| input text and returns a dampening factor in [0, 1]. When multiplied | |
| by the raw model stress probability, the result ensures that sentences | |
| like *"I am happy"* or *"I love her and she loves me back"* receive | |
| stress scores well below 10 %. | |
| It also detects *negated* stress phrases such as *"not stressed"* or | |
| *"don't feel anxious"* and applies a moderate dampening factor so that | |
| explicit denials of stress are not treated as stress indicators. | |
| Design | |
| ------ | |
| - Only *dampens* — never *inflates* — the model score. | |
| - Requires a strong positive signal **and** the absence of negative / | |
| stress indicators before applying any correction. | |
| - Keeps a generous margin so that ambiguous or mixed-sentiment text is | |
| left to the model. | |
| - Negated-stress detection replaces matched phrases with a placeholder | |
| before the negative-hit scan so that "not stressed" does not block | |
| dampening the way genuine stress words would. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| # --------------------------------------------------------------------------- | |
| # Positive-sentiment indicators (case-insensitive, word-boundary) | |
| # --------------------------------------------------------------------------- | |
# Source of the single-word positive vocabulary.  The fragments below are
# concatenated into one alternation wrapped in word boundaries, so only
# whole words match ("unhappy" does not count as "happy").
_POSITIVE_WORDS_REGEX: str = (
    r"\b("
    r"happy|happiness|joy|joyful|joyous|elated|bliss|blissful"
    r"|cheerful|delighted|delightful|ecstatic|euphoric"
    r"|glad|pleased|wonderful|amazing|awesome|fantastic|fabulous"
    r"|terrific|magnificent|marvelous|superb|splendid"
    r"|great|excellent|incredible|brilliant|outstanding|perfect"
    r"|phenomenal|glorious|enjoyable|pleasant|overjoyed|jubilant"
    r"|love|loved|loving|adore|adored|cherish|cherished"
    r"|grateful|thankful|blessed|fortunate|lucky"
    r"|optimistic|hopeful|enthusiastic|excited|thrilled"
    r"|content|contented|satisfied|fulfilled|peaceful"
    r"|proud|confident|empowered|inspired|motivated"
    r"|relaxed|calm|serene|tranquil|comfortable"
    r"|smile|smiling|smiled|laugh|laughing|laughed|grin|grinning|grinned"
    r"|celebrate|celebrating|celebrated|celebration"
    r"|beautiful|gorgeous"
    r")\b"
)

# Kept as a list (with one entry) because callers iterate over it.
_POSITIVE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(_POSITIVE_WORDS_REGEX, flags=re.IGNORECASE),
]
# Source of the multi-word positive expressions ("feel great", "over the
# moon", ...).  As with the single-word list, the fragments form one
# alternation anchored on word boundaries.
_POSITIVE_PHRASES_REGEX: str = (
    r"\b("
    r"feel(?:s|ing)?\s+(?:great|good|amazing|wonderful|fantastic|awesome|fine|nice|blessed|happy|excellent|incredible|brilliant)"
    r"|love\s+(?:my|this|her|him|them|it|life)"
    r"|loves?\s+me"
    r"|(?:good|great|wonderful|amazing|fantastic|awesome|excellent)\s+(?:day|time|mood|news|life|morning|evening|night)"
    r"|having\s+(?:a\s+)?(?:great|good|wonderful|amazing|fantastic|blast|ball)"
    r"|(?:in|into)\s+(?:a\s+)?(?:great|good|wonderful|amazing|fantastic|awesome|excellent)\s+mood"
    r"|so\s+(?:happy|glad|grateful|thankful|excited|thrilled|proud|pleased)"
    r"|life\s+is\s+(?:good|great|beautiful|wonderful|amazing)"
    r"|couldn['\u2019]?t\s+be\s+(?:happier|better)"
    r"|on\s+top\s+of\s+the\s+world"
    r"|over\s+the\s+moon"
    r"|best\s+(?:day|time|thing)"
    r")\b"
)

# Kept as a list (with one entry) because callers iterate over it.
_POSITIVE_PHRASES: list[re.Pattern[str]] = [
    re.compile(_POSITIVE_PHRASES_REGEX, flags=re.IGNORECASE),
]
| # --------------------------------------------------------------------------- | |
| # Negated-stress patterns — stress keywords explicitly preceded by a negator. | |
| # These are detected BEFORE the negative-hit scan and replaced with a | |
| # placeholder so that "not stressed" is treated as a positive/neutral | |
| # signal rather than a stress indicator. | |
| # --------------------------------------------------------------------------- | |
# A negated-stress phrase is: negator, optional filler words, stress keyword.
#
# The filler slot is restricted to linking verbs, articles and degree
# adverbs.  Allowing arbitrary words there makes genuine stress statements
# look like denials of stress, e.g. "can't handle the stress",
# "won't stop worrying" or "not happy but very stressed".  Intensifying
# words such as "so", "this" and "more" are deliberately NOT fillers, so
# that "never been so stressed" is not read as a negation.
#
# All groups are non-capturing so that findall()/sub() operate on the whole
# matched phrase.
_NEGATED_STRESS_PATTERNS: list[re.Pattern[str]] = [
    re.compile(
        # Negator.
        r"\b(?:not|no\s+longer|don['\u2019]?t|doesn['\u2019]?t|isn['\u2019]?t"
        r"|aren['\u2019]?t|wasn['\u2019]?t|weren['\u2019]?t|haven['\u2019]?t"
        r"|hasn['\u2019]?t|hadn['\u2019]?t|won['\u2019]?t|wouldn['\u2019]?t"
        r"|can['\u2019]?t|less|barely|hardly|never)\s+"
        # Up to three whitelisted filler words between negator and keyword.
        r"(?:(?:feel(?:s|ing)?|felt|be|been|being|am|is|are|was|were"
        r"|get(?:s|ting)?|got|gotten|have|has|had|having|going|need|to"
        r"|at|all|any|even|ever|really|very|too|that|much|actually"
        r"|particularly|especially|remotely|a|an|the|bit|least|slightest"
        r")\s+){0,3}"
        # Stress keyword.
        r"(?:stress(?:ed|ful|ing)?|anxious|anxiety|worried|worrying|worry"
        r"|depress(?:ed|ion|ing)?|overwhelm(?:ed|ing)?|panic(?:king)?"
        r"|scared|afraid|fear(?:ful)?|exhausted|nervous|tense)\b",
        re.IGNORECASE,
    ),
]
| # --------------------------------------------------------------------------- | |
| # Negative / stress indicators — if ANY of these are present we leave | |
| # the model score alone (even when positive words also appear). | |
| # --------------------------------------------------------------------------- | |
# Source of the stress / negative vocabulary: emotion words, crisis terms
# and workload or sleep-related expressions, combined into one alternation
# anchored on word boundaries.
_NEGATIVE_REGEX: str = (
    r"\b("
    r"stress(?:ed|ful|ing)?|anxious|anxiety|worried|worrying|worry"
    r"|depress(?:ed|ion|ing)?|sad|sadness|miserable|unhappy"
    r"|angry|anger|furious|frustrated|frustration|irritated"
    r"|scared|afraid|fear(?:ful)?|terrified|panic(?:king)?"
    r"|overwhelm(?:ed|ing)?|exhausted|burnt?\s*out"
    r"|hopeless|helpless|desperate|despair"
    r"|lonely|isolated|alone|abandoned"
    r"|hate|hating|loathe|detest|resent"
    r"|cry(?:ing)?|sob(?:bing)?|tears|weep(?:ing)?"
    r"|suffer(?:ing)?|pain(?:ful)?|hurt(?:ing)?"
    r"|fail(?:ed|ing|ure)?|ruin(?:ed)?"
    r"|can['\u2019]?t\s+(?:take|handle|cope|stand|bear|sleep|breathe)"
    r"|don['\u2019]?t\s+(?:know\s+what\s+to\s+do|want\s+to)"
    r"|give\s+up|giving\s+up"
    r"|breakdown|break\s+down|falling\s+apart"
    r"|insomnia|nightmare|nightmares"
    r"|debt|bankrupt|fired|layoff"
    r"|suicide|suicidal|self[- ]?harm"
    r"|deadline[s]?|overdue"
    r"|sleep\s+depriv(?:ed|ation)|sleepless(?:ness)?"
    r"|haven['\u2019]?t\s+slept|not\s+slept|no\s+sleep|no\s+rest"
    r"|piling\s+up|pile\s+up|buried\s+(?:in|under)"
    r"|falling\s+behind|can['\u2019]?t\s+keep\s+up"
    r"|wearing\s+(?:me\s+)?down|breaking\s+down|worn\s+out"
    r"|drowning\s+in|stretched\s+thin|at\s+my\s+(?:limit|breaking\s+point)"
    r"|running\s+on\s+(?:empty|no\s+sleep|fumes)"
    r")\b"
)

# Kept as a list (with one entry) because callers iterate over it.
_NEGATIVE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(_NEGATIVE_REGEX, flags=re.IGNORECASE),
]
| # --------------------------------------------------------------------------- | |
| # Public API | |
| # --------------------------------------------------------------------------- | |
# Lead-in that negates the positive expression immediately following it:
# a negator, then at most three linking/degree words, anchored at the end
# of the text that precedes the positive match ("not", "don't feel",
# "isn't in a", ...).  The words between negator and match are exposed as
# the named group ``gap``.
_NEGATED_POSITIVE_LEAD_IN: re.Pattern[str] = re.compile(
    r"\b(?:not|no\s+longer|never|hardly|barely|cannot"
    r"|(?:do|does|did|is|are|was|were|have|has|had|wo|would|ca|could|should|ai)"
    r"n['\u2019]?t)\s+"
    r"(?P<gap>(?:(?:feel(?:s|ing)?|felt|be|been|being|have|having|had"
    r"|really|very|too|that|all|at|even|particularly|especially|exactly"
    r"|quite|a|an|the|in)\s+){0,3})$",
    re.IGNORECASE,
)


def _is_negated_positive(text: str, match: re.Match[str]) -> bool:
    """Return True when the positive *match* in *text* is preceded by a negator."""
    lead = _NEGATED_POSITIVE_LEAD_IN.search(text[:match.start()])
    if lead is None:
        return False
    # "never been so happy" / "haven't felt so proud" are emphatic praise,
    # not denials: a "so ..." phrase only counts as negated when the
    # negator is directly adjacent ("not so happy").
    starts_with_so = match.group(0).split(None, 1)[0] == "so"
    if starts_with_so and lead.group("gap"):
        return False
    return True


def compute_sentiment_dampening(text: str) -> float:
    """Return a dampening factor in ``[0.0, 1.0]`` for the stress score.

    * ``1.0`` -- no change (leave the model score as-is).
    * Values ``< 1.0`` -- reduce (dampen) the stress score.

    The factor is only lowered when no genuine negative/stress indicator
    remains, no positive expression is itself negated (e.g. "not happy"),
    and either:

    1. negated stress phrases are found (e.g. "not stressed"), or
    2. at least one positive indicator is found.

    Parameters
    ----------
    text : str
        The raw user input.

    Returns
    -------
    float
        Multiplicative dampening factor for the stress probability.
    """
    if not text or not text.strip():
        return 1.0

    text_lower = text.lower()

    # Replace negated stress phrases with a neutral placeholder so they are
    # not counted as genuine stress indicators below.  subn() performs the
    # substitution and reports the number of replacements in one pass.
    processed = text_lower
    negation_hits = 0
    for pat in _NEGATED_STRESS_PATTERNS:
        processed, replaced = pat.subn("__negated__", processed)
        negation_hits += replaced

    # Any genuine (non-negated) stress indicator: leave the model score alone.
    if any(pat.search(processed) for pat in _NEGATIVE_PATTERNS):
        return 1.0

    # Count positive hits (single words + phrases) on the original text.
    # A negated positive ("not happy", "don't feel great") is evidence of
    # negative sentiment, so it must never lead to dampening.
    positive_hits = 0
    for pat in (*_POSITIVE_PATTERNS, *_POSITIVE_PHRASES):
        for match in pat.finditer(text_lower):
            if _is_negated_positive(text_lower, match):
                return 1.0
            positive_hits += 1

    if negation_hits > 0:
        if positive_hits >= 1:
            # Negated stress + positive words: strong dampening.
            return 0.06
        # Negated stress alone: moderate dampening.
        return 0.35 if negation_hits == 1 else 0.22

    if positive_hits == 0:
        return 1.0

    # Standard positive dampening:
    #   1 positive hit   -> factor 0.08
    #   2 positive hits  -> factor 0.05
    #   3+ positive hits -> factor 0.03
    if positive_hits >= 3:
        return 0.03
    if positive_hits >= 2:
        return 0.05
    return 0.08
def get_sentiment_score(text: str) -> float:
    """Return a sentiment score in ``[0.0, 1.0]`` for a piece of text.

    * ``0.0`` -- strongly positive (no stress signals).
    * ``1.0`` -- strongly negative / stressful.
    * ``0.5`` -- neutral (no clear signal either way).

    The score is the share of negative hits among all keyword hits, using
    the same keyword lists as :func:`compute_sentiment_dampening`.
    Explicitly negated stress phrases (e.g. "not stressed") are neutralised
    before counting, so a denial of stress is not scored as stress.

    Parameters
    ----------
    text : str
        The raw user input.

    Returns
    -------
    float
        ``negative_hits / (negative_hits + positive_hits)``, or ``0.5``
        when the text is empty or contains no keyword at all.
    """
    if not text or not text.strip():
        return 0.5

    text_lower = text.lower()

    # Blank out negated stress phrases so "not stressed" does not register
    # as a stress keyword in the negative scan.
    processed = text_lower
    for pat in _NEGATED_STRESS_PATTERNS:
        processed = pat.sub("__negated__", processed)

    negative_hits = sum(len(pat.findall(processed)) for pat in _NEGATIVE_PATTERNS)
    positive_hits = sum(
        len(pat.findall(text_lower))
        for pat in (*_POSITIVE_PATTERNS, *_POSITIVE_PHRASES)
    )

    total = positive_hits + negative_hits
    if total == 0:
        return 0.5

    # More negative evidence moves the score closer to 1.0.
    return negative_hits / total