File size: 2,582 Bytes
4ae4ae8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Cognitive distortion parser — detects distortion labels in model output."""

from __future__ import annotations

# Lookup table: canonical distortion name -> phrases whose presence in the
# model's output counts as the model naming that distortion.
#
# detect_distortions() tests each phrase as a case-insensitive substring, so
# stems such as "catastrophiz" cover several word forms at once. Key order is
# the order in which detected names are reported.
DISTORTION_TRIGGERS: dict[str, list[str]] = {
    "catastrophizing": [
        "catastrophiz",
        "worst case",
        "jumping to the worst",
        "imagining the worst",
        "end of the world",
    ],
    "overgeneralization": [
        "overgeneraliz",
        "always",
        "never",
        "every time",
        '"always"',
        '"never"',
        "the word 'always'",
        "the word 'never'",
    ],
    "all-or-nothing thinking": [
        "all-or-nothing",
        "black and white",
        "black-and-white",
        "either/or",
        "all or nothing",
        "binary thinking",
        "no middle ground",
    ],
    "mind-reading": [
        "mind-read",
        "mind read",
        "assuming what they think",
        "guessing their",
        "assuming they",
        "you're reading their mind",
    ],
    "fortune-telling": [
        "fortune-tell",
        "fortune tell",
        "predicting",
        "jumping ahead",
        "you're predicting",
        "crystal ball",
    ],
    "should-statements": [
        "should statement",
        "shoulding yourself",
        "'should'",
        '"should"',
        "must/should",
        "the word 'should'",
    ],
    "emotional reasoning": [
        "emotional reasoning",
        "feeling it doesn't make it",
        "feeling something doesn't make it true",
        "just because you feel",
    ],
    "labeling": [
        "labeling yourself",
        "putting a label",
        "you're not a",
        "that's a label",
        "calling yourself",
    ],
    "personalization": [
        "personaliz",
        "taking responsibility for",
        "blaming yourself for",
        "not everything is about",
        "not your fault",
    ],
    "mental filter": [
        "mental filter",
        "filtering out",
        "only seeing the negative",
        "ignoring the positive",
        "focusing only on",
    ],
    "disqualifying the positive": [
        "disqualifying",
        "dismissing the positive",
        "doesn't count",
        "that was just luck",
    ],
}


# Typographic quotes mapped to their ASCII equivalents. Trigger phrases are
# written with straight quotes, while generated text often uses curly ones;
# without this mapping phrases such as "doesn't count" would never match.
_QUOTE_TRANSLATION = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
})


def _normalize(value: str) -> str:
    """Return *value* lowercased, with curly quotes replaced by straight ones."""
    return value.translate(_QUOTE_TRANSLATION).lower()


def detect_distortions(
    text: str,
    *,
    triggers: dict[str, list[str]] | None = None,
) -> list[str]:
    """Detect cognitive distortions mentioned in model output.

    A distortion counts as detected when at least one of its trigger phrases
    occurs in ``text`` as a substring. Comparison is case-insensitive and
    treats typographic quotes as their straight ASCII counterparts.

    Args:
        text: The model's response text. Empty or ``None`` text yields an
            empty result.
        triggers: Optional mapping of distortion name to trigger phrases.
            Defaults to the module-level ``DISTORTION_TRIGGERS`` table.

    Returns:
        List of distortion names detected (may be empty). Each name appears
        at most once, in the iteration order of the trigger mapping.
    """
    if not text:
        return []

    trigger_map = DISTORTION_TRIGGERS if triggers is None else triggers
    haystack = _normalize(text)

    # Mapping keys are unique and any() stops at the first matching phrase,
    # so every distortion is reported at most once.
    return [
        distortion
        for distortion, phrases in trigger_map.items()
        if any(_normalize(phrase) in haystack for phrase in phrases)
    ]