File size: 10,364 Bytes
d6c18ca
 
62ff3c4
d6c18ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e169c4c
d6c18ca
 
 
 
 
e169c4c
 
62ff3c4
 
 
d6c18ca
 
 
 
4228096
d6c18ca
62ff3c4
 
 
 
d6c18ca
62ff3c4
d6c18ca
62ff3c4
d6c18ca
 
 
62ff3c4
d6c18ca
 
 
 
 
 
 
 
 
 
 
 
62ff3c4
 
 
d6c18ca
 
 
9f96a54
d6c18ca
 
9f96a54
 
 
 
d6c18ca
 
 
 
 
 
 
 
 
62ff3c4
d6c18ca
 
62ff3c4
 
 
 
d6c18ca
62ff3c4
d6c18ca
 
62ff3c4
 
d6c18ca
62ff3c4
 
d6c18ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f96a54
d6c18ca
 
 
 
9f96a54
d6c18ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62ff3c4
 
d6c18ca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
from huggingface_hub import InferenceClient
from src.config import BASE_MODEL, MY_MODEL, HF_TOKEN
import os
from src.utils.profile import load_schema, create_empty_profile, extract_profile_updates, merge_profile, profile_to_summary
from src.utils.resources import load_resources, filter_resources, score_resources, format_resources_for_context

# Lower-case phrases that signal the user is explicitly asking to see
# treatment options; matched as substrings against the lower-cased message
# in _user_wants_recommendations().
RECOMMENDATION_KEYWORDS = [
    "show me", "show options", "show recommendations", "give me options",
    "what options", "what facilities", "find me", "recommend", "recommendations",
    "options near", "what's available", "what is available", "what are my options",
    "just show", "see options", "see recommendations", "list options",
    "i want to see", "can you show", "what do you have",
]

# (category, field) pairs that must all be truthy in the user profile before
# recommendations are offered proactively. Location (zipcode OR region) is the
# fourth requirement, checked separately in _has_enough_for_recommendation().
REQUIRED_PROFILE_FIELDS = [
    ("clinical", "primary_focus"),      # type of help
    ("preferences", "setting"),         # treatment setting
    ("logistics", "insurance"),         # payment / insurance
]


def _has_enough_for_recommendation(profile: dict) -> bool:
    """Return True when the four key fields are collected.

    The three (category, field) pairs in REQUIRED_PROFILE_FIELDS must all be
    truthy, and the profile must additionally carry a location β€” either a
    zipcode or a region under "logistics".
    """
    required_present = all(
        profile.get(category, {}).get(field)
        for category, field in REQUIRED_PROFILE_FIELDS
    )
    if not required_present:
        return False
    # Location counts as the fourth field: zipcode or region is enough.
    logistics = profile.get("logistics", {})
    return bool(logistics.get("zipcode") or logistics.get("region"))


def _user_wants_recommendations(user_input: str) -> bool:
    """Return True if the user is explicitly asking for recommendations."""
    normalized = user_input.lower()
    for keyword in RECOMMENDATION_KEYWORDS:
        if keyword in normalized:
            return True
    return False


# Lower-case substrings that indicate a potential crisis; matched against the
# lower-cased message in _is_crisis(). "suicid" deliberately covers
# "suicide" / "suicidal" variants.
CRISIS_KEYWORDS = [
    "suicid", "kill myself", "want to die", "end my life", "take my life",
    "don't want to live", "dont want to live", "no reason to live",
    "better off dead", "self-harm", "self harm", "hurt myself", "cutting myself",
    "overdose", "overdosing",
]

# Fixed markdown reply returned verbatim (instead of an LLM response)
# whenever crisis keywords are detected in the user's message.
CRISIS_RESPONSE = (
    "I'm really sorry you're going through something this difficult. "
    "You don't have to handle it alone.\n\n"
    "Please reach out for immediate support:\n\n"
    "**Behavioral Health Help Line (BHHL)**\n"
    "πŸ“ž Call or text: **833-773-2445**\n"
    "Available 24 hours a day, 7 days a week, 365 days a year.\n"
    "Anyone may contact the Help Line if they or a family member are experiencing "
    "a mental health or substance use disorder crisis.\n\n"
    "If you or someone near you may be in immediate danger, please call **911** right now."
)


def _is_crisis(text: str) -> bool:
    """Return True when *text* contains any crisis keyword (case-insensitive)."""
    normalized = text.lower()
    for keyword in CRISIS_KEYWORDS:
        if keyword in normalized:
            return True
    return False


class Chatbot:
    """Treatment-navigation chatbot backed by a Hugging Face chat model.

    Maintains a running user profile extracted from each message, returns a
    fixed crisis response (bypassing the LLM) when crisis keywords appear,
    and injects verified facility data into the system prompt once the
    profile is complete enough or the user explicitly asks for options.
    """

    def __init__(self):
        """
        Initialize the chatbot with a HF model ID, an empty user profile,
        and the verified treatment-resource catalog.
        """
        # Define MY_MODEL in config.py if you create a new model in the HuggingFace Hub.
        model_id = MY_MODEL if MY_MODEL else BASE_MODEL
        self.client = InferenceClient(model=model_id, token=HF_TOKEN)
        # Initialize user profile
        current_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(current_dir, '..', 'data')
        self.profile_schema = load_schema(os.path.join(data_dir, 'user_profile_schema.json'))
        self.user_profile = create_empty_profile()
        # Load treatment resources once; they are reused across sessions (see reset()).
        knowledge_dir = os.path.join(data_dir, '..', 'references', 'knowledge')
        resources_paths = [
            os.path.join(knowledge_dir, 'ma_resources.csv'),
            os.path.join(knowledge_dir, 'resources', 'boston_resources.csv'),
        ]
        self.resources = load_resources(resources_paths)

    def reset(self):
        """Reset conversation state for a new session without re-initializing the client or resources."""
        self.user_profile = create_empty_profile()

    def update_profile(self, user_input):
        """
        Scan user input for profile-relevant information and merge it
        into the running user profile.

        Args:
            user_input (str): The user's message text.
        """
        updates = extract_profile_updates(self.profile_schema, user_input)
        merge_profile(self.user_profile, updates)

    def _missing_profile_fields(self):
        """
        Return human-readable descriptions of the profile fields still
        missing for treatment matching (empty list when all are collected).
        """
        missing = []
        clinical = self.user_profile.get("clinical", {})
        preferences = self.user_profile.get("preferences", {})
        logistics = self.user_profile.get("logistics", {})
        if not clinical.get("primary_focus"):
            missing.append("type of help needed (substance use, mental health, or both)")
        if not preferences.get("setting"):
            missing.append("preferred treatment setting")
        if not logistics.get("insurance"):
            missing.append("payment method or insurance")
        if not logistics.get("zipcode") and not logistics.get("region"):
            missing.append("location (zip code or city)")
        return missing

    def format_prompt(self, user_input, turn_number=0):
        """
        Format the user's input into a list of chat messages with system context.
        Updates the user profile with any new information detected from the message.

        This method:
        1. Loads system prompt from system_prompt.md
        2. Updates user profile from schema-based extraction
        3. Injects profile summary into the system prompt so the model knows what's been gathered
        4. Returns a list of message dicts for the chat completion API

        Args:
            user_input (str): The user's question
            turn_number (int): Zero-indexed turn count (0 = first user message)

        Returns:
            list[dict]: A list of message dicts with 'role' and 'content' keys
        """
        # Get the directory where this file is located
        current_dir = os.path.dirname(os.path.abspath(__file__))

        # Load system prompt (path built component-wise, consistent with __init__)
        system_prompt_path = os.path.join(current_dir, '..', 'data', 'system_prompt.md')
        with open(system_prompt_path, 'r', encoding='utf-8') as f:
            system_prompt = f.read().strip()

        # Update user profile from this message
        self.update_profile(user_input)

        # Build profile summary for the prompt
        profile_summary = profile_to_summary(self.user_profile)

        # Build system message with profile context
        system_content = system_prompt
        if profile_summary:
            system_content = system_content + "\n\n" + profile_summary

        # On the 4th user message (turn_number >= 3), nudge the model to ask a matching question
        if turn_number >= 3:
            missing = self._missing_profile_fields()
            if missing:
                nudge = (
                    "\n\n[PACING INSTRUCTION β€” DO NOT REPEAT TO USER] "
                    "You have now had several exchanges. You MUST include a question relevant to "
                    "treatment matching in this response. Ask about one of the following missing fields: "
                    + "; ".join(missing)
                    + ". Ask it warmly and conversationally β€” one question only."
                )
                system_content = system_content + nudge

        # Return structured messages for chat completion API
        messages = [{"role": "system", "content": system_content}]

        return messages

    def get_response(self, user_input, history=None):
        """
        Generate a response to the user's question, with resource recommendations
        appended when the user profile contains enough information to match.

        Args:
            user_input (str): The user's question
            history (list | None): Prior conversation turns, either as chat
                dicts with 'role' and 'content' keys, or as
                (user, assistant) pairs where assistant may be None for a
                pending turn. Its length is used as the turn counter.

        Returns:
            str: The chatbot's response, optionally followed by top 3 resources
        """
        # 0. Hard crisis check β€” bypass LLM entirely if crisis keywords detected
        if _is_crisis(user_input):
            print("[Harbor] Crisis keywords detected β€” returning crisis response.")
            return CRISIS_RESPONSE

        # 1. Format messages (also updates profile)
        turn_number = len(history) if history else 0
        messages = self.format_prompt(user_input, turn_number=turn_number)

        # 1b. After the user's first message, return a fixed follow-up instead of calling the LLM.
        # Profile has already been updated above so the first message is not lost.
        if history and len(history) == 1:
            return (
                "Thank you for sharing that. Before I give you any recommendations, "
                "can you tell me a little bit about you or the loved one you are concerned about?"
            )

        # 2. Inject verified facility data into the system prompt so the LLM presents
        # recommendations naturally in its own voice β€” only when the profile has enough
        # info or the user is explicitly asking for options.
        # Evaluate the profile-completeness check once and reuse it below.
        has_enough = _has_enough_for_recommendation(self.user_profile)
        if has_enough or _user_wants_recommendations(user_input):
            filtered = filter_resources(self.resources, self.user_profile)
            top = score_resources(filtered, self.user_profile)
            if top:
                facility_context = format_resources_for_context(top)
                messages[0]["content"] = messages[0]["content"] + "\n\n" + facility_context
            elif has_enough:
                # Profile was complete but nothing matched β€” tell the model to say so.
                messages[0]["content"] = messages[0]["content"] + (
                    "\n\n[NOTE β€” No verified facilities matched the user's profile. "
                    "Let the user know you couldn't find a direct match and ask if "
                    "they'd be open to broader options.]"
                )

        # 3. Append conversation history then current user message
        for item in (history or []):
            if isinstance(item, dict):
                messages.append({"role": item["role"], "content": item["content"]})
            else:
                human, assistant = item
                messages.append({"role": "user", "content": human})
                if assistant is not None:
                    messages.append({"role": "assistant", "content": assistant})
        messages.append({"role": "user", "content": user_input})

        # 4. Generate LLM response via chat completion API
        result = self.client.chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
        )
        return result.choices[0].message.content.strip()