Spaces:

dedp
/

harbor

Sleeping

File size: 7,698 Bytes

import json
import re


def load_schema(schema_path):
    """
    Read the user profile schema from disk.

    Args:
        schema_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON content of the file.
    """
    with open(schema_path, encoding='utf-8') as schema_file:
        raw_text = schema_file.read()
    return json.loads(raw_text)


def create_empty_profile():
    """
    Build a blank user profile.

    Scalar fields start as None and multi-valued fields start as empty
    lists. Every call builds new dict and list objects, so profiles
    returned by separate calls never share state.

    Returns:
        dict: Profile with the sections demographics, logistics, status,
              clinical and preferences.
    """
    demographics = dict(
        population=None,
        identity_factors=[],
        language=None,
        pronouns=None,
    )
    logistics = dict(
        zipcode=None,
        region=None,
        profession=None,
        accessibility_needs=[],
        insurance=None,
        treatment_history=None,
    )
    status = dict(
        current_state=None,
        crisis_level=None,
        temporary_factors=[],
    )
    clinical = dict(
        primary_focus=None,
        substances=[],
    )
    preferences = dict(
        setting=None,
        therapy_approach=None,
        scheduling=[],
        barriers=[],
        contact_channel=None,
    )
    return dict(
        demographics=demographics,
        logistics=logistics,
        status=status,
        clinical=clinical,
        preferences=preferences,
    )


def extract_profile_updates(schema, user_input):
    """
    Detect profile information in a user message, driven by the schema.

    Each schema field is handled according to its "type":

    - "extracted": delegated to _extract_field(); the result is kept unless
      it is None.
    - "single": the value of the first option (in schema order) with a
      matching keyword.
    - "multi": the values of all options with a matching keyword, in schema
      order.
    - anything else: ignored.

    An option matches when any of its non-empty keywords, lowercased, occurs
    as a substring of the lowercased message.

    Args:
        schema: The loaded profile schema dict (category -> field -> definition).
        user_input: The user's message text.

    Returns:
        dict: Nested dict mirroring the profile structure, containing only
              the categories and fields where something was detected.
    """
    lowered = user_input.lower()
    detected = {}

    for section, fields in schema.items():
        found = {}

        for name, spec in fields.items():
            kind = spec.get("type")

            if kind == "extracted":
                # Pattern-based / free-text fields are handled by the helper.
                extracted = _extract_field(name, spec, user_input, lowered)
                if extracted is not None:
                    found[name] = extracted
                continue

            if kind not in ("single", "multi"):
                continue

            # NOTE(review): this is plain substring matching, so a short
            # keyword also matches inside longer words -- confirm intended.
            hits = [
                option["value"]
                for option in spec.get("options", [])
                if any(kw and kw.lower() in lowered
                       for kw in option.get("keywords", []))
            ]
            if not hits:
                continue

            found[name] = hits[0] if kind == "single" else hits

        if found:
            detected[section] = found

    return detected


def _extract_field(field_name, field_def, user_input, input_lower):
    """Handle extraction for non-option fields like zipcode and treatment_history."""
    if field_name == "zipcode":
        pattern = field_def.get("pattern", r"\b\d{5}\b")
        match = re.search(pattern, user_input)
        if match:
            return match.group()
        return None

    if field_name == "region":
        # Region is typically set explicitly or by the LLM, not keyword-matched.
        # We do a lightweight check for common geographic indicators.
        geo_patterns = [
            r"\bin\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)",  # "in Boston", "in Pocahontas County"
            r"\bnear\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)",  # "near Springfield"
            r"\bfrom\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)",  # "from Cambridge"
        ]
        for pattern in geo_patterns:
            match = re.search(pattern, user_input)
            if match:
                return match.group(1)
        return None

    if field_name == "treatment_history":
        history_keywords = ["rehab", "treatment before", "been to", "tried",
                            "previous treatment", "went to", "was in",
                            "12-step", "residential before", "relapsed"]
        for keyword in history_keywords:
            if keyword in input_lower:
                return user_input  # store the raw message as context
        return None

    return None


def merge_profile(profile, updates):
    """
    Merge new updates into the existing profile.

    - Categories or fields that do not exist in the profile are skipped.
    - None values in updates are ignored (don't clear existing data).
    - If the profile field currently holds a list, new values are appended
      to it without duplicates; a scalar update is appended as one item.
    - Otherwise the new value overwrites the old one. A list update is
      copied before it is stored, so the profile never shares a list object
      with the caller's updates dict. Without the copy, a later merge would
      append into that shared list and silently alter the earlier updates.

    Args:
        profile: The current user profile dict (modified in place).
        updates: The updates dict from extract_profile_updates().

    Returns:
        dict: The updated profile (same object as input).
    """
    for category_name, category_updates in updates.items():
        if category_name not in profile:
            continue

        section = profile[category_name]

        for field_name, new_value in category_updates.items():
            if field_name not in section or new_value is None:
                continue

            existing = section[field_name]

            if isinstance(existing, list):
                # Treat a scalar update as a one-item list so both shapes
                # share the same de-duplicating append.
                incoming = new_value if isinstance(new_value, list) else [new_value]
                for item in incoming:
                    if item not in existing:
                        existing.append(item)
            elif isinstance(new_value, list):
                # Overwrite with a copy: the profile must own its lists.
                section[field_name] = list(new_value)
            else:
                # Single value field: overwrite
                section[field_name] = new_value

    return profile


def profile_to_summary(profile):
    """
    Render the filled-in parts of a user profile as text for the system
    prompt.

    Sections appear in a fixed order (demographics, logistics, status,
    clinical, preferences); categories outside that set are not rendered.
    Fields that are None or an empty list are left out, and a section with
    no remaining fields is omitted entirely. List values are joined with
    ", ".

    Args:
        profile: The user profile dict.

    Returns:
        str: A human-readable summary, or empty string if profile is empty.
    """
    section_titles = (
        ("demographics", "Demographics"),
        ("logistics", "Logistics & History"),
        ("status", "Current Status"),
        ("clinical", "Clinical Needs"),
        ("preferences", "Preferences & Barriers"),
    )

    body = []
    for section_key, title in section_titles:
        entries = []

        for field, val in profile.get(section_key, {}).items():
            is_list = isinstance(val, list)
            if val is None or (is_list and not val):
                continue

            # "treatment_history" -> "Treatment History"
            label = field.replace("_", " ").title()
            rendered = ", ".join(str(item) for item in val) if is_list else val
            entries.append(f"  - {label}: {rendered}")

        if entries:
            body.append(f"[{title}]")
            body.extend(entries)

    if not body:
        return ""

    return (
        "USER PROFILE (already collected — DO NOT ask the user again for any of these details):\n"
        + "\n".join(body)
    )