File size: 3,025 Bytes
4ae4ae8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""Configuration for REFRAME app."""

import os

# --- Backend Selection ---
# Auto: Ollama locally, llama-cpp-python on HF Spaces (no Ollama daemon there).
# Override with REFRAME_BACKEND=ollama|llamacpp. The override is trimmed and
# lower-cased, so " Ollama " still selects "ollama"; an empty value falls back
# to auto-selection instead of producing a blank backend name.
_ON_SPACE = bool(os.environ.get("SPACE_ID"))  # any non-empty SPACE_ID counts as "on a Space"
BACKEND = os.environ.get("REFRAME_BACKEND", "").strip().lower() or (
    "llamacpp" if _ON_SPACE else "ollama"
)

# Optional fully-offline local run (no network) — uses only the HF cache.
# Set REFRAME_OFFLINE=1 locally. Never applied on a Space (it must download once).
# Explicit "off" spellings (0/false/no/off, any case) count as disabled, so
# REFRAME_OFFLINE=0 does not switch offline mode on by accident.
_OFFLINE_REQUESTED = os.environ.get("REFRAME_OFFLINE", "").strip().lower() not in {
    "",
    "0",
    "false",
    "no",
    "off",
}
if _OFFLINE_REQUESTED and not _ON_SPACE:
    # setdefault: values already exported by the user take precedence.
    os.environ.setdefault("HF_HUB_OFFLINE", "1")
    os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")

# --- Ollama Config (local) ---
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma4_mentalhealthbuddy_v1")
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
# Keep the chat model resident between turns (-1 = never unload). Avoids slow,
# failure-prone cold reloads when RAM is tight (STT + LLM share system memory).
# Environment values always arrive as strings, so an integer-looking override
# ("-1", "300") is converted to int to match the type of the built-in default.
# Anything else (e.g. a duration string such as "5m") is passed through
# unchanged, and an empty/blank override falls back to -1.
_KEEP_ALIVE_RAW = os.environ.get("OLLAMA_KEEP_ALIVE", "").strip()
try:
    OLLAMA_KEEP_ALIVE = int(_KEEP_ALIVE_RAW) if _KEEP_ALIVE_RAW else -1
except ValueError:
    OLLAMA_KEEP_ALIVE = _KEEP_ALIVE_RAW

# --- llama-cpp-python Config (HF Spaces) ---
# Each value can be overridden through the environment variable of the same
# name. GGUF_REPO_ID and GGUF_LOCAL_PATH default to the empty string.
GGUF_REPO_ID: str = os.getenv("GGUF_REPO_ID", "")  # e.g. "username/mentalhealthbuddy-gguf"
GGUF_FILENAME: str = os.getenv("GGUF_FILENAME", "mentalhealthbuddy_q4_k_m.gguf")
GGUF_LOCAL_PATH: str = os.getenv("GGUF_LOCAL_PATH", "")  # if already downloaded

# --- Generation Parameters ---
# Fixed literals: these are not read from the environment.
MAX_TOKENS: int = 256
TEMPERATURE: float = 0.4
TOP_P: float = 0.9
REPEAT_PENALTY: float = 1.1

# --- App Settings ---
APP_TITLE: str = "REFRAME"
APP_SUBTITLE: str = "Reframe your thinking."
# Pause, in seconds, before the model starts responding (reads as thoughtfulness).
RESPONSE_DELAY: float = 1.5
# Gap, in seconds, between tokens while streaming.
STREAM_SPEED: float = 0.02

# --- Session Limits ---
MAX_CARDS: int = 50
MAX_SESSION_SUMMARIES: int = 20

# --- Cognitive Distortions ---
# Lower-case labels of the cognitive distortions the app knows about.
# Order is preserved as authored; callers may rely on it.
DISTORTIONS: list = [
    "catastrophizing",
    "overgeneralization",
    "all-or-nothing thinking",
    "mind-reading",
    "fortune-telling",
    "should-statements",
    "emotional reasoning",
    "labeling",
    "personalization",
    "mental filter",
    "disqualifying the positive",
]

# --- Crisis Keywords (C-SSRS framework) ---
# Lower-case phrases that signal a possible crisis. How they are matched
# against user text is decided by the consumer of this list, not here.
CRISIS_KEYWORDS: list = [
    # Suicidal thoughts / intent
    "kill myself",
    "end my life",
    "suicide",
    "suicidal",
    "want to die",
    "better off dead",
    "no reason to live",
    "can't go on",
    # Self-harm
    "self-harm",
    "hurt myself",
    "cutting myself",
    "overdose",
]

# --- Helplines ---
# A list of {"name": ..., "contact": ...} dicts, built from (name, contact)
# pairs so the two keys are spelled only once.
HELPLINES = [
    {"name": service, "contact": how_to_reach}
    for service, how_to_reach in (
        ("988 Suicide & Crisis Lifeline (US)", "Call or text 988"),
        ("Crisis Text Line (US)", "Text HOME to 741741"),
        ("Samaritans (UK/IE)", "Call 116 123 (free, 24/7)"),
        ("Befrienders Worldwide", "befrienders.org"),
    )
]

# --- Speech-to-Text (optional, requires requirements-stt.txt) ---
# Switch for the microphone: set to False to hide the mic entirely.
STT_ENABLED: bool = True
# Speech-to-text model id; override with the STT_MODEL environment variable.
# The default below is gated — it needs an HF login + access request.
# Alternative default, currently disabled: "openai/whisper-small".
STT_MODEL: str = os.getenv("STT_MODEL", "CohereLabs/cohere-transcribe-03-2026")