pikam00 committed on
Commit
60817f8
·
verified ·
1 Parent(s): 74f089b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -129
app.py CHANGED
@@ -5,30 +5,22 @@ import torch
5
  import random
6
  import re
7
 
8
- # ========================
9
- # Models
10
- # ========================
11
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
12
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
13
 
14
- # ========================
15
- # Load corpus (journal.txt in same folder)
16
- # ========================
17
  with open("journal.txt", "r", encoding="utf-8") as f:
18
  raw_text = f.read()
19
 
20
- # ========================
21
- # --- KEEPING THE OG STRIP/SANITIZER STUFF ---
22
- # Remove role tags and chat-log lines from the corpus so they never leak
23
- # ========================
24
  ROLE_TAGS = re.compile(
25
  r'\[/?(?:USER|ASST)\]|\</?(?:user|assistant)\>|<\|(?:user|assistant)\|>',
26
  re.IGNORECASE,
27
  )
28
 
29
  def clean_corpus(text: str) -> str:
30
- text = ROLE_TAGS.sub('', text or '')
31
- out_lines = []
32
  for line in text.splitlines():
33
  low = line.strip().lower()
34
  if low.startswith("user wrote:"): continue
@@ -37,26 +29,26 @@ def clean_corpus(text: str) -> str:
37
  if low.startswith("/assistant wrote:"): continue
38
  if low.startswith("user:"): continue
39
  if low.startswith("assistant:"): continue
40
- out_lines.append(line)
41
- return "\n".join(out_lines)
42
 
43
  journal_text = clean_corpus(raw_text)
44
 
45
- # ========================
46
- # Chunk + embed (simple)
47
- # ========================
48
- def preprocess_text(text):
49
- cleaned = text.strip()
50
- sents = [s.strip() for s in cleaned.split('.') if s.strip()]
51
  sentence_chunks = [s for s in sents if len(s) > 10]
52
 
53
  combined = []
54
  for i in range(0, len(sents), 3):
55
- chunk = '. '.join(sents[i:i+3]).strip()
56
  if len(chunk) > 20:
57
  combined.append(chunk)
58
 
59
- paras = [p.strip() for p in cleaned.split('\n\n') if p.strip() and len(p) > 30]
60
 
61
  seen, chunks = set(), []
62
  for c in sentence_chunks + combined + paras:
@@ -67,22 +59,26 @@ def preprocess_text(text):
67
  return chunks
68
 
69
  chunks = preprocess_text(journal_text)
70
- embeddings = embedder.encode(chunks, convert_to_tensor=True)
 
71
 
72
- def get_top_chunks(query, top_k=5):
73
- if not query:
74
  return []
75
  q = embedder.encode(query, convert_to_tensor=True)
76
  q = q / q.norm()
77
  M = embeddings / embeddings.norm(dim=1, keepdim=True)
 
 
 
 
78
  sims = torch.matmul(M, q)
79
- k = min(top_k, len(chunks))
80
  scores, idxs = torch.topk(sims, k=k)
81
- out = []
82
  for i, idx in enumerate(idxs):
83
  if scores[i].item() > 0.25:
84
- out.append(chunks[int(idx)])
85
- return out
86
 
87
  def join_context(chunks_list, max_chars=900):
88
  out = ""
@@ -93,102 +89,76 @@ def join_context(chunks_list, max_chars=900):
93
  out += (("\n\n" if out else "") + c)
94
  return out
95
 
96
- # ========================
97
- # Tiny safety (quiet unless triggered)
98
- # ========================
99
- CRISIS_TERMS = [
100
- "suicide","kill myself","end my life","self-harm",
101
- "hurt myself","overdose","harm others","kill someone"
102
- ]
103
  def is_crisis(msg: str) -> bool:
104
  m = (msg or "").lower()
105
  return any(t in m for t in CRISIS_TERMS)
106
 
107
- # ========================
108
- # Emotion gate (only help if feelings are mentioned)
109
- # ========================
110
  EMOTION_HINTS = [
111
- "i feel", "i'm feeling", "i am feeling", "feelings",
112
  "overwhelmed", "stressed", "anxious", "sad", "lonely",
113
  "angry", "upset", "worried", "guilty", "ashamed",
114
  "proud", "happy", "excited", "tired", "burned out", "burnt out"
115
  ]
 
116
  def mentions_emotion(msg: str) -> bool:
117
  m = (msg or "").lower()
118
  return any(k in m for k in EMOTION_HINTS)
119
 
120
- # ========================
121
- # Personas (simple) + break ideas
122
- # ========================
123
- BREAKS = {
124
- "Sage": [
125
- "Look out the window for 1 minute and notice what moves.",
126
- "Breathe in 4, out 6, slowly.",
127
- "Think of three natural places you enjoy."
128
- ],
129
- "Buddy": [
130
- "Stand and stretch for 20 seconds.",
131
- "Send a kind message to a friend.",
132
- "Play a short upbeat song."
133
- ],
134
- "Monk": [
135
- "Close your eyes, breathe 4 in, hold 4, breathe 4 out.",
136
- "Choose one small task to finish after this.",
137
- "Turn your phone face down for a minute."
138
- ],
139
- "Librarian": [
140
- "Write one sentence starting with: 'Today I noticed...'.",
141
- "Put three things neatly in place.",
142
- "Organize a small space for 1 minute."
143
- ],
144
- "Cozy": [
145
- "Sip water slowly like a warm drink.",
146
- "Wrap yourself in a blanket for 1 minute.",
147
- "Notice three soft textures nearby."
148
- ],
149
- }
150
- TONES = {
151
- "Sage": "calm, thoughtful, nature imagery",
152
- "Buddy": "upbeat, encouraging, simple language",
153
- "Monk": "minimalist, focused, mindful",
154
- "Librarian": "gentle, organized, caring",
155
- "Cozy": "warm, comforting, home-like",
156
- }
157
- CURRENT_PERSONA = {"name": "Cozy"} # kept mutable in a dict for simplicity
158
-
159
- def set_persona(name: str) -> str:
160
- names = list(TONES.keys())
161
- lookup = {n.lower(): n for n in names}
162
- key = (name or "").strip().lower()
163
- if key in lookup:
164
- CURRENT_PERSONA["name"] = lookup[key]
165
- return f"Persona set to {CURRENT_PERSONA['name']}."
166
- return "Unknown persona. Options: Sage, Buddy, Monk, Librarian, Cozy."
167
-
168
- def pick_break() -> str:
169
- persona = CURRENT_PERSONA["name"]
170
- return random.choice(BREAKS.get(persona, BREAKS["Cozy"]))
171
-
172
- # ========================
173
- # Chat handler
174
- # ========================
175
- HELP_TEXT = (
176
- "Type `/personas` to see options, or `/persona NAME` to switch. "
177
- "Choices: Sage, Buddy, Monk, Librarian, Cozy."
178
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
 
180
  def respond(message, history):
181
  msg = (message or "").strip()
182
-
183
- # Commands (no extra UI)
184
- low = msg.lower()
185
- if low == "/personas":
186
- return HELP_TEXT
187
- if low.startswith("/persona"):
188
- parts = msg.split(maxsplit=1)
189
- if len(parts) == 1:
190
- return "Usage: `/persona NAME` — " + HELP_TEXT
191
- return set_persona(parts[1])
192
 
193
  # Safety
194
  if is_crisis(msg):
@@ -198,49 +168,60 @@ def respond(message, history):
198
  "• Elsewhere: contact local emergency services."
199
  )
200
 
201
- # If user hasn't talked about emotions yet → friend-like greeting only
202
  if not mentions_emotion(msg):
203
  return ("Hey, I’m Otium. I’m here to listen whenever you want to talk about your day "
204
- "or how you’re feeling <3")
205
 
206
- # Emotions present → retrieve, reflect, short follow-up, one tiny break
207
- top = get_top_chunks(msg, top_k=5)
208
- context_block = join_context(top)
209
 
210
  system_msg = (
211
  "You are Otium, a warm journaling buddy. Not medical advice. "
212
- f"Adopt the persona {CURRENT_PERSONA['name']}. Style: {TONES[CURRENT_PERSONA['name']]}. "
213
  "Output plain text only (no role labels or chat logs). "
214
- "Reflect the user’s feelings in simple, kind language, ask ONE gentle follow-up question, "
215
- "keep it short (3–5 sentences), and end with one tiny break idea. "
 
 
216
  "Avoid clinical terms or medical guidance.\n\n"
217
- f"Helpful snippets from the user's content:\n{context_block}"
218
  )
 
 
219
 
 
220
  messages = [{"role": "system", "content": system_msg}]
221
  if history:
222
  for u, a in history:
223
  if u: messages.append({"role": "user", "content": u})
224
  if a: messages.append({"role": "assistant", "content": a})
225
- messages.append({"role": "user", "content": msg})
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
- resp = client.chat_completion(
228
- messages=messages,
229
- max_tokens=220,
230
- temperature=0.7,
231
- stop=["User wrote:", "Assistant wrote:", "User:", "Assistant:"]
232
- )
233
- text = resp["choices"][0]["message"]["content"].strip()
234
 
235
  return f"{text}\n\n**Tiny break idea:** {pick_break()}"
236
 
237
- # ========================
238
- # Minimal UI (single chat box)
239
- # ========================
240
  chatbot = gr.ChatInterface(
241
  respond,
242
  title="Otium — A Friendly Check-In",
243
- description="Say hello whenever you’re ready. Otium is always heee for you' Type /personas for options. (Not medical advice.)"
244
  )
245
 
246
  if __name__ == "__main__":
 
5
  import random
6
  import re
7
 
8
+ # ===== Models =====
 
 
9
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
10
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
11
 
12
+ # ===== Load & sanitize corpus =====
 
 
13
  with open("journal.txt", "r", encoding="utf-8") as f:
14
  raw_text = f.read()
15
 
 
 
 
 
16
  ROLE_TAGS = re.compile(
17
  r'\[/?(?:USER|ASST)\]|\</?(?:user|assistant)\>|<\|(?:user|assistant)\|>',
18
  re.IGNORECASE,
19
  )
20
 
21
  def clean_corpus(text: str) -> str:
22
+ text = ROLE_TAGS.sub("", text or "")
23
+ out = []
24
  for line in text.splitlines():
25
  low = line.strip().lower()
26
  if low.startswith("user wrote:"): continue
 
29
  if low.startswith("/assistant wrote:"): continue
30
  if low.startswith("user:"): continue
31
  if low.startswith("assistant:"): continue
32
+ out.append(line)
33
+ return "\n".join(out)
34
 
35
  journal_text = clean_corpus(raw_text)
36
 
37
+ # ===== Chunk + embed (safe if file is short/empty) =====
38
+ def preprocess_text(text: str):
39
+ cleaned = (text or "").strip()
40
+ if not cleaned:
41
+ return []
42
+ sents = [s.strip() for s in cleaned.split(".") if s.strip()]
43
  sentence_chunks = [s for s in sents if len(s) > 10]
44
 
45
  combined = []
46
  for i in range(0, len(sents), 3):
47
+ chunk = ". ".join(sents[i:i+3]).strip()
48
  if len(chunk) > 20:
49
  combined.append(chunk)
50
 
51
+ paras = [p.strip() for p in cleaned.split("\n\n") if p.strip() and len(p) > 30]
52
 
53
  seen, chunks = set(), []
54
  for c in sentence_chunks + combined + paras:
 
59
  return chunks
60
 
61
  chunks = preprocess_text(journal_text)
62
+ HAS_CORPUS = len(chunks) > 0
63
+ embeddings = embedder.encode(chunks, convert_to_tensor=True) if HAS_CORPUS else None
64
 
65
+ def get_top_chunks(query: str, top_k: int = 5):
66
+ if not (HAS_CORPUS and embeddings is not None and query):
67
  return []
68
  q = embedder.encode(query, convert_to_tensor=True)
69
  q = q / q.norm()
70
  M = embeddings / embeddings.norm(dim=1, keepdim=True)
71
+ n = len(chunks)
72
+ if n == 0:
73
+ return []
74
+ k = max(1, min(top_k, n))
75
  sims = torch.matmul(M, q)
 
76
  scores, idxs = torch.topk(sims, k=k)
77
+ results = []
78
  for i, idx in enumerate(idxs):
79
  if scores[i].item() > 0.25:
80
+ results.append(chunks[int(idx)])
81
+ return results
82
 
83
  def join_context(chunks_list, max_chars=900):
84
  out = ""
 
89
  out += (("\n\n" if out else "") + c)
90
  return out
91
 
92
+ # ===== Tiny safety =====
93
+ CRISIS_TERMS = ["suicide","kill myself","end my life","self-harm","hurt myself","overdose","harm others","kill someone"]
 
 
 
 
 
94
  def is_crisis(msg: str) -> bool:
95
  m = (msg or "").lower()
96
  return any(t in m for t in CRISIS_TERMS)
97
 
98
+ # ===== Emotion gate & extraction =====
 
 
99
  EMOTION_HINTS = [
100
+ "i feel", "i'm feeling", "i am feeling", "feel", "feeling",
101
  "overwhelmed", "stressed", "anxious", "sad", "lonely",
102
  "angry", "upset", "worried", "guilty", "ashamed",
103
  "proud", "happy", "excited", "tired", "burned out", "burnt out"
104
  ]
105
+
106
  def mentions_emotion(msg: str) -> bool:
107
  m = (msg or "").lower()
108
  return any(k in m for k in EMOTION_HINTS)
109
 
110
+ # normalize common typos like "jm sad" -> "i'm sad", "im sad" -> "i'm sad"
111
+ def normalize(msg: str) -> str:
112
+ m = msg.strip()
113
+ m = re.sub(r"^\s*jm\b", "I'm", m, flags=re.IGNORECASE)
114
+ m = re.sub(r"\bim\b", "I'm", m, flags=re.IGNORECASE)
115
+ return m
116
+
117
+ # very simple extraction: try to grab phrase after "I feel/I'm feeling/feeling ..."
118
+ EMO_RE = re.compile(
119
+ r"\b(i\s*feel|i\s*am\s*feeling|i'm\s*feeling|im\s*feeling|feeling)\s+([^.,;!?]{1,40})",
120
+ re.IGNORECASE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  )
122
+ # fallback list if no phrase captured
123
+ EMO_WORDS = [
124
+ "overwhelmed","stressed","anxious","sad","lonely","angry","upset",
125
+ "worried","guilty","ashamed","proud","happy","excited","tired",
126
+ "burned out","burnt out"
127
+ ]
128
+
129
+ def extract_emotion(msg: str) -> str:
130
+ m = normalize(msg)
131
+ m_low = m.lower()
132
+ m = m.strip()
133
+ # try regex phrase
134
+ hit = EMO_RE.search(m)
135
+ if hit:
136
+ phrase = hit.group(2).strip()
137
+ # keep it short and clean
138
+ phrase = re.sub(r"\s+", " ", phrase)
139
+ return phrase
140
+ # fallback: first known word present
141
+ for w in EMO_WORDS:
142
+ if w in m_low:
143
+ return w
144
+ return "this way" # last resort
145
+
146
+ # ===== Tiny break ideas (only when feelings are mentioned) =====
147
+ BREAKS = [
148
+ "Try box breathing 4-4-4-4 for 60 seconds.",
149
+ "Unclench your jaw and roll your shoulders slowly three times.",
150
+ "Look away from the screen and name 5 things you can see.",
151
+ "Sip water slowly and take three deep breaths.",
152
+ "Stand up, stretch overhead, and feel your feet on the ground."
153
+ ]
154
+ def pick_break():
155
+ return random.choice(BREAKS)
156
 
157
+ # ===== Chat handler =====
158
  def respond(message, history):
159
  msg = (message or "").strip()
160
+ if not msg:
161
+ return "Hey, I’m Otium. I’m here to listen whenever you want to talk about your day or how you’re feeling."
 
 
 
 
 
 
 
 
162
 
163
  # Safety
164
  if is_crisis(msg):
 
168
  "• Elsewhere: contact local emergency services."
169
  )
170
 
171
+ # If no emotions yet → friendly hello only
172
  if not mentions_emotion(msg):
173
  return ("Hey, I’m Otium. I’m here to listen whenever you want to talk about your day "
174
+ "or how you’re feeling. No pressure—share only when you’re ready.")
175
 
176
+ # Emotions present → retrieve (if any) + short support
177
+ emo = extract_emotion(msg)
178
+ context_block = join_context(get_top_chunks(msg, top_k=5)) if HAS_CORPUS else ""
179
 
180
  system_msg = (
181
  "You are Otium, a warm journaling buddy. Not medical advice. "
 
182
  "Output plain text only (no role labels or chat logs). "
183
+ "Reflect the user’s feelings in simple, kind language. "
184
+ "Ask exactly ONE question phrased as: 'Why do you feel {emotion}?', "
185
+ "where {emotion} is the extracted emotion provided below. "
186
+ "Keep the reply short (3–5 sentences) and end with one tiny break idea. "
187
  "Avoid clinical terms or medical guidance.\n\n"
188
+ f"Extracted emotion: {emo}\n"
189
  )
190
+ if context_block:
191
+ system_msg += f"\nHelpful snippets from the user's content:\n{context_block}"
192
 
193
+ # Build messages for the model
194
  messages = [{"role": "system", "content": system_msg}]
195
  if history:
196
  for u, a in history:
197
  if u: messages.append({"role": "user", "content": u})
198
  if a: messages.append({"role": "assistant", "content": a})
199
+ messages.append({"role": "user", "content": normalize(msg)})
200
+
201
+ # Call model, with stop strings to avoid chat-log artifacts
202
+ try:
203
+ resp = client.chat_completion(
204
+ messages=messages,
205
+ max_tokens=220,
206
+ temperature=0.7,
207
+ stop=["User wrote:", "Assistant wrote:", "User:", "Assistant:"]
208
+ )
209
+ text = resp["choices"][0]["message"]["content"].strip()
210
+ except Exception:
211
+ # Friendly fallback if API hiccups
212
+ text = f"Thanks for sharing that. Why do you feel {emo}?"
213
 
214
+ # Guarantee the explicit question appears (belt-and-suspenders)
215
+ if f"Why do you feel {emo}?" not in text:
216
+ text = text.rstrip(".! ") + f"\n\nWhy do you feel {emo}?"
 
 
 
 
217
 
218
  return f"{text}\n\n**Tiny break idea:** {pick_break()}"
219
 
220
+ # ===== Minimal UI =====
 
 
221
  chatbot = gr.ChatInterface(
222
  respond,
223
  title="Otium — A Friendly Check-In",
224
+ description="Say hello whenever you’re ready. Otium only offers support once you talk about feelings. (Not medical advice.)"
225
  )
226
 
227
  if __name__ == "__main__":