Otium_testing

Sleeping

App Files Files Community

pikam00 committed on Aug 13, 2025

Commit

60817f8

verified ·

1 Parent(s): 74f089b

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -129

app.py CHANGED Viewed

@@ -5,30 +5,22 @@ import torch
 import random
 import re
-# ========================
-# Models
-# ========================
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
-# ========================
-# Load corpus (journal.txt in same folder)
-# ========================
 with open("journal.txt", "r", encoding="utf-8") as f:
     raw_text = f.read()
-# ========================
-# --- KEEPING THE OG STRIP/SANITIZER STUFF ---
-# Remove role tags and chat-log lines from the corpus so they never leak
-# ========================
 ROLE_TAGS = re.compile(
     r'\[/?(?:USER|ASST)\]|\</?(?:user|assistant)\>|<\|(?:user|assistant)\|>',
     re.IGNORECASE,
 )
 def clean_corpus(text: str) -> str:
-    text = ROLE_TAGS.sub('', text or '')
-    out_lines = []
     for line in text.splitlines():
         low = line.strip().lower()
         if low.startswith("user wrote:"): continue
@@ -37,26 +29,26 @@ def clean_corpus(text: str) -> str:
         if low.startswith("/assistant wrote:"): continue
         if low.startswith("user:"): continue
         if low.startswith("assistant:"): continue
-        out_lines.append(line)
-    return "\n".join(out_lines)
 journal_text = clean_corpus(raw_text)
-# ========================
-# Chunk + embed (simple)
-# ========================
-def preprocess_text(text):
-    cleaned = text.strip()
-    sents = [s.strip() for s in cleaned.split('.') if s.strip()]
     sentence_chunks = [s for s in sents if len(s) > 10]
     combined = []
     for i in range(0, len(sents), 3):
-        chunk = '. '.join(sents[i:i+3]).strip()
         if len(chunk) > 20:
             combined.append(chunk)
-    paras = [p.strip() for p in cleaned.split('\n\n') if p.strip() and len(p) > 30]
     seen, chunks = set(), []
     for c in sentence_chunks + combined + paras:
@@ -67,22 +59,26 @@ def preprocess_text(text):
     return chunks
 chunks = preprocess_text(journal_text)
-embeddings = embedder.encode(chunks, convert_to_tensor=True)
-def get_top_chunks(query, top_k=5):
-    if not query:
         return []
     q = embedder.encode(query, convert_to_tensor=True)
     q = q / q.norm()
     M = embeddings / embeddings.norm(dim=1, keepdim=True)
     sims = torch.matmul(M, q)
-    k = min(top_k, len(chunks))
     scores, idxs = torch.topk(sims, k=k)
-    out = []
     for i, idx in enumerate(idxs):
         if scores[i].item() > 0.25:
-            out.append(chunks[int(idx)])
-    return out
 def join_context(chunks_list, max_chars=900):
     out = ""
@@ -93,102 +89,76 @@ def join_context(chunks_list, max_chars=900):
         out += (("\n\n" if out else "") + c)
     return out
-# ========================
-# Tiny safety (quiet unless triggered)
-# ========================
-CRISIS_TERMS = [
-    "suicide","kill myself","end my life","self-harm",
-    "hurt myself","overdose","harm others","kill someone"
-]
 def is_crisis(msg: str) -> bool:
     m = (msg or "").lower()
     return any(t in m for t in CRISIS_TERMS)
-# ========================
-# Emotion gate (only help if feelings are mentioned)
-# ========================
 EMOTION_HINTS = [
-    "i feel", "i'm feeling", "i am feeling", "feelings",
     "overwhelmed", "stressed", "anxious", "sad", "lonely",
     "angry", "upset", "worried", "guilty", "ashamed",
     "proud", "happy", "excited", "tired", "burned out", "burnt out"
 ]
 def mentions_emotion(msg: str) -> bool:
     m = (msg or "").lower()
     return any(k in m for k in EMOTION_HINTS)
-# ========================
-# Personas (simple) + break ideas
-# ========================
-BREAKS = {
-    "Sage": [
-        "Look out the window for 1 minute and notice what moves.",
-        "Breathe in 4, out 6, slowly.",
-        "Think of three natural places you enjoy."
-    ],
-    "Buddy": [
-        "Stand and stretch for 20 seconds.",
-        "Send a kind message to a friend.",
-        "Play a short upbeat song."
-    ],
-    "Monk": [
-        "Close your eyes, breathe 4 in, hold 4, breathe 4 out.",
-        "Choose one small task to finish after this.",
-        "Turn your phone face down for a minute."
-    ],
-    "Librarian": [
-        "Write one sentence starting with: 'Today I noticed...'.",
-        "Put three things neatly in place.",
-        "Organize a small space for 1 minute."
-    ],
-    "Cozy": [
-        "Sip water slowly like a warm drink.",
-        "Wrap yourself in a blanket for 1 minute.",
-        "Notice three soft textures nearby."
-    ],
-}
-TONES = {
-    "Sage": "calm, thoughtful, nature imagery",
-    "Buddy": "upbeat, encouraging, simple language",
-    "Monk": "minimalist, focused, mindful",
-    "Librarian": "gentle, organized, caring",
-    "Cozy": "warm, comforting, home-like",
-}
-CURRENT_PERSONA = {"name": "Cozy"}  # kept mutable in a dict for simplicity
-def set_persona(name: str) -> str:
-    names = list(TONES.keys())
-    lookup = {n.lower(): n for n in names}
-    key = (name or "").strip().lower()
-    if key in lookup:
-        CURRENT_PERSONA["name"] = lookup[key]
-        return f"Persona set to {CURRENT_PERSONA['name']}."
-    return "Unknown persona. Options: Sage, Buddy, Monk, Librarian, Cozy."
-def pick_break() -> str:
-    persona = CURRENT_PERSONA["name"]
-    return random.choice(BREAKS.get(persona, BREAKS["Cozy"]))
-# ========================
-# Chat handler
-# ========================
-HELP_TEXT = (
-    "Type `/personas` to see options, or `/persona NAME` to switch. "
-    "Choices: Sage, Buddy, Monk, Librarian, Cozy."
 )
 def respond(message, history):
     msg = (message or "").strip()
-    # Commands (no extra UI)
-    low = msg.lower()
-    if low == "/personas":
-        return HELP_TEXT
-    if low.startswith("/persona"):
-        parts = msg.split(maxsplit=1)
-        if len(parts) == 1:
-            return "Usage: `/persona NAME` — " + HELP_TEXT
-        return set_persona(parts[1])
     # Safety
     if is_crisis(msg):
@@ -198,49 +168,60 @@ def respond(message, history):
             "• Elsewhere: contact local emergency services."
         )
-    # If user hasn't talked about emotions yet → friend-like greeting only
     if not mentions_emotion(msg):
         return ("Hey, I’m Otium. I’m here to listen whenever you want to talk about your day "
-                "or how you’re feeling <3")
-    # Emotions present → retrieve, reflect, short follow-up, one tiny break
-    top = get_top_chunks(msg, top_k=5)
-    context_block = join_context(top)
     system_msg = (
         "You are Otium, a warm journaling buddy. Not medical advice. "
-        f"Adopt the persona {CURRENT_PERSONA['name']}. Style: {TONES[CURRENT_PERSONA['name']]}. "
         "Output plain text only (no role labels or chat logs). "
-        "Reflect the user’s feelings in simple, kind language, ask ONE gentle follow-up question, "
-        "keep it short (3–5 sentences), and end with one tiny break idea. "
         "Avoid clinical terms or medical guidance.\n\n"
-        f"Helpful snippets from the user's content:\n{context_block}"
     )
     messages = [{"role": "system", "content": system_msg}]
     if history:
         for u, a in history:
             if u: messages.append({"role": "user", "content": u})
             if a: messages.append({"role": "assistant", "content": a})
-    messages.append({"role": "user", "content": msg})
-    resp = client.chat_completion(
-        messages=messages,
-        max_tokens=220,
-        temperature=0.7,
-        stop=["User wrote:", "Assistant wrote:", "User:", "Assistant:"]
-    )
-    text = resp["choices"][0]["message"]["content"].strip()
     return f"{text}\n\n**Tiny break idea:** {pick_break()}"
-# ========================
-# Minimal UI (single chat box)
-# ========================
 chatbot = gr.ChatInterface(
     respond,
     title="Otium — A Friendly Check-In",
-    description="Say hello whenever you’re ready. Otium is always heee for you' Type /personas for options. (Not medical advice.)"
 )
 if __name__ == "__main__":

 import random
 import re
+# ===== Models =====
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# ===== Load & sanitize corpus =====
 with open("journal.txt", "r", encoding="utf-8") as f:
     raw_text = f.read()
 ROLE_TAGS = re.compile(
     r'\[/?(?:USER|ASST)\]|\</?(?:user|assistant)\>|<\|(?:user|assistant)\|>',
     re.IGNORECASE,
 )
 def clean_corpus(text: str) -> str:
+    text = ROLE_TAGS.sub("", text or "")
+    out = []
     for line in text.splitlines():
         low = line.strip().lower()
         if low.startswith("user wrote:"): continue
         if low.startswith("/assistant wrote:"): continue
         if low.startswith("user:"): continue
         if low.startswith("assistant:"): continue
+        out.append(line)
+    return "\n".join(out)
 journal_text = clean_corpus(raw_text)
+# ===== Chunk + embed (safe if file is short/empty) =====
+def preprocess_text(text: str):
+    cleaned = (text or "").strip()
+    if not cleaned:
+        return []
+    sents = [s.strip() for s in cleaned.split(".") if s.strip()]
     sentence_chunks = [s for s in sents if len(s) > 10]
     combined = []
     for i in range(0, len(sents), 3):
+        chunk = ". ".join(sents[i:i+3]).strip()
         if len(chunk) > 20:
             combined.append(chunk)
+    paras = [p.strip() for p in cleaned.split("\n\n") if p.strip() and len(p) > 30]
     seen, chunks = set(), []
     for c in sentence_chunks + combined + paras:
     return chunks
 chunks = preprocess_text(journal_text)
+HAS_CORPUS = len(chunks) > 0
+embeddings = embedder.encode(chunks, convert_to_tensor=True) if HAS_CORPUS else None
+def get_top_chunks(query: str, top_k: int = 5):
+    if not (HAS_CORPUS and embeddings is not None and query):
         return []
     q = embedder.encode(query, convert_to_tensor=True)
     q = q / q.norm()
     M = embeddings / embeddings.norm(dim=1, keepdim=True)
+    n = len(chunks)
+    if n == 0:
+        return []
+    k = max(1, min(top_k, n))
     sims = torch.matmul(M, q)
     scores, idxs = torch.topk(sims, k=k)
+    results = []
     for i, idx in enumerate(idxs):
         if scores[i].item() > 0.25:
+            results.append(chunks[int(idx)])
+    return results
 def join_context(chunks_list, max_chars=900):
     out = ""
         out += (("\n\n" if out else "") + c)
     return out
+# ===== Tiny safety =====
+CRISIS_TERMS = ["suicide","kill myself","end my life","self-harm","hurt myself","overdose","harm others","kill someone"]
 def is_crisis(msg: str) -> bool:
     m = (msg or "").lower()
     return any(t in m for t in CRISIS_TERMS)
+# ===== Emotion gate & extraction =====
 EMOTION_HINTS = [
+    "i feel", "i'm feeling", "i am feeling", "feel", "feeling",
     "overwhelmed", "stressed", "anxious", "sad", "lonely",
     "angry", "upset", "worried", "guilty", "ashamed",
     "proud", "happy", "excited", "tired", "burned out", "burnt out"
 ]
 def mentions_emotion(msg: str) -> bool:
     m = (msg or "").lower()
     return any(k in m for k in EMOTION_HINTS)
+# normalize common typos like "jm sad" -> "i'm sad", "im sad" -> "i'm sad"
+def normalize(msg: str) -> str:
+    m = msg.strip()
+    m = re.sub(r"^\s*jm\b", "I'm", m, flags=re.IGNORECASE)
+    m = re.sub(r"\bim\b", "I'm", m, flags=re.IGNORECASE)
+    return m
+# very simple extraction: try to grab phrase after "I feel/I'm feeling/feeling ..."
+EMO_RE = re.compile(
+    r"\b(i\s*feel|i\s*am\s*feeling|i'm\s*feeling|im\s*feeling|feeling)\s+([^.,;!?]{1,40})",
+    re.IGNORECASE
 )
+# fallback list if no phrase captured
+EMO_WORDS = [
+    "overwhelmed","stressed","anxious","sad","lonely","angry","upset",
+    "worried","guilty","ashamed","proud","happy","excited","tired",
+    "burned out","burnt out"
+]
+def extract_emotion(msg: str) -> str:
+    m = normalize(msg)
+    m_low = m.lower()
+    m = m.strip()
+    # try regex phrase
+    hit = EMO_RE.search(m)
+    if hit:
+        phrase = hit.group(2).strip()
+        # keep it short and clean
+        phrase = re.sub(r"\s+", " ", phrase)
+        return phrase
+    # fallback: first known word present
+    for w in EMO_WORDS:
+        if w in m_low:
+            return w
+    return "this way"  # last resort
+# ===== Tiny break ideas (only when feelings are mentioned) =====
+BREAKS = [
+    "Try box breathing 4-4-4-4 for 60 seconds.",
+    "Unclench your jaw and roll your shoulders slowly three times.",
+    "Look away from the screen and name 5 things you can see.",
+    "Sip water slowly and take three deep breaths.",
+    "Stand up, stretch overhead, and feel your feet on the ground."
+]
+def pick_break():
+    return random.choice(BREAKS)
+# ===== Chat handler =====
 def respond(message, history):
     msg = (message or "").strip()
+    if not msg:
+        return "Hey, I’m Otium. I’m here to listen whenever you want to talk about your day or how you’re feeling."
     # Safety
     if is_crisis(msg):
             "• Elsewhere: contact local emergency services."
         )
+    # If no emotions yet → friendly hello only
     if not mentions_emotion(msg):
         return ("Hey, I’m Otium. I’m here to listen whenever you want to talk about your day "
+                "or how you’re feeling. No pressure—share only when you’re ready.")
+    # Emotions present → retrieve (if any) + short support
+    emo = extract_emotion(msg)
+    context_block = join_context(get_top_chunks(msg, top_k=5)) if HAS_CORPUS else ""
     system_msg = (
         "You are Otium, a warm journaling buddy. Not medical advice. "
         "Output plain text only (no role labels or chat logs). "
+        "Reflect the user’s feelings in simple, kind language. "
+        "Ask exactly ONE question phrased as: 'Why do you feel {emotion}?', "
+        "where {emotion} is the extracted emotion provided below. "
+        "Keep the reply short (3–5 sentences) and end with one tiny break idea. "
         "Avoid clinical terms or medical guidance.\n\n"
+        f"Extracted emotion: {emo}\n"
     )
+    if context_block:
+        system_msg += f"\nHelpful snippets from the user's content:\n{context_block}"
+    # Build messages for the model
     messages = [{"role": "system", "content": system_msg}]
     if history:
         for u, a in history:
             if u: messages.append({"role": "user", "content": u})
             if a: messages.append({"role": "assistant", "content": a})
+    messages.append({"role": "user", "content": normalize(msg)})
+    # Call model, with stop strings to avoid chat-log artifacts
+    try:
+        resp = client.chat_completion(
+            messages=messages,
+            max_tokens=220,
+            temperature=0.7,
+            stop=["User wrote:", "Assistant wrote:", "User:", "Assistant:"]
+        )
+        text = resp["choices"][0]["message"]["content"].strip()
+    except Exception:
+        # Friendly fallback if API hiccups
+        text = f"Thanks for sharing that. Why do you feel {emo}?"
+    # Guarantee the explicit question appears (belt-and-suspenders)
+    if f"Why do you feel {emo}?" not in text:
+        text = text.rstrip(".! ") + f"\n\nWhy do you feel {emo}?"
     return f"{text}\n\n**Tiny break idea:** {pick_break()}"
+# ===== Minimal UI =====
 chatbot = gr.ChatInterface(
     respond,
     title="Otium — A Friendly Check-In",
+    description="Say hello whenever you’re ready. Otium only offers support once you talk about feelings. (Not medical advice.)"
 )
 if __name__ == "__main__":