Otium_testing

Sleeping

App Files Files Community

pikam00 committed on Aug 14, 2025

Commit

fd6d7e3

verified ·

1 Parent(s): 60817f8

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -215

app.py CHANGED Viewed

@@ -1,228 +1,117 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 from sentence_transformers import SentenceTransformer
 import torch
-import random
-import re
-# ===== Models =====
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
-# ===== Load & sanitize corpus =====
-with open("journal.txt", "r", encoding="utf-8") as f:
-    raw_text = f.read()
-ROLE_TAGS = re.compile(
-    r'\[/?(?:USER|ASST)\]|\</?(?:user|assistant)\>|<\|(?:user|assistant)\|>',
-    re.IGNORECASE,
-)
-def clean_corpus(text: str) -> str:
-    text = ROLE_TAGS.sub("", text or "")
-    out = []
-    for line in text.splitlines():
-        low = line.strip().lower()
-        if low.startswith("user wrote:"): continue
-        if low.startswith("/user wrote:"): continue
-        if low.startswith("assistant wrote:"): continue
-        if low.startswith("/assistant wrote:"): continue
-        if low.startswith("user:"): continue
-        if low.startswith("assistant:"): continue
-        out.append(line)
-    return "\n".join(out)
-journal_text = clean_corpus(raw_text)
-# ===== Chunk + embed (safe if file is short/empty) =====
-def preprocess_text(text: str):
-    cleaned = (text or "").strip()
-    if not cleaned:
-        return []
-    sents = [s.strip() for s in cleaned.split(".") if s.strip()]
-    sentence_chunks = [s for s in sents if len(s) > 10]
-    combined = []
-    for i in range(0, len(sents), 3):
-        chunk = ". ".join(sents[i:i+3]).strip()
         if len(chunk) > 20:
-            combined.append(chunk)
-    paras = [p.strip() for p in cleaned.split("\n\n") if p.strip() and len(p) > 30]
-    seen, chunks = set(), []
-    for c in sentence_chunks + combined + paras:
-        c = c.strip()
-        if c and c not in seen and len(c) > 15:
-            seen.add(c)
-            chunks.append(c)
-    return chunks
-chunks = preprocess_text(journal_text)
-HAS_CORPUS = len(chunks) > 0
-embeddings = embedder.encode(chunks, convert_to_tensor=True) if HAS_CORPUS else None
-def get_top_chunks(query: str, top_k: int = 5):
-    if not (HAS_CORPUS and embeddings is not None and query):
-        return []
-    q = embedder.encode(query, convert_to_tensor=True)
-    q = q / q.norm()
-    M = embeddings / embeddings.norm(dim=1, keepdim=True)
-    n = len(chunks)
-    if n == 0:
-        return []
-    k = max(1, min(top_k, n))
-    sims = torch.matmul(M, q)
-    scores, idxs = torch.topk(sims, k=k)
     results = []
-    for i, idx in enumerate(idxs):
-        if scores[i].item() > 0.25:
-            results.append(chunks[int(idx)])
-    return results
-def join_context(chunks_list, max_chars=900):
-    out = ""
-    for c in chunks_list:
-        c = c.strip()
-        if len(out) + len(c) + 2 > max_chars:
-            break
-        out += (("\n\n" if out else "") + c)
-    return out
-# ===== Tiny safety =====
-CRISIS_TERMS = ["suicide","kill myself","end my life","self-harm","hurt myself","overdose","harm others","kill someone"]
-def is_crisis(msg: str) -> bool:
-    m = (msg or "").lower()
-    return any(t in m for t in CRISIS_TERMS)
-# ===== Emotion gate & extraction =====
-EMOTION_HINTS = [
-    "i feel", "i'm feeling", "i am feeling", "feel", "feeling",
-    "overwhelmed", "stressed", "anxious", "sad", "lonely",
-    "angry", "upset", "worried", "guilty", "ashamed",
-    "proud", "happy", "excited", "tired", "burned out", "burnt out"
-]
-def mentions_emotion(msg: str) -> bool:
-    m = (msg or "").lower()
-    return any(k in m for k in EMOTION_HINTS)
-# normalize common typos like "jm sad" -> "i'm sad", "im sad" -> "i'm sad"
-def normalize(msg: str) -> str:
-    m = msg.strip()
-    m = re.sub(r"^\s*jm\b", "I'm", m, flags=re.IGNORECASE)
-    m = re.sub(r"\bim\b", "I'm", m, flags=re.IGNORECASE)
-    return m
-# very simple extraction: try to grab phrase after "I feel/I'm feeling/feeling ..."
-EMO_RE = re.compile(
-    r"\b(i\s*feel|i\s*am\s*feeling|i'm\s*feeling|im\s*feeling|feeling)\s+([^.,;!?]{1,40})",
-    re.IGNORECASE
-)
-# fallback list if no phrase captured
-EMO_WORDS = [
-    "overwhelmed","stressed","anxious","sad","lonely","angry","upset",
-    "worried","guilty","ashamed","proud","happy","excited","tired",
-    "burned out","burnt out"
 ]
-def extract_emotion(msg: str) -> str:
-    m = normalize(msg)
-    m_low = m.lower()
-    m = m.strip()
-    # try regex phrase
-    hit = EMO_RE.search(m)
-    if hit:
-        phrase = hit.group(2).strip()
-        # keep it short and clean
-        phrase = re.sub(r"\s+", " ", phrase)
-        return phrase
-    # fallback: first known word present
-    for w in EMO_WORDS:
-        if w in m_low:
-            return w
-    return "this way"  # last resort
-# ===== Tiny break ideas (only when feelings are mentioned) =====
-BREAKS = [
-    "Try box breathing 4-4-4-4 for 60 seconds.",
-    "Unclench your jaw and roll your shoulders slowly three times.",
-    "Look away from the screen and name 5 things you can see.",
-    "Sip water slowly and take three deep breaths.",
-    "Stand up, stretch overhead, and feel your feet on the ground."
-]
-def pick_break():
-    return random.choice(BREAKS)
-# ===== Chat handler =====
 def respond(message, history):
-    msg = (message or "").strip()
-    if not msg:
-        return "Hey, I’m Otium. I’m here to listen whenever you want to talk about your day or how you’re feeling."
-    # Safety
-    if is_crisis(msg):
-        return (
-            "I’m glad you reached out. I’m not a crisis service, but help is available:\n"
-            "• U.S.: call or text 988 (988lifeline.org)\n"
-            "• Elsewhere: contact local emergency services."
-        )
-    # If no emotions yet → friendly hello only
-    if not mentions_emotion(msg):
-        return ("Hey, I’m Otium. I’m here to listen whenever you want to talk about your day "
-                "or how you’re feeling. No pressure—share only when you’re ready.")
-    # Emotions present → retrieve (if any) + short support
-    emo = extract_emotion(msg)
-    context_block = join_context(get_top_chunks(msg, top_k=5)) if HAS_CORPUS else ""
-    system_msg = (
-        "You are Otium, a warm journaling buddy. Not medical advice. "
-        "Output plain text only (no role labels or chat logs). "
-        "Reflect the user’s feelings in simple, kind language. "
-        "Ask exactly ONE question phrased as: 'Why do you feel {emotion}?', "
-        "where {emotion} is the extracted emotion provided below. "
-        "Keep the reply short (3–5 sentences) and end with one tiny break idea. "
-        "Avoid clinical terms or medical guidance.\n\n"
-        f"Extracted emotion: {emo}\n"
-    )
-    if context_block:
-        system_msg += f"\nHelpful snippets from the user's content:\n{context_block}"
-    # Build messages for the model
-    messages = [{"role": "system", "content": system_msg}]
     if history:
-        for u, a in history:
-            if u: messages.append({"role": "user", "content": u})
-            if a: messages.append({"role": "assistant", "content": a})
-    messages.append({"role": "user", "content": normalize(msg)})
-    # Call model, with stop strings to avoid chat-log artifacts
-    try:
-        resp = client.chat_completion(
-            messages=messages,
-            max_tokens=220,
-            temperature=0.7,
-            stop=["User wrote:", "Assistant wrote:", "User:", "Assistant:"]
-        )
-        text = resp["choices"][0]["message"]["content"].strip()
-    except Exception:
-        # Friendly fallback if API hiccups
-        text = f"Thanks for sharing that. Why do you feel {emo}?"
-    # Guarantee the explicit question appears (belt-and-suspenders)
-    if f"Why do you feel {emo}?" not in text:
-        text = text.rstrip(".! ") + f"\n\nWhy do you feel {emo}?"
-    return f"{text}\n\n**Tiny break idea:** {pick_break()}"
-# ===== Minimal UI =====
-chatbot = gr.ChatInterface(
-    respond,
-    title="Otium — A Friendly Check-In",
-    description="Say hello whenever you’re ready. Otium only offers support once you talk about feelings. (Not medical advice.)"
-)
-if __name__ == "__main__":
-    chatbot.launch()

 import gradio as gr
+import random
 from huggingface_hub import InferenceClient
+# import lines go at the top: any libraries I need to import go up here ^^
 from sentence_transformers import SentenceTransformer
 import torch
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Step 1
+with open("Untitled document.txt", "r", encoding="utf-8") as f:
+    skincare_text = f.read()
+# Step 2: Preprocess text into sentence chunks
+def preprocess_text(text):
+    cleaned_text = text.strip()
+    sentences = [s.strip() for s in cleaned_text.split('.') if s.strip()]
+    sentence_chunks = [s.strip() for s in sentences if len(s.strip()) > 10]
+    combined_chunks = []
+    for i in range(0, len(sentences), 2):
+        chunk = '. '.join(sentences[i:i+3]).strip()
         if len(chunk) > 20:
+            combined_chunks.append(chunk)
+    paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
+    paragraph_chunks = [p for p in paragraphs if len(p) > 30]
+    all_chunks = sentence_chunks + combined_chunks + paragraph_chunks
+    seen = set()
+    final_chunks = []
+    for chunk in all_chunks:
+        if chunk not in seen and len(chunk) > 15:
+            seen.add(chunk)
+            final_chunks.append(chunk)
+    print(f"Created {len(final_chunks)} chunks using advanced strategy")
+    print(f"Sample chunks: {final_chunks[:3]}")
+    return final_chunks
+cleaned_chunks = preprocess_text(skincare_text)
+# Step 3: Convert chunks into embeddings
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def create_embeddings(text_chunks):
+    chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True)
+    print(f"Embeddings shape: {chunk_embeddings.shape}")
+    return chunk_embeddings
+chunk_embeddings = create_embeddings(cleaned_chunks)
+# Step 4: Retrieve top matching chunks
+def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    query_norm = query_embedding / query_embedding.norm()
+    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+    similarities = torch.matmul(chunks_norm, query_norm)
+    top_scores, top_indices = torch.topk(similarities, k=min(top_k, len(text_chunks)))
     results = []
+    for i, idx in enumerate(top_indices):
+        score = top_scores[i].item()
+        if score > 0.3:  # Only include reasonably relevant chunks
+            results.append(text_chunks[idx])
+    return results, top_scores[:len(results)]
+# Step 5: Relevance checker
+def is_skincare_related(query):
+    skincare_keywords = [
+        'skin', 'skincare', 'acne', 'wrinkles', 'moisturizer', 'cleanser',
+        'sunscreen', 'serum', 'retinol', 'vitamin', 'dry', 'oily', 'sensitive',
+        'aging', 'pores', 'blackheads', 'routine', 'face', 'facial', 'beauty',
+        'dermatology', 'cosmetic', 'cream', 'lotion', 'toner', 'exfoliate',
+        'hydration', 'anti-aging', 'blemish', 'spot', 'dark circles'
+    ]
+    query_lower = query.lower()
+    return any(keyword in query_lower for keyword in skincare_keywords)
+queries = [
+    "Consistent skincare routine",
+    "Applying sunscreen daily",
+    "Choosing products that match your skin type"
 ]
+for q in queries:
+    print(f"\nQuery: {q}")
+    results = get_top_chunks(q, chunk_embeddings, cleaned_chunks)
+    for idx, res in enumerate(results, 1):
+        print(f"Result {idx}: {res}")
 def respond(message, history):
+    top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
+    print(top_results)
+    messages = [{"role": "system", "content": f"You are a friendly chatbot. You give people advice about skincare. Base your response on the following information: {top_results}"}]
     if history:
+        messages.extend(history)
+    messages.append({"role": "user", "content": message})
+    response = client.chat_completion(messages, max_tokens=100)
+    return response['choices'][0]['message']['content'].strip()
+def echo(message, history):
+    return message
+def yes_or_no(message, history):
+    return random.choice(['Yes', 'No', 'Maybe', 'Ask Again'])
+chatbot = gr.ChatInterface(respond)
+# defining my chatbot so that the user can interact and see their conversation history and send new messages
+chatbot.launch()