# AI Relationship Counsellor — Streamlit app (FAISS retrieval + Groq LLM).
# Deployed as a Hugging Face Space; requires the GROQ_API_KEY repository secret.
| import os | |
| import numpy as np | |
| import streamlit as st | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| from groq import Groq | |
# ---------- Secrets / API Key ----------
API_KEY = os.getenv("GROQ_API_KEY")
if not API_KEY:
    # Fail fast with setup instructions instead of crashing later on an
    # unauthenticated Groq API call.
    st.error(
        "GROQ_API_KEY not found.\n\n"
        "Go to your Space → Settings → Repository secrets → Add new secret\n"
        "Name: GROQ_API_KEY | Value: <your Groq key>\n\n"
        "Then restart this Space."
    )
    st.stop()

# ---------- Groq Client ----------
client = Groq(api_key=API_KEY)
# ---------- Models / Index ----------
@st.cache_resource
def load_embedder():
    """Load the sentence-transformer encoder (384-dim embeddings).

    Decorated with st.cache_resource so Streamlit's top-to-bottom reruns
    reuse one model instance instead of re-instantiating it on every
    widget interaction.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


embedding_model = load_embedder()
DIM = 384  # all-MiniLM-L6-v2 produces 384-dimensional vectors

# Session-scoped FAISS index plus the parallel list of raw text chunks,
# so uploaded data survives Streamlit's script reruns.
if "faiss_index" not in st.session_state:
    st.session_state["faiss_index"] = faiss.IndexFlatL2(DIM)
if "chunks_store" not in st.session_state:
    st.session_state["chunks_store"] = []

index = st.session_state["faiss_index"]
chunks_store = st.session_state["chunks_store"]
| # ---------- Helpers ---------- | |
def chunk_text(text: str, max_length: int = 500) -> list:
    """Greedily pack whitespace-separated words into chunks of ~max_length chars.

    A single word longer than ``max_length`` becomes its own (oversized)
    chunk. Empty input yields an empty list.

    Fix: the original appended ``" ".join([])`` — an empty string — to the
    result whenever the very first word of a new chunk exceeded the budget;
    those empty chunks would then be embedded and indexed.
    """
    chunks, cur = [], []
    for w in text.split():
        if len(" ".join(cur)) + len(w) + 1 <= max_length:
            cur.append(w)
        else:
            if cur:  # skip the empty-chunk case (oversized first word)
                chunks.append(" ".join(cur))
            cur = [w]
    if cur:
        chunks.append(" ".join(cur))
    return chunks
def embed_and_store(chunks):
    """Encode *chunks* and append them to the session FAISS index and text store.

    No-op for an empty chunk list. The chunk texts are kept in
    ``chunks_store`` at the same positions as their vectors in ``index``.
    """
    if not chunks:
        return
    vectors = embedding_model.encode(
        chunks,
        convert_to_numpy=True,
        normalize_embeddings=False,
    )
    # FAISS expects float32 input.
    index.add(vectors.astype("float32"))
    chunks_store.extend(chunks)
def query_llm(prompt: str) -> str:
    """Stream a chat completion from Groq and return the full reply text."""
    system_msg = (
        "Based on this WhatsApp chat, analyze whether this relationship is healthy or toxic."
        "Give a toxicity Score out of 10"
        "Highlight top 3 red flags"
        "1 positive aspect"
        "3 improvement suggestions"
        "Start every answer with: 'Toxicity score: X/10'."
    )
    stream = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": prompt},
        ],
        temperature=0.6,
        max_completion_tokens=1024,
        top_p=0.95,
        stream=True,
        reasoning_format="raw",
    )
    # Accumulate streamed deltas; .content can be None on some chunks.
    pieces = []
    for part in stream:
        pieces.append(part.choices[0].delta.content or "")
    return "".join(pieces)
# ---------- UI ----------
st.set_page_config(page_title="AI Relationship Counsellor", layout="centered")
st.title("NLP Relationship Counsellor")

uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])

if uploaded_file:
    # Fix: Streamlit reruns this whole script on every widget interaction,
    # so unconditionally embedding here re-added the same chunks to the
    # FAISS index on each rerun, duplicating vectors and skewing retrieval.
    # Index each upload exactly once, keyed by (name, size).
    file_sig = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_sig:
        text = uploaded_file.read().decode("utf-8", errors="ignore")
        embed_and_store(chunk_text(text))
        st.session_state["indexed_file"] = file_sig
    st.success("Chat extracted successfully!")

    user_query = st.text_input("Ask a question about your relationship:")
    if user_query:
        if index.ntotal == 0:
            st.warning("Nothing indexed yet. Please upload a chat file.")
        else:
            # Top-k retrieval: nearest chunks become the LLM context.
            k = min(5, index.ntotal)
            q_emb = embedding_model.encode([user_query], convert_to_numpy=True).astype("float32")
            distances, idxs = index.search(q_emb, k)
            relevant = [chunks_store[i] for i in idxs[0] if 0 <= i < len(chunks_store)]
            context = " ".join(relevant)
            final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"
            with st.spinner("Analyzing…"):
                answer = query_llm(final_prompt)
            st.markdown("### AI Analysis")
            st.write(answer)
else:
    st.info("Upload a WhatsApp chat (.txt) to begin.")