"""Streamlit app: RAG-style 'relationship counsellor' over an uploaded WhatsApp chat.

Pipeline: upload .txt chat -> chunk -> embed (all-MiniLM-L6-v2) -> store in a
session-scoped FAISS index -> retrieve top-k chunks for a user question ->
stream an analysis from a Groq-hosted LLM.
"""

import os

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer

# ---------- Page config ----------
# Must be the FIRST Streamlit command executed in the script; calling any other
# st.* function first (e.g. st.error below) raises StreamlitAPIException.
st.set_page_config(page_title="AI Relationship Counsellor", layout="centered")

# ---------- Secrets / API Key ----------
API_KEY = os.getenv("GROQ_API_KEY")
if not API_KEY:
    st.error(
        "GROQ_API_KEY not found.\n\n"
        "Go to your Space → Settings → Repository secrets → Add new secret\n"
        "Name: GROQ_API_KEY | Value: \n\n"
        "Then Restart/Restart this Space."
    )
    st.stop()

# ---------- Groq Client ----------
client = Groq(api_key=API_KEY)

# ---------- Models / Index ----------
@st.cache_resource
def load_embedder() -> SentenceTransformer:
    """Load and cache the sentence embedder (shared across sessions/reruns)."""
    # all-MiniLM-L6-v2 produces 384-dim embeddings
    return SentenceTransformer("all-MiniLM-L6-v2")


embedding_model = load_embedder()
DIM = 384  # all-MiniLM-L6-v2 embedding dimension

# Per-session vector index and parallel list of the raw text chunks.
# FAISS returns row ids; chunks_store[i] is the text for index row i.
if "faiss_index" not in st.session_state:
    st.session_state.faiss_index = faiss.IndexFlatL2(DIM)
if "chunks_store" not in st.session_state:
    st.session_state.chunks_store = []
# Identity of the file already indexed this session, so reruns (every widget
# interaction re-executes this script) don't re-embed the same upload and
# flood the index with duplicate vectors.
if "indexed_file" not in st.session_state:
    st.session_state.indexed_file = None

index = st.session_state.faiss_index
chunks_store = st.session_state.chunks_store


# ---------- Helpers ----------
def chunk_text(text: str, max_length: int = 500) -> list[str]:
    """Split *text* into whitespace-delimited chunks of at most ~max_length chars.

    A single word longer than max_length becomes its own chunk rather than
    producing an empty one.
    """
    words: list[str] = text.split()
    chunks: list[str] = []
    cur: list[str] = []
    for w in words:
        if len(" ".join(cur)) + len(w) + 1 <= max_length:
            cur.append(w)
        else:
            if cur:  # guard: first word may exceed the budget on its own
                chunks.append(" ".join(cur))
            cur = [w]
    if cur:
        chunks.append(" ".join(cur))
    return chunks


def embed_and_store(chunks: list[str]) -> None:
    """Embed *chunks* and append them to the session FAISS index + text store."""
    if not chunks:
        return
    embs = embedding_model.encode(
        chunks, convert_to_numpy=True, normalize_embeddings=False
    ).astype("float32")
    index.add(embs)
    chunks_store.extend(chunks)


# Kept as a single constant so the instructions are readable and separated;
# the original ran all sentences together into one unpunctuated blob.
_SYSTEM_PROMPT = (
    "Based on this WhatsApp chat, analyze whether this relationship is "
    "healthy or toxic.\n"
    "Give a toxicity score out of 10.\n"
    "Highlight the top 3 red flags.\n"
    "Give 1 positive aspect.\n"
    "Give 3 improvement suggestions.\n"
    "Start every answer with: 'Toxicity score: X/10'."
)


def query_llm(prompt: str) -> str:
    """Stream a chat completion from Groq and return the concatenated text."""
    stream = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=0.6,
        max_completion_tokens=1024,
        top_p=0.95,
        stream=True,
        reasoning_format="raw",
    )
    out = []
    for chunk in stream:
        # delta.content is None on some events (e.g. role-only / final chunk)
        delta = chunk.choices[0].delta.content or ""
        out.append(delta)
    return "".join(out)


# ---------- UI ----------
st.title("NLP Relationship Counsellor")

uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])

if uploaded_file:
    # Only (re-)index when a different file arrives; Streamlit reruns this
    # whole script on every interaction and would otherwise duplicate vectors.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.indexed_file != file_key:
        text = uploaded_file.read().decode("utf-8", errors="ignore")
        embed_and_store(chunk_text(text))
        st.session_state.indexed_file = file_key
    st.success("Chat extracted successfully!")

    user_query = st.text_input("Ask a question about your relationship:")
    if user_query:
        if index.ntotal == 0:
            st.warning("Nothing indexed yet. Please upload a chat file.")
        else:
            # top-k retrieval over the session index
            k = min(5, index.ntotal)
            q_emb = embedding_model.encode(
                [user_query], convert_to_numpy=True
            ).astype("float32")
            distances, idxs = index.search(q_emb, k)
            # FAISS pads missing results with -1; bounds-check before lookup.
            relevant = [chunks_store[i] for i in idxs[0] if 0 <= i < len(chunks_store)]
            context = " ".join(relevant)

            final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"
            with st.spinner("Analyzing…"):
                answer = query_llm(final_prompt)

            st.markdown("### AI Analysis")
            st.write(answer)
else:
    st.info("Upload a WhatsApp chat (.txt) to begin.")