# Page-header residue from the hosting site's file view (not part of the program):
#   devilsa's picture
#   Update app.py
#   26699f1 verified
import hashlib
import os

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
# ---------- Secrets / API Key ----------
# The Groq key comes from the environment. If it is missing or empty, show
# setup instructions and halt the script before any client is created.
API_KEY = os.environ.get("GROQ_API_KEY")

if not API_KEY:
    missing_key_help = "\n".join(
        [
            "GROQ_API_KEY not found.",
            "",
            "Go to your Space → Settings → Repository secrets → Add new secret",
            "Name: GROQ_API_KEY | Value: <your Groq key>",
            "",
            "Then Restart/Restart this Space.",
        ]
    )
    st.error(missing_key_help)
    st.stop()

# ---------- Groq Client ----------
# Module-level client shared by every request made from this script.
client = Groq(api_key=API_KEY)
# ---------- Models / Index ----------
@st.cache_resource
def load_embedder():
    """Return the sentence-embedding model used for both chunks and queries.

    Decorated with ``st.cache_resource`` so the model object is meant to be
    built once and reused instead of being reloaded on each script run.
    """
    model_name = "all-MiniLM-L6-v2"  # produces 384-dim embeddings
    return SentenceTransformer(model_name)
embedding_model = load_embedder()

DIM = 384  # all-MiniLM-L6-v2 dimension

# Create the per-session vector index and its parallel list of chunk texts
# only if this session does not have them yet; otherwise keep what is there.
for _state_key, _make_default in (
    ("faiss_index", lambda: faiss.IndexFlatL2(DIM)),
    ("chunks_store", list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Short aliases for the session-held objects; mutating them mutates the
# objects stored in st.session_state.
index = st.session_state["faiss_index"]
chunks_store = st.session_state["chunks_store"]
# ---------- Helpers ----------
def chunk_text(text: str, max_length: int = 500):
    """Split *text* into whitespace-delimited chunks by character budget.

    Words are packed greedily, joined by single spaces, so that each chunk is
    at most *max_length* characters long. Words are never split: a single word
    longer than *max_length* becomes a chunk of its own and exceeds the budget.

    Args:
        text: Input text; any run of whitespace is treated as one separator.
        max_length: Maximum chunk length in characters.

    Returns:
        A list of non-empty chunk strings, empty when *text* has no words.
    """
    chunks, cur, cur_len = [], [], 0
    for w in text.split():
        if not cur:
            # A chunk always takes its first word, even an oversized one, so
            # no empty chunk is ever emitted ahead of it.
            cur, cur_len = [w], len(w)
        elif cur_len + 1 + len(w) <= max_length:
            # +1 accounts for the joining space.
            cur.append(w)
            cur_len += 1 + len(w)
        else:
            chunks.append(" ".join(cur))
            cur, cur_len = [w], len(w)
    if cur:
        chunks.append(" ".join(cur))
    return chunks
def embed_and_store(chunks):
    """Embed *chunks* and append them to the vector index and the text store.

    The vectors go into the module-level ``index`` and the matching texts are
    appended to ``chunks_store`` in the same order, so a position returned by
    an index search can be used to look up its text. Does nothing when
    *chunks* is empty.
    """
    if chunks:
        vectors = embedding_model.encode(
            chunks, convert_to_numpy=True, normalize_embeddings=False
        )
        # The encoder output is cast to float32 before being added.
        index.add(vectors.astype("float32"))
        chunks_store.extend(chunks)
def query_llm(prompt: str) -> str:
    """Stream a response from Groq and return full text.

    Sends a fixed system prompt (the relationship-analysis instructions) plus
    *prompt* as the user message, consumes the streamed completion, and
    concatenates the content deltas.

    Args:
        prompt: The user message, already containing any retrieved context.

    Returns:
        The concatenated text of all streamed content deltas.
    """
    # One instruction per line. Adjacent string literals are concatenated
    # with nothing in between, which previously fused the instructions
    # together ("...out of 10Highlight top 3 red flags1 positive aspect...").
    system_prompt = "\n".join(
        (
            "Based on this WhatsApp chat, analyze whether this relationship is healthy or toxic.",
            "Give a toxicity Score out of 10",
            "Highlight top 3 red flags",
            "1 positive aspect",
            "3 improvement suggestions",
            "Start every answer with: 'Toxicity score: X/10'.",
        )
    )
    stream = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.6,
        max_completion_tokens=1024,
        top_p=0.95,
        stream=True,
        # NOTE(review): "raw" presumably leaves the model's reasoning inline in
        # the returned content, ahead of the requested "Toxicity score" line.
        # Confirm this is the intended display.
        reasoning_format="raw",
    )
    out = []
    for chunk in stream:
        # Defensive: skip any streamed chunk that carries no choices rather
        # than failing on choices[0].
        if not chunk.choices:
            continue
        out.append(chunk.choices[0].delta.content or "")
    return "".join(out)
# ---------- UI ----------
# NOTE(review): some Streamlit releases require set_page_config to be the
# first Streamlit call in the script; confirm the pinned version accepts it
# here, after the cached model load above.
st.set_page_config(page_title="AI Relationship Counsellor", layout="centered")
st.title("NLP Relationship Counsellor")

uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])

if uploaded_file:
    raw_bytes = uploaded_file.getvalue()

    # Streamlit reruns this script on every interaction, so without a guard
    # the same upload would be embedded and appended to the session's index
    # on each query, filling top-k retrieval with duplicate chunks. Index only
    # when the file content changes, and start from an empty index so chunks
    # from a previously uploaded chat do not leak into the context.
    file_digest = hashlib.sha256(raw_bytes).hexdigest()
    if st.session_state.get("indexed_file_digest") != file_digest:
        text = raw_bytes.decode("utf-8", errors="ignore")
        index.reset()
        chunks_store.clear()
        embed_and_store(chunk_text(text))
        st.session_state["indexed_file_digest"] = file_digest

    st.success("Chat extracted successfully!")

    user_query = st.text_input("Ask a question about your relationship:")
    if user_query:
        if index.ntotal == 0:
            st.warning("Nothing indexed yet. Please upload a chat file.")
        else:
            # top-k retrieval
            k = min(5, index.ntotal)
            q_emb = embedding_model.encode([user_query], convert_to_numpy=True).astype("float32")
            _, idxs = index.search(q_emb, k)
            # Keep only positions that map to a stored chunk.
            relevant = [chunks_store[i] for i in idxs[0] if 0 <= i < len(chunks_store)]
            context = " ".join(relevant)
            final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"
            with st.spinner("Analyzing…"):
                answer = query_llm(final_prompt)
            st.markdown("### AI Analysis")
            st.write(answer)
else:
    st.info("Upload a WhatsApp chat (.txt) to begin.")