Spaces:
Sleeping
Sleeping
File size: 4,061 Bytes
b8144e8 9abfbc3 01b406f e880710 ef6e80f 01b406f 06075c2 b8144e8 9abfbc3 77c117a 9abfbc3 06075c2 b8144e8 06075c2 b8144e8 06075c2 b8144e8 06075c2 e880710 b8144e8 e880710 06075c2 b8144e8 06075c2 b8144e8 e880710 b8144e8 74fd9d5 06075c2 9abfbc3 06075c2 b8144e8 e880710 b8144e8 e880710 77c117a e880710 b8144e8 06075c2 b8144e8 77c117a b8144e8 06075c2 26699f1 06075c2 9abfbc3 06075c2 e880710 b8144e8 e880710 b8144e8 e880710 9abfbc3 b8144e8 06075c2 9abfbc3 b8144e8 06075c2 b8144e8 9abfbc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os
import numpy as np
import streamlit as st
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
# ---------- Secrets / API Key ----------
# The Groq key is read from the environment (on a Space it is supplied as a
# repository secret).
API_KEY = os.getenv("GROQ_API_KEY")
if not API_KEY:
    # Without a key no completion request can be made: show setup
    # instructions and halt this script run instead of failing later.
    st.error(
        "GROQ_API_KEY not found.\n\n"
        "Go to your Space → Settings → Repository secrets → Add new secret\n"
        "Name: GROQ_API_KEY | Value: <your Groq key>\n\n"
        "Then restart this Space."
    )
    st.stop()

# ---------- Groq Client ----------
client = Groq(api_key=API_KEY)
# ---------- Models / Index ----------
@st.cache_resource
def load_embedder():
    """Return the SentenceTransformer used to embed both chunks and queries.

    Decorated with ``st.cache_resource`` so the model object is built once
    and reused rather than re-created on each script run.
    """
    model_name = "all-MiniLM-L6-v2"  # produces 384-dim embeddings
    return SentenceTransformer(model_name)
# Vector size of all-MiniLM-L6-v2; the FAISS index must be built with it.
DIM = 384
embedding_model = load_embedder()

# The chunk texts and their vectors live in session state so they persist
# across script re-runs; each is created only the first time it is missing.
if "chunks_store" not in st.session_state:
    st.session_state["chunks_store"] = []
if "faiss_index" not in st.session_state:
    st.session_state["faiss_index"] = faiss.IndexFlatL2(DIM)

# Module-level aliases used by the helpers below. Position i in
# ``chunks_store`` is the text whose vector was added i-th to ``index``.
chunks_store = st.session_state["chunks_store"]
index = st.session_state["faiss_index"]
# ---------- Helpers ----------
def chunk_text(text: str, max_length: int = 500):
    """Split ``text`` into whitespace-delimited chunks within a character budget.

    Words are packed greedily: a word joins the current chunk as long as the
    chunk, joined with single spaces, stays at most ``max_length`` characters
    long. A single word longer than ``max_length`` is never split; it becomes
    a chunk of its own.

    Args:
        text: Raw text to split. Any run of whitespace separates words.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings in their original order, or an
        empty list when ``text`` contains no words.
    """
    chunks, cur, cur_len = [], [], 0
    for w in text.split():
        if not cur:
            # The first word of a chunk is always accepted. Testing it against
            # the budget would flush an empty chunk when the word is too long.
            cur, cur_len = [w], len(w)
        elif cur_len + 1 + len(w) <= max_length:
            # +1 accounts for the joining space; the running length avoids
            # re-joining the whole chunk for every word.
            cur.append(w)
            cur_len += 1 + len(w)
        else:
            chunks.append(" ".join(cur))
            cur, cur_len = [w], len(w)
    if cur:
        chunks.append(" ".join(cur))
    return chunks
def embed_and_store(chunks):
    """Embed ``chunks`` and append them to the shared index and text store.

    Vectors are added to the module-level FAISS ``index`` and the matching
    texts to ``chunks_store`` in the same order, so a position returned by
    an index search maps to its text. Does nothing for an empty input.
    """
    if chunks:
        vectors = embedding_model.encode(
            chunks,
            convert_to_numpy=True,
            normalize_embeddings=False,
        )
        # The index is fed float32 vectors.
        index.add(vectors.astype("float32"))
        chunks_store.extend(chunks)
def query_llm(prompt: str) -> str:
    """Send ``prompt`` to the Groq chat model and return the complete reply.

    The completion is requested in streaming mode; the streamed content
    deltas are concatenated into a single string before returning.

    Args:
        prompt: User message content (retrieved context plus the question).

    Returns:
        The concatenated text of all streamed content deltas.
    """
    # Adjacent string literals are concatenated with no separator, so each
    # instruction ends with an explicit newline to keep them from running
    # together in the prompt.
    system_prompt = (
        "Based on this WhatsApp chat, analyze whether this relationship is healthy or toxic.\n"
        "Give a toxicity Score out of 10\n"
        "Highlight top 3 red flags\n"
        "1 positive aspect\n"
        "3 improvement suggestions\n"
        "Start every answer with: 'Toxicity score: X/10'."
    )
    stream = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.6,
        max_completion_tokens=1024,
        top_p=0.95,
        stream=True,
        reasoning_format="raw",
    )
    out = []
    for chunk in stream:
        # NOTE(review): a stream event may arrive with an empty ``choices``
        # list (e.g. a trailing metadata event); skip it rather than raise
        # IndexError. Confirm against the Groq streaming docs.
        if not chunk.choices:
            continue
        # ``content`` can be None on some deltas; treat that as empty text.
        out.append(chunk.choices[0].delta.content or "")
    return "".join(out)
# ---------- UI ----------
st.set_page_config(page_title="AI Relationship Counsellor", layout="centered")
st.title("NLP Relationship Counsellor")

uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])

if uploaded_file:
    # Streamlit re-executes this script on every widget interaction while the
    # uploaded file stays attached. Remember which uploads were already
    # indexed so the same chunks are not added to the index again on each
    # re-run (which would fill the top-k results with duplicates).
    # Uploads are identified by (name, size); a different file with the same
    # name and byte size would be treated as already indexed.
    if "indexed_files" not in st.session_state:
        st.session_state.indexed_files = set()
    file_key = (uploaded_file.name, uploaded_file.size)
    if file_key not in st.session_state.indexed_files:
        # Undecodable bytes are dropped rather than aborting the upload.
        text = uploaded_file.read().decode("utf-8", errors="ignore")
        embed_and_store(chunk_text(text))
        st.session_state.indexed_files.add(file_key)
    st.success("Chat extracted successfully!")

    user_query = st.text_input("Ask a question about your relationship:")
    if user_query:
        if index.ntotal == 0:
            st.warning("Nothing indexed yet. Please upload a chat file.")
        else:
            # Top-k retrieval; never ask for more neighbours than are stored.
            k = min(5, index.ntotal)
            q_emb = embedding_model.encode([user_query], convert_to_numpy=True).astype("float32")
            _, idxs = index.search(q_emb, k)
            # Defensive bounds check: ignore any returned position that does
            # not map to a stored chunk (presumably FAISS's -1 "no result").
            relevant = [chunks_store[i] for i in idxs[0] if 0 <= i < len(chunks_store)]
            context = " ".join(relevant)
            final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"
            with st.spinner("Analyzing…"):
                answer = query_llm(final_prompt)
            st.markdown("### AI Analysis")
            st.write(answer)
else:
    st.info("Upload a WhatsApp chat (.txt) to begin.")
|