devilsa commited on
Commit
b8144e8
·
verified ·
1 Parent(s): 9ce3a09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -58
app.py CHANGED
@@ -1,51 +1,73 @@
 
1
  import streamlit as st
2
  import faiss
 
3
  from sentence_transformers import SentenceTransformer
4
  from groq import Groq
5
 
6
- # Initialize Groq API
7
- client = Groq(api_key="<REDACTED — leaked secret; rotate this key in the Groq console>") # Ensure your API key is valid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Initialize Sentence Transformer
10
- embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
11
 
12
- # FAISS Index
13
- dimension = 384 # Embedding dimension of the model
14
- index = faiss.IndexFlatL2(dimension)
 
 
 
15
 
16
- # Function to chunk text
 
 
 
17
  def chunk_text(text, max_length=500):
18
- words = text.split()
19
- chunks = []
20
- chunk = []
21
- for word in words:
22
- if len(" ".join(chunk)) + len(word) <= max_length:
23
- chunk.append(word)
24
  else:
25
- chunks.append(" ".join(chunk))
26
- chunk = [word]
27
- if chunk:
28
- chunks.append(" ".join(chunk))
29
  return chunks
30
 
31
- # Function to embed text and add to FAISS index
32
  def embed_and_store(chunks):
33
- embeddings = embedding_model.encode(chunks)
34
- index.add(embeddings)
35
-
36
- # Query handling using Groq's streaming completions
37
- def query_llm(prompt):
38
- # Create a completion request using the Groq model
39
- completion = client.chat.completions.create(
40
- model="deepseek-r1-distill-llama-70b", # Use the provided Groq model
 
 
 
 
41
  messages=[
42
  {
43
  "role": "system",
44
  "content": (
45
- "You are a relationship counselor. Analyze the given WhatsApp conversation "
46
- "and provide insights on potential red flags, toxicity, and room for improvement in behavior. "
47
- "Every response must start by rating the overall chat toxicity out of 10."
48
- )
49
  },
50
  {"role": "user", "content": prompt},
51
  ],
@@ -53,41 +75,43 @@ def query_llm(prompt):
53
  max_completion_tokens=1024,
54
  top_p=0.95,
55
  stream=True,
56
- reasoning_format="raw"
57
  )
58
-
59
- # Stream and collect the response
60
- full_response = ""
61
- for chunk in completion:
62
- full_response += chunk.choices[0].delta.content or ""
63
- return full_response
64
-
65
- # Streamlit App
66
  st.title("AI Relationship Counsellor")
67
 
68
- uploaded_file = st.file_uploader("Upload a text file of your WhatsApp chat", type=["txt"])
69
 
70
  if uploaded_file:
71
- text = uploaded_file.read().decode("utf-8")
72
- st.write("Chat Extracted Successfully!")
73
 
74
- # Chunk and embed text
75
  chunks = chunk_text(text)
76
  embed_and_store(chunks)
77
 
78
- # Query Interface
79
  user_query = st.text_input("Ask a question about your relationship:")
80
  if user_query:
81
- # Embed query and search FAISS for the top 5 relevant chunks
82
- query_embedding = embedding_model.encode([user_query])
83
- distances, indices = index.search(query_embedding, k=5)
84
- relevant_chunks = [chunks[i] for i in indices[0]]
85
-
86
- # Combine chunks to form context
87
- context = " ".join(relevant_chunks)
88
- final_prompt = f"Context: {context}\n\nQuestion: {user_query}"
89
-
90
- # Get response from the Groq model
91
- response = query_llm(final_prompt)
92
- st.write("### AI Analysis")
93
- st.write(response)
 
 
 
 
 
 
1
+ import os
2
  import streamlit as st
3
  import faiss
4
+ import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
  from groq import Groq
7
 
8
# --- Load API key from environment (HF Repo Secrets) ---
# SECURITY: the secret's value must never be echoed in the UI, logs, or error
# messages. A key was previously committed to git history (and leaked again in
# this error string) — it must be rotated in the Groq console regardless.
API_KEY = os.getenv("GROQ_API_KEY")
if not API_KEY:
    # Tell the operator where to configure the secret, without revealing it.
    st.error(
        "GROQ_API_KEY not found. In your Space go to: "
        "Settings → Repository secrets → Add new secret "
        "(Name: GROQ_API_KEY, Value: your Groq API key)."
    )
    st.stop()

# --- Init Groq client (no key printed/logged) ---
client = Groq(api_key=API_KEY)
19
+
20
# --- Embedding model: cached so Streamlit reruns don't reload the weights ---
@st.cache_resource
def load_embedder():
    """Return a cached MiniLM sentence encoder (384-dimensional output)."""
    return SentenceTransformer("all-MiniLM-L6-v2")

embedding_model = load_embedder()

# --- FAISS L2 index + parallel chunk store, kept in session state so they
# --- survive Streamlit's script reruns ---
dimension = 384  # MiniLM embedding size
if "faiss_index" not in st.session_state:
    st.session_state["faiss_index"] = faiss.IndexFlatL2(dimension)
if "chunks_store" not in st.session_state:
    st.session_state["chunks_store"] = []

index = st.session_state["faiss_index"]
chunks_store = st.session_state["chunks_store"]
37
# ---- Utilities ----
def chunk_text(text, max_length=500):
    """Split *text* into word-boundary chunks of at most *max_length* characters.

    Words are joined with single spaces. A single word longer than
    *max_length* becomes its own (oversized) chunk rather than being split
    mid-word. Returns an empty list for empty or whitespace-only input.

    Fixes vs. the previous version: no empty-string chunk is emitted when an
    oversized word arrives on an empty buffer; the joining-space cost is only
    charged when the buffer is non-empty (the old code charged +1 always,
    making the effective limit max_length-1 for the first word); and the
    running length is tracked incrementally instead of re-joining the buffer
    on every iteration (O(n) instead of O(n^2)).
    """
    chunks = []
    cur = []      # words of the chunk being built
    cur_len = 0   # exact length of " ".join(cur)
    for word in text.split():
        # Appending costs the word plus one joining space when cur is non-empty.
        added = len(word) + 1 if cur else len(word)
        if cur_len + added <= max_length:
            cur.append(word)
            cur_len += added
        else:
            if cur:  # avoid emitting "" when an oversized word starts a chunk
                chunks.append(" ".join(cur))
            cur = [word]
            cur_len = len(word)
    if cur:
        chunks.append(" ".join(cur))
    return chunks
49
 
 
50
def embed_and_store(chunks):
    """Encode *chunks* and append them to the shared FAISS index and chunk store.

    No-op on an empty chunk list.
    """
    if not chunks:
        return
    vectors = embedding_model.encode(
        chunks, convert_to_numpy=True, normalize_embeddings=False
    )
    # FAISS requires float32 input.
    index.add(np.asarray(vectors, dtype="float32"))
    chunks_store.extend(chunks)
58
+
59
def query_llm(prompt: str) -> str:
    """Send *prompt* to the Groq chat model and return the streamed reply as one string.

    NOTE(review): this diff view elides one unchanged `create()` argument
    (original line 74, between `messages` and `max_completion_tokens`, likely
    a `temperature=` setting) — restore it from the full file before merging.
    """
    system_text = (
        "You are a relationship counselor. Analyze the WhatsApp conversation and "
        "provide insights on red flags, toxicity, and improvements. "
        "Start every answer with: 'Toxicity score: X/10'."
    )
    completion_stream = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[
            {"role": "system", "content": system_text},
            {"role": "user", "content": prompt},
        ],
        max_completion_tokens=1024,
        top_p=0.95,
        stream=True,
        reasoning_format="raw",
    )
    # Concatenate streamed deltas; `content` may be None on some events.
    pieces = []
    for event in completion_stream:
        pieces.append(event.choices[0].delta.content or "")
    return "".join(pieces)
85
+
86
# ---- UI ----

st.title("AI Relationship Counsellor")

uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])

if uploaded_file:
    text = uploaded_file.read().decode("utf-8", errors="ignore")
    st.success("Chat extracted successfully!")

    # Streamlit reruns this whole script on every widget interaction. Without
    # a guard, the same file is re-chunked and re-appended to the
    # session-persistent FAISS index on each rerun, duplicating entries
    # without bound. Index each uploaded file exactly once per session.
    file_fingerprint = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_fingerprint:
        chunks = chunk_text(text)
        embed_and_store(chunks)
        st.session_state["indexed_file"] = file_fingerprint

    user_query = st.text_input("Ask a question about your relationship:")
    if user_query:
        # Search top-k relevant chunks; k is capped by how many vectors exist.
        k = min(5, index.ntotal) if index.ntotal > 0 else 0
        if k == 0:
            st.warning("No text indexed yet. Please upload a chat file.")
        else:
            q_emb = embedding_model.encode([user_query], convert_to_numpy=True)
            q_emb = np.asarray(q_emb, dtype="float32")  # FAISS needs float32
            distances, idxs = index.search(q_emb, k)
            # Drop out-of-range ids (FAISS pads missing results with -1).
            relevant = [chunks_store[i] for i in idxs[0] if 0 <= i < len(chunks_store)]

            context = " ".join(relevant)
            final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"

            with st.spinner("Analyzing…"):
                answer = query_llm(final_prompt)

            st.markdown("### AI Analysis")
            st.write(answer)