devilsa committed on
Commit
06075c2
·
verified ·
1 Parent(s): 74fd9d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -64
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import os
3
  import numpy as np
4
  import streamlit as st
@@ -6,62 +5,31 @@ import faiss
6
  from sentence_transformers import SentenceTransformer
7
  from groq import Groq
8
 
9
- # ------------------------- Secrets / API Key -------------------------
10
  API_KEY = os.getenv("GROQ_API_KEY")
11
  if not API_KEY:
12
  st.error(
13
  "GROQ_API_KEY not found.\n\n"
14
  "Go to your Space → Settings → Repository secrets → Add new secret\n"
15
  "Name: GROQ_API_KEY | Value: <your Groq key>\n\n"
16
- "Then Restart this Space."
17
  )
18
  st.stop()
19
 
20
- # ------------------------- Groq Client -------------------------
21
  client = Groq(api_key=API_KEY)
22
 
23
- # ------------------------- System Prompt (concise) -------------------------
24
- SYSTEM_MSG = """
25
- You are a relationship counselor. Read the provided WhatsApp chat context and answer the user’s question.
26
- Be CONCISE and ACTIONABLE. Do not include internal reasoning or long explanations.
27
-
28
- Return Markdown in exactly this format:
29
-
30
- Toxicity score: X/10
31
-
32
- **1) One-line summary**
33
- - <max 25 words>
34
-
35
- **2) Top red flags (max 3)**
36
- - <short phrase + why it matters>
37
- - <short phrase + why it matters>
38
- - <short phrase + why it matters>
39
-
40
- **3) Greens (if any, max 2)**
41
- - <short phrase>
42
- - <short phrase>
43
-
44
- **4) Next steps (max 3)**
45
- - <clear, practical action>
46
- - <clear, practical action>
47
- - <clear, practical action>
48
-
49
- Rules:
50
- - Keep total answer under 120 words.
51
- - Use plain language; no therapy jargon.
52
- - If signs of abuse, add: “**Safety note:** consider talking to a trusted person/professional.”
53
- """
54
-
55
- # ------------------------- Embeddings / FAISS -------------------------
56
  @st.cache_resource
57
  def load_embedder():
58
- return SentenceTransformer("all-MiniLM-L6-v2") # 384-dim
 
59
 
60
  embedding_model = load_embedder()
61
- EMB_DIM = 384
62
 
 
63
  if "faiss_index" not in st.session_state:
64
- st.session_state.faiss_index = faiss.IndexFlatL2(EMB_DIM)
65
  if "chunks_store" not in st.session_state:
66
  st.session_state.chunks_store = []
67
 
@@ -69,8 +37,9 @@ index = st.session_state.faiss_index
69
  chunks_store = st.session_state.chunks_store
70
 
71
 
72
- # ------------------------- Helpers -------------------------
73
  def chunk_text(text: str, max_length: int = 500):
 
74
  words, chunks, cur = text.split(), [], []
75
  for w in words:
76
  if len(" ".join(cur)) + len(w) + 1 <= max_length:
@@ -86,33 +55,48 @@ def chunk_text(text: str, max_length: int = 500):
86
  def embed_and_store(chunks):
87
  if not chunks:
88
  return
89
- embs = embedding_model.encode(chunks, convert_to_numpy=True).astype("float32")
 
 
90
  index.add(embs)
91
  chunks_store.extend(chunks)
92
 
93
 
94
  def query_llm(prompt: str) -> str:
95
- """Non-streaming version for stability on Hugging Face."""
96
- try:
97
- completion = client.chat.completions.create(
98
- model="deepseek-r1-distill-llama-70b",
99
- messages=[
100
- {"role": "system", "content": SYSTEM_MSG},
101
- {"role": "user", "content": prompt},
102
- ],
103
- temperature=0.5,
104
- max_completion_tokens=300,
105
- top_p=0.9,
106
- )
107
- # Return final text directly
108
- return completion.choices[0].message.content.strip()
109
- except Exception as e:
110
- return f"⚠️ **Error:** {str(e)}"
111
-
112
-
113
- # ------------------------- UI -------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  st.set_page_config(page_title="AI Relationship Counsellor", layout="centered")
115
- st.title("AI Relationship Counsellor")
116
 
117
  uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])
118
 
@@ -128,7 +112,7 @@ if uploaded_file:
128
  if index.ntotal == 0:
129
  st.warning("Nothing indexed yet. Please upload a chat file.")
130
  else:
131
- # Retrieve top-k relevant chunks
132
  k = min(5, index.ntotal)
133
  q_emb = embedding_model.encode([user_query], convert_to_numpy=True).astype("float32")
134
  distances, idxs = index.search(q_emb, k)
@@ -137,7 +121,7 @@ if uploaded_file:
137
  context = " ".join(relevant)
138
  final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"
139
 
140
- with st.spinner("Analyzing… please wait."):
141
  answer = query_llm(final_prompt)
142
 
143
  st.markdown("### AI Analysis")
 
 
1
  import os
2
  import numpy as np
3
  import streamlit as st
 
5
  from sentence_transformers import SentenceTransformer
6
  from groq import Groq
7
 
8
+ # ---------- Secrets / API Key ----------
9
  API_KEY = os.getenv("GROQ_API_KEY")
10
  if not API_KEY:
11
  st.error(
12
  "GROQ_API_KEY not found.\n\n"
13
  "Go to your Space → Settings → Repository secrets → Add new secret\n"
14
  "Name: GROQ_API_KEY | Value: <your Groq key>\n\n"
15
+ "Then restart this Space."
16
  )
17
  st.stop()
18
 
19
+ # ---------- Groq Client ----------
20
  client = Groq(api_key=API_KEY)
21
 
22
+ # ---------- Models / Index ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  @st.cache_resource
24
  def load_embedder():
25
+ # 384-dim embeddings
26
+ return SentenceTransformer("all-MiniLM-L6-v2")
27
 
28
  embedding_model = load_embedder()
 
29
 
30
+ DIM = 384 # all-MiniLM-L6-v2 dimension
31
  if "faiss_index" not in st.session_state:
32
+ st.session_state.faiss_index = faiss.IndexFlatL2(DIM)
33
  if "chunks_store" not in st.session_state:
34
  st.session_state.chunks_store = []
35
 
 
37
  chunks_store = st.session_state.chunks_store
38
 
39
 
40
+ # ---------- Helpers ----------
41
  def chunk_text(text: str, max_length: int = 500):
42
+ """Simple whitespace chunker by character budget."""
43
  words, chunks, cur = text.split(), [], []
44
  for w in words:
45
  if len(" ".join(cur)) + len(w) + 1 <= max_length:
 
55
  def embed_and_store(chunks):
56
  if not chunks:
57
  return
58
+ embs = embedding_model.encode(
59
+ chunks, convert_to_numpy=True, normalize_embeddings=False
60
+ ).astype("float32")
61
  index.add(embs)
62
  chunks_store.extend(chunks)
63
 
64
 
65
  def query_llm(prompt: str) -> str:
66
+ """Stream a response from Groq and return full text."""
67
+ stream = client.chat.completions.create(
68
+ model="deepseek-r1-distill-llama-70b",
69
+ messages=[
70
+ {
71
+ "role": "system",
72
+ "content": (
73
+ "Based on this WhatsApp chat, analyze whether this relationship is healthy or toxic. "
74
+ "Give a toxicity score out of 10. "
75
+ "Highlight the top 3 red flags, "
76
+ "1 positive aspect, and "
77
+ "3 improvement suggestions. "
78
+ "Start every answer with: 'Toxicity score: X/10'."
79
+
80
+ ),
81
+ },
82
+ {"role": "user", "content": prompt},
83
+ ],
84
+ temperature=0.6,
85
+ max_completion_tokens=1024,
86
+ top_p=0.95,
87
+ stream=True,
88
+ reasoning_format="raw",
89
+ )
90
+ out = []
91
+ for chunk in stream:
92
+ delta = chunk.choices[0].delta.content or ""
93
+ out.append(delta)
94
+ return "".join(out)
95
+
96
+
97
+ # ---------- UI ----------
98
  st.set_page_config(page_title="AI Relationship Counsellor", layout="centered")
99
+ st.title("NLP Relationship Counsellor")
100
 
101
  uploaded_file = st.file_uploader("Upload a .txt export of your WhatsApp chat", type=["txt"])
102
 
 
112
  if index.ntotal == 0:
113
  st.warning("Nothing indexed yet. Please upload a chat file.")
114
  else:
115
+ # top-k retrieval
116
  k = min(5, index.ntotal)
117
  q_emb = embedding_model.encode([user_query], convert_to_numpy=True).astype("float32")
118
  distances, idxs = index.search(q_emb, k)
 
121
  context = " ".join(relevant)
122
  final_prompt = f"Context:\n{context}\n\nQuestion:\n{user_query}"
123
 
124
+ with st.spinner("Analyzing…"):
125
  answer = query_llm(final_prompt)
126
 
127
  st.markdown("### AI Analysis")