Shubham170793 committed on
Commit
96e2f7f
·
verified ·
1 Parent(s): 361f68a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +22 -98
src/streamlit_app.py CHANGED
@@ -7,20 +7,6 @@ import streamlit as st
7
  import torch
8
  from document_registry import DocumentRegistry
9
 
10
- # --- Initialize session keys to avoid rerun issues ---
11
- if "registry" not in st.session_state:
12
- st.session_state.registry = DocumentRegistry()
13
- if "active_doc" not in st.session_state:
14
- st.session_state.active_doc = None
15
- if "chunks" not in st.session_state:
16
- st.session_state.chunks = []
17
- if "embeddings" not in st.session_state:
18
- st.session_state.embeddings = None
19
- if "index" not in st.session_state:
20
- st.session_state.index = None
21
- if "query_suggestions_fixed" not in st.session_state:
22
- st.session_state.query_suggestions_fixed = []
23
-
24
 
25
  # ==========================================================
26
  # ✅ PAGE CONFIGS
@@ -100,44 +86,6 @@ Output: Write each question on a new line. Do not invent facts β€” base question
100
  except Exception:
101
  return ["How do I start using this guide?", "What does this document cover?"]
102
 
103
- # --- Function to activate a document from the registry ---
104
- def activate_document(doc_name):
105
- """Load an existing registered document into the active context."""
106
- registry = st.session_state.registry
107
- doc = registry.get_doc(doc_name)
108
-
109
- if not doc:
110
- st.warning(f"Document {doc_name} not found in registry.")
111
- return
112
-
113
- # Load all available preprocessed data
114
- st.session_state.active_doc = doc
115
- st.session_state.chunks = doc.get("chunks", [])
116
- st.session_state.embeddings = doc.get("embeddings")
117
- st.session_state.index = doc.get("index")
118
-
119
- # --- Handle TOC safely ---
120
- # Some docs may not have toc saved in registry; fallback to empty list
121
- toc_data = doc.get("toc", [])
122
- if not toc_data:
123
- st.info(f"⚠️ No TOC found for {doc_name}. Using empty TOC for suggestions.")
124
-
125
- # --- Regenerate suggestions safely ---
126
- try:
127
- st.session_state.query_suggestions_fixed = generate_dynamic_suggestions_from_toc(
128
- toc_data,
129
- st.session_state.chunks,
130
- doc.get("name", "Unknown Document")
131
- )
132
- except Exception as e:
133
- # Defensive guard: even if suggestion generator fails, app won't break
134
- st.warning(f"Suggestion generation failed: {e}")
135
- st.session_state.query_suggestions_fixed = []
136
-
137
- # --- Trigger a clean UI rerun so the switch shows immediately ---
138
- st.rerun()
139
-
140
-
141
 
142
  # ==========================================================
143
  # 🎨 STYLING β€” REVERT TO ORIGINAL
@@ -189,6 +137,7 @@ h1, h2, h3 {color: #f3f4f6; font-weight: 600;}
189
  # 🧭 SIDEBAR
190
  # ==========================================================
191
  with st.sidebar:
 
192
  st.markdown("### 🧭 Response Style")
193
  mode = st.radio(
194
  "",
@@ -199,50 +148,23 @@ with st.sidebar:
199
 
200
  st.markdown("---")
201
 
202
- # 🧩 Document Registry Viewer
203
- if "registry" in st.session_state:
204
- registry = st.session_state["registry"]
205
- registered_docs = registry.list_docs() if hasattr(registry, "list_docs") else []
206
-
207
- if registered_docs:
208
- st.markdown("### πŸ“š Registered Documents")
209
-
210
- # Loop through registered documents
211
- for i, doc in enumerate(registered_docs, start=1):
212
- doc_name = doc.get("name", f"Document {i}")
213
- chunks = doc.get("num_chunks", "?")
214
- toc_source = doc.get("toc_source", "β€”")
215
-
216
- # Use an expander for each document
217
- with st.expander(f"{i}. {doc_name}", expanded=False):
218
- st.markdown(f"- 🧩 **Chunks:** {chunks}")
219
- st.markdown(f"- πŸ—‚οΈ **TOC Source:** {toc_source}")
220
-
221
- # If this document is currently active, show status
222
- if (
223
- "active_doc" in st.session_state
224
- and st.session_state.active_doc
225
- and isinstance(st.session_state.active_doc, dict)
226
- and st.session_state.active_doc.get("name") == doc_name
227
-
228
- ):
229
- st.success("βœ… Active Document")
230
- else:
231
- # Button to activate this document
232
- st.button(
233
- "Activate",
234
- key=f"activate_{doc_name}",
235
- on_click=activate_document,
236
- args=(doc_name,),
237
- )
238
- else:
239
- st.caption("πŸ“­ No documents registered yet.")
240
  else:
241
- st.caption("πŸ“­ No registry initialized yet.")
242
 
243
  st.markdown("---")
244
 
245
- # 🧠 Developer Options Section
246
  show_dev = st.checkbox("Show advanced settings (for developers)", value=False)
247
  if show_dev:
248
  st.markdown("### βš™οΈ Developer Options")
@@ -255,7 +177,7 @@ with st.sidebar:
255
  st.markdown("---")
256
  st.caption("✨ Built by Shubham Sharma")
257
 
258
- # 🧩 Developer Insights
259
  if show_dev:
260
  st.markdown("---")
261
  with st.expander("🧩 Developer Insights", expanded=False):
@@ -278,6 +200,7 @@ with st.sidebar:
278
  st.caption(f"{len(st.session_state.get('chunks', []))} chunks processed.")
279
 
280
 
 
281
  # ==========================================================
282
  # 🧠 SESSION STATE SAFETY INITIALIZATION
283
  # ==========================================================
@@ -351,12 +274,13 @@ else:
351
  embeddings = cache_embeddings(doc_name, chunks, embed_chunks)
352
  index = build_faiss_index(embeddings)
353
 
354
- if "registry" not in st.session_state:
355
- st.session_state["registry"] = DocumentRegistry()
 
 
356
 
357
- registry = st.session_state["registry"]
358
- doc_id = registry.register(temp_path, chunks, embeddings, index)
359
- st.session_state["active_doc"] = doc_id
360
 
361
  status.success("βœ… Document processed successfully β€” all set to query your assistant!")
362
 
 
7
  import torch
8
  from document_registry import DocumentRegistry
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # ==========================================================
12
  # ✅ PAGE CONFIGS
 
86
  except Exception:
87
  return ["How do I start using this guide?", "What does this document cover?"]
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  # ==========================================================
91
  # 🎨 STYLING β€” REVERT TO ORIGINAL
 
137
  # 🧭 SIDEBAR
138
  # ==========================================================
139
  with st.sidebar:
140
+ # --- Response Style ---
141
  st.markdown("### 🧭 Response Style")
142
  mode = st.radio(
143
  "",
 
148
 
149
  st.markdown("---")
150
 
151
+ # --- Registered Documents (simple, user-friendly) ---
152
+ registry = st.session_state.get("registry", None)
153
+ registered_docs = registry.list_docs() if registry and hasattr(registry, "list_docs") else []
154
+
155
+ if registered_docs:
156
+ st.markdown("### πŸ“š Uploaded Documents")
157
+ for i, doc in enumerate(registered_docs, start=1):
158
+ doc_name = doc.get("name", f"Document {i}")
159
+ chunks = doc.get("num_chunks", "?")
160
+ toc_source = doc.get("toc_source", "β€”")
161
+ st.markdown(f"**{i}. {doc_name}** β€” {chunks} chunks *(TOC: {toc_source})*")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  else:
163
+ st.caption("πŸ“­ No documents uploaded yet.")
164
 
165
  st.markdown("---")
166
 
167
+ # --- Developer Options ---
168
  show_dev = st.checkbox("Show advanced settings (for developers)", value=False)
169
  if show_dev:
170
  st.markdown("### βš™οΈ Developer Options")
 
177
  st.markdown("---")
178
  st.caption("✨ Built by Shubham Sharma")
179
 
180
+ # --- Developer Insights (optional, hidden by default) ---
181
  if show_dev:
182
  st.markdown("---")
183
  with st.expander("🧩 Developer Insights", expanded=False):
 
200
  st.caption(f"{len(st.session_state.get('chunks', []))} chunks processed.")
201
 
202
 
203
+
204
  # ==========================================================
205
  # 🧠 SESSION STATE SAFETY INITIALIZATION
206
  # ==========================================================
 
274
  embeddings = cache_embeddings(doc_name, chunks, embed_chunks)
275
  index = build_faiss_index(embeddings)
276
 
277
+ registry = st.session_state.get("registry")
278
+ if not registry:
279
+ registry = DocumentRegistry()
280
+ st.session_state["registry"] = registry
281
 
282
+ registry.register(temp_path, chunks, embeddings, index)
283
+
 
284
 
285
  status.success("βœ… Document processed successfully β€” all set to query your assistant!")
286