Shubham170793 committed on
Commit
f5088d3
·
verified ·
1 Parent(s): 499bde3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +72 -1
src/streamlit_app.py CHANGED
@@ -90,6 +90,54 @@ from ingestion import extract_text_from_pdf, chunk_text
90
  from vectorstore import build_faiss_index
91
  from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # ==========================================================
94
  # πŸ“ Paths
95
  # ==========================================================
@@ -164,6 +212,16 @@ elif doc_choice == "Sample PDF":
164
  toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
165
  st.text_area("TOC Preview", toc_text, height=200)
166
 
 
 
 
 
 
 
 
 
 
 
167
  # ✅ Cached Embeddings
168
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
169
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
@@ -194,6 +252,16 @@ elif doc_choice == "Upload Custom PDF":
194
  toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
195
  st.text_area("TOC Preview", toc_text, height=200)
196
 
 
 
 
 
 
 
 
 
 
 
197
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
198
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
199
  hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
@@ -222,7 +290,10 @@ if index and chunks:
222
  st.markdown("---")
223
  st.subheader("πŸ€– Ask a Question")
224
 
225
- user_query = st.text_input("πŸ” Your question about the document:")
 
 
 
226
 
227
  if user_query:
228
  mode_label = (
 
90
  from vectorstore import build_faiss_index
91
  from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks
92
 
93
# ==========================================================
# 🧠 TOC-Based Smart Question Generator
# ==========================================================
def clean_toc_titles(toc):
    """Strip leading section numbering from TOC entries.

    Args:
        toc: Iterable of ``(section, title)`` pairs, as produced by the
            TOC extractor (only the title part is used here).

    Returns:
        List of titles with any leading numbering such as ``"3.1 "``
        removed; titles that are three characters or shorter after
        cleaning are dropped as not meaningful.
    """
    # Remove prefixes like "3", "3.1", "12.4.1" plus trailing whitespace.
    stripped = (re.sub(r"^\d+(\.\d+)*\s*", "", raw).strip() for _, raw in toc)
    return [title for title in stripped if len(title) > 3]


107
def generate_query_suggestions(toc_titles):
    """Convert section titles into conversational question suggestions.

    Args:
        toc_titles: Cleaned section-title strings (see ``clean_toc_titles``).

    Returns:
        At most six unique question strings, in first-occurrence order.
    """
    suggestions = []
    for title in toc_titles:
        lower = title.lower()

        # Keyword buckets map common doc-section names to canned questions;
        # branch order matters (e.g. "Setup Overview" hits the setup branch).
        if "prerequisite" in lower:
            suggestions.append("What are the prerequisites for setting this up?")
        elif "restriction" in lower or "limitation" in lower:
            suggestions.append("What are the key restrictions or limitations?")
        elif "configuration" in lower or "setup" in lower:
            suggestions.append(f"How do I {lower}?")
        elif "overview" in lower or "introduction" in lower:
            suggestions.append("Can you give me an overview of this document?")
        elif "purpose" in lower:
            suggestions.append("What is the purpose of this guide?")
        elif "example" in lower:
            suggestions.append("Can you show an example from this document?")
        elif "process" in lower:
            suggestions.append(f"Can you explain the {lower} process?")
        elif "use" in lower:
            suggestions.append(f"How do I {lower}?")
        else:
            suggestions.append(f"Explain the section about {lower}.")

    # Order-preserving dedupe; cap at six so the UI stays compact.
    return list(dict.fromkeys(suggestions))[:6]


141
  # ==========================================================
142
  # πŸ“ Paths
143
  # ==========================================================
 
212
  toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
213
  st.text_area("TOC Preview", toc_text, height=200)
214
 
215
# 💡 Generate and display smart suggestions derived from the TOC.
# (Emoji in the UI strings repaired from mojibake: "πŸ’‘"/"πŸ”" → "💡"/"🔍".)
clean_titles = clean_toc_titles(toc)
query_suggestions = generate_query_suggestions(clean_titles)
if query_suggestions:
    st.markdown("#### 💡 Suggested Questions")
    # Two-column button grid; a click pre-fills the query box via
    # session state, which the st.text_input further down reads back.
    cols = st.columns(2)
    for i, q in enumerate(query_suggestions):
        if cols[i % 2].button(f"🔍 {q}"):
            st.session_state["user_query"] = q

225
  # βœ… Cached Embeddings
226
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
227
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
 
252
  toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
253
  st.text_area("TOC Preview", toc_text, height=200)
254
 
255
# 💡 Generate and display smart suggestions derived from the TOC.
# (Emoji in the UI strings repaired from mojibake: "πŸ’‘"/"πŸ”" → "💡"/"🔍".)
clean_titles = clean_toc_titles(toc)
query_suggestions = generate_query_suggestions(clean_titles)
if query_suggestions:
    st.markdown("#### 💡 Suggested Questions")
    # Two-column button grid; a click pre-fills the query box via
    # session state, which the st.text_input further down reads back.
    cols = st.columns(2)
    for i, q in enumerate(query_suggestions):
        if cols[i % 2].button(f"🔍 {q}"):
            st.session_state["user_query"] = q

265
  with st.spinner("βš™οΈ Loading cached embeddings or generating new ones..."):
266
  embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
267
  hash_name = hashlib.md5(os.path.basename(temp_path).encode()).hexdigest()
 
290
  st.markdown("---")
291
  st.subheader("πŸ€– Ask a Question")
292
 
293
# Question box, pre-filled from session state so the suggestion buttons
# above can populate it; the user can still type over the suggestion.
# (Label emoji repaired from mojibake "πŸ”" → "🔍".)
# NOTE(review): once a button sets "user_query", the value persists across
# reruns until another button overwrites it — confirm this is intended.
user_query = st.text_input(
    "🔍 Your question about the document:",
    value=st.session_state.get("user_query", ""),
)
297
 
298
  if user_query:
299
  mode_label = (