Shubham170793 committed on
Commit 043784e · verified · 1 Parent(s): a87194e

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +51 -72
src/streamlit_app.py CHANGED
@@ -5,7 +5,7 @@ import streamlit as st
 import torch
 
 # ==========================================================
-# ✅ Environment Diagnostics
+# ✅ Environment Setup
 # ==========================================================
 print("CUDA available:", torch.cuda.is_available())
 if torch.cuda.is_available():
@@ -13,20 +13,13 @@ if torch.cuda.is_available():
 else:
     print("Running on CPU")
 
-# ==========================================================
-# ✅ Page Configuration
-# ==========================================================
 st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
 
 # ==========================================================
-# 🧹 Cache Management
+# 🧹 Cache Cleanup
 # ==========================================================
 def clean_cache(max_size_gb: float = 2.0):
-    folders = [
-        "/root/.cache/huggingface",
-        "/root/.cache/transformers",
-        "/root/.cache/torch",
-    ]
+    folders = ["/root/.cache/huggingface", "/root/.cache/transformers", "/root/.cache/torch"]
     total_deleted = 0.0
     for folder in folders:
         if os.path.exists(folder):
@@ -44,7 +37,7 @@ def clean_cache(max_size_gb: float = 2.0):
 clean_cache()
 
 # ==========================================================
-# ⚙️ Hugging Face Cache Configuration
+# ⚙️ Hugging Face Cache
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
@@ -66,7 +59,6 @@ from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks,
 # 🧠 Smart Suggestion Generator
 # ==========================================================
 def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
-    """Generate natural-sounding, concise professional questions from TOC + context."""
     if not toc or not chunks:
         return []
 
@@ -79,15 +71,17 @@ def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
 
     context_sample = " ".join(chunks[:3])[:4000]
     prompt = f"""
-You are an enterprise documentation assistant.
-Generate 5–7 professional, relevant questions a user might ask about this document: "{doc_name}".
-Use the table of contents and content sample below.
+You are generating intelligent, short, and context-aware questions from a document titled "{doc_name}".
+Use the following Table of Contents and sample content for reference.
 
 TABLE OF CONTENTS:
-{chr(10).join(['- ' + t for t in titles[:10]])}
+{chr(10).join(['- ' + t for t in titles[:8]])}
 
 CONTENT SAMPLE:
 {context_sample}
+
+Generate 5–7 concise, professional, and relevant questions a user might ask about this document.
+Each question should be under 20 words and directly based on the context.
 """
 
     try:
@@ -101,18 +95,7 @@ def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
                 final.append(q)
         return final[:7]
     except Exception:
-        return [
-            "What is this document about?",
-            "How do I start using this process?",
-            "What configurations are needed?",
-        ]
-
-# ==========================================================
-# 📁 Paths
-# ==========================================================
-BASE_DIR = os.path.dirname(__file__)
-LOGO_PATH = os.path.join(BASE_DIR, "logo.png")
-SAMPLE_PATH = os.path.join(BASE_DIR, "sample.pdf")
+        return ["What is this document about?", "How do I start using this process?"]
 
 # ==========================================================
 # 🖥️ Header
@@ -124,9 +107,6 @@ st.caption("Ask questions about SAP documentation and enterprise PDFs — powere
 # 🧭 Sidebar
 # ==========================================================
 with st.sidebar:
-    if os.path.exists(LOGO_PATH):
-        st.image(LOGO_PATH, width=150)
-
     if "reasoning_mode" not in st.session_state:
         st.session_state.reasoning_mode = False
     st.session_state.reasoning_mode = st.toggle(
@@ -148,28 +128,46 @@ with st.sidebar:
     st.caption("✨ Built by Shubham Sharma")
 
 # ==========================================================
-# 🧾 Document Handling
+# 🧾 Document Handling + Ask Section
 # ==========================================================
 text, chunks, index, embeddings, toc = None, None, None, None, None
 
-# --- Initialize session state ---
-for key, default in {
-    "show_more": False,
-    "user_query_input": "",
-    "selected_suggestion": None,
-    "pending_query": None,
-}.items():
-    if key not in st.session_state:
-        st.session_state[key] = default
-
-# ----------------------------------------------------------
-# 📄 Document Handling + Ask a Question
-# ----------------------------------------------------------
+# Style for chips
+st.markdown("""
+<style>
+.suggest-chip {
+    background-color: #1f2937;
+    border: 1px solid #374151;
+    border-radius: 20px;
+    color: #f9fafb;
+    padding: 8px 14px;
+    cursor: pointer;
+    font-size: 14px;
+    transition: all 0.2s ease-in-out;
+}
+.suggest-chip:hover {
+    background-color: #2563eb;
+    border-color: #3b82f6;
+    color: #ffffff;
+    box-shadow: 0 0 10px rgba(59,130,246,0.5);
+}
+</style>
+""", unsafe_allow_html=True)
+
+# Initialize session state
+if "user_query_input" not in st.session_state:
+    st.session_state["user_query_input"] = ""
+if "show_more" not in st.session_state:
+    st.session_state["show_more"] = False
+
+def set_user_query(q: str):
+    st.session_state["user_query_input"] = q
+
 if doc_choice == "-- Select --":
     st.info("⬅️ Please choose a document from the sidebar to begin.")
 else:
     if doc_choice == "Sample PDF":
-        temp_path = SAMPLE_PATH
+        temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
         st.success("📘 Using built-in Sample PDF")
     else:
         uploaded_file = st.file_uploader("📂 Upload your PDF", type="pdf")
@@ -193,10 +191,7 @@ else:
         st.text_area("TOC Preview", toc_text, height=180)
         query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, os.path.basename(temp_path))
     else:
-        query_suggestions = [
-            "What is this document about?",
-            "How do I start using this process?",
-        ]
+        query_suggestions = ["What is this document about?", "How do I start using this process?"]
 
     with st.spinner("⚙️ Preparing search index..."):
         embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
@@ -211,34 +206,21 @@ else:
     if query_suggestions:
        st.markdown("#### 💡 Suggested Questions")
 
-        visible = query_suggestions if st.session_state.show_more else query_suggestions[:3]
-        cols = st.columns(min(3, len(visible)))
+        visible_suggestions = query_suggestions if st.session_state.show_more else query_suggestions[:3]
+        cols = st.columns(min(3, len(visible_suggestions)))
 
-        for i, q in enumerate(visible):
-            col = cols[i % 3]
-            if col.button(f"🔍 {q}", key=f"suggest_{i}"):
-                st.session_state.pending_query = q
-                st.experimental_rerun()
+        for i, q in enumerate(visible_suggestions):
+            cols[i % 3].button(f"🔍 {q}", key=f"suggest_{i}", on_click=set_user_query, args=(q,))
 
         toggle_text = "Show less ▲" if st.session_state.show_more else "Show more ▼"
-        if st.button(toggle_text, key="toggle_show_more"):
+        if st.button(toggle_text):
            st.session_state.show_more = not st.session_state.show_more
            st.experimental_rerun()
 
-    # --- Handle query sync ---
-    if st.session_state.pending_query:
-        st.session_state.user_query_input = st.session_state.pending_query
-        st.session_state.pending_query = None
-
-    user_query = st.text_input(
-        "Type your question or pick one above:",
-        value=st.session_state.user_query_input,
-        key="user_query_input",
-    )
+    user_query = st.text_input("Type your question or pick one above:", key="user_query_input")
 
     if user_query.strip():
        st.caption("Mode: 🧠 Reasoning" if st.session_state.reasoning_mode else "Mode: 📄 Strict Document")
-
        with st.spinner("💭 Analyzing your document..."):
            retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
            answer = generate_answer(user_query, retrieved, reasoning_mode=st.session_state.reasoning_mode)
@@ -250,9 +232,6 @@ else:
            for i, r in enumerate(retrieved, start=1):
                st.markdown(f"**Chunk {i}:** {r}")
 
-    # ----------------------------------------------------------
-    # 📑 Document Preview
-    # ----------------------------------------------------------
     if chunks:
        st.markdown("---")
        st.subheader("📑 Document Preview")
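Note on the suggestion-button change: the commit drops the old pending_query + st.experimental_rerun() flow and instead lets each button's on_click callback write directly into st.session_state, which the text input reads through its key. Below is a minimal, self-contained sketch of that pattern; the suggestion strings are just the fallback questions from the script, and nothing else from the app (qa.py, doc_choice, etc.) is assumed.

import streamlit as st

# Callbacks run before the script reruns, so writing to the widget's
# session_state key is enough; no explicit st.experimental_rerun() needed.
if "user_query_input" not in st.session_state:
    st.session_state["user_query_input"] = ""

def set_user_query(q: str):
    st.session_state["user_query_input"] = q

for i, q in enumerate(["What is this document about?", "How do I start using this process?"]):
    st.button(f"🔍 {q}", key=f"suggest_{i}", on_click=set_user_query, args=(q,))

# Shares the same key, so a clicked suggestion appears here on the next run.
user_query = st.text_input("Type your question or pick one above:", key="user_query_input")
st.write("Current query:", user_query)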