Shubham170793 committed on
Commit
ca5587a
·
verified ·
1 Parent(s): 190f0f1

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +181 -115
src/streamlit_app.py CHANGED
@@ -1,34 +1,139 @@
1
  # ==========================================================
2
- # streamlit_app.py — Restored Centered Layout + Green Status UI
3
  # ==========================================================
4
  import os
5
  import re
6
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from ingestion import extract_text_from_pdf, chunk_text
8
  from vectorstore import build_faiss_index
9
  from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
10
 
11
- # ✅ Centered layout again (this caused the stretch difference)
12
- st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="centered")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # ==========================================================
15
- # 🧭 SIDEBAR — As before
16
  # ==========================================================
17
  with st.sidebar:
18
- st.markdown("### 🧭 Response Mode")
19
  mode = st.radio(
20
  "",
21
  ("Strict (Document-only)", "Extended (Document + general)"),
22
  index=0,
23
- help="Strict = answers only from the document. Extended = may include general context.",
24
  )
25
 
26
  st.markdown("---")
27
- show_advanced = st.checkbox("Show advanced settings (for developers)", value=False)
28
- if show_advanced:
29
- st.markdown("### Developer Settings")
30
- chunk_size = st.slider("Chunk Size (characters)", 200, 1500, 1000, step=50)
31
- overlap = st.slider("Chunk Overlap (characters)", 50, 200, 120, step=10)
32
  top_k = st.slider("Top K Results", 1, 10, 5)
33
  else:
34
  chunk_size, overlap, top_k = 1000, 120, 5
@@ -55,127 +160,88 @@ def set_user_query(q, idx):
55
  st.experimental_rerun()
56
 
57
  # ==========================================================
58
- # 📘 PAGE HEADER (Centered)
59
  # ==========================================================
60
- st.markdown(
61
- """
62
- <div style="text-align:center; margin-top:-10px;">
63
- <h1>πŸ“„ Enterprise Knowledge Assistant</h1>
64
- <p style="color:gray; font-size:15px;">
65
- Query SAP documentation and enterprise PDFs β€” powered by reasoning and retrieval.
66
- </p>
67
- </div>
68
- """,
69
- unsafe_allow_html=True,
70
- )
71
 
72
  # ==========================================================
73
- # 📄 MAIN CONTAINER — Centered
74
  # ==========================================================
75
- col_spacer_left, col_main, col_spacer_right = st.columns([0.15, 0.7, 0.15])
76
- with col_main:
77
-
78
- # ----------------------------------------------------------
79
- # 📂 Document Upload / Selection
80
- # ----------------------------------------------------------
81
- st.markdown("### Select a document:")
82
- doc_choice = st.radio("", ("-- Select --", "Sample PDF", "Upload Custom PDF"))
83
-
84
- temp_path = None
85
  if doc_choice == "Sample PDF":
86
  temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
87
- st.success("πŸ“˜ Sample PDF loaded successfully. Ask questions below.")
88
- st.success("βœ… Document ready β€” you can now ask questions below.")
89
- elif doc_choice == "Upload Custom PDF":
90
- uploaded_file = st.file_uploader("Upload your PDF", type="pdf")
91
  if uploaded_file:
92
  temp_path = os.path.join("/tmp", uploaded_file.name)
93
  with open(temp_path, "wb") as f:
94
  f.write(uploaded_file.getbuffer())
95
- st.success(f"βœ… '{uploaded_file.name}' uploaded successfully β€” ready to query below.")
96
- st.success("βœ… Document ready β€” you can now ask questions below.")
97
- else:
98
- st.info("⬅️ Please select or upload a document to begin.")
99
 
100
- # ----------------------------------------------------------
101
- # 🧠 Process document when loaded
102
- # ----------------------------------------------------------
103
  if temp_path:
104
- text, toc = extract_text_from_pdf(temp_path)
105
- chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
106
- embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
107
- index = build_faiss_index(embeddings)
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  # ----------------------------------------------------------
110
- # 💡 Suggested Questions
111
  # ----------------------------------------------------------
112
- if not st.session_state["query_suggestions_fixed"]:
113
- st.session_state["query_suggestions_fixed"] = [
114
- "What is the purpose of this document?",
115
- "How can integration be set up in SAP Cloud?",
116
- "What are the prerequisites mentioned?",
117
- "What steps are involved in configuration?",
118
- "How to troubleshoot integration issues?",
119
- "What is the key functionality covered?"
120
- ]
121
-
122
- st.markdown("### Ask the Assistant")
123
-
124
- visible_qs = (
125
- st.session_state["query_suggestions_fixed"][:3]
126
- if not st.session_state["show_more"]
127
- else st.session_state["query_suggestions_fixed"]
128
- )
129
- cols = st.columns(3)
130
- for i, q in enumerate(visible_qs):
131
- if cols[i % 3].button(f"πŸ’¬ {q}", key=f"suggest_{i}"):
132
- set_user_query(q, i)
133
-
134
- if st.button("Show more β–Ό" if not st.session_state["show_more"] else "Show less β–²"):
135
- st.session_state["show_more"] = not st.session_state["show_more"]
136
 
137
- # ----------------------------------------------------------
138
- # 🧩 Query Input
139
- # ----------------------------------------------------------
140
- user_query = st.text_input(
141
- "Type your question or click one above:",
142
- value=st.session_state["user_query_input"],
143
- key="user_query_input"
144
- )
 
 
 
 
 
145
 
146
  # ----------------------------------------------------------
147
- # 🤖 Generate Answer
148
  # ----------------------------------------------------------
149
- if user_query:
150
  reasoning_mode = mode == "Extended (Document + general)"
151
- st.markdown(f"**Mode:** {'🧠 Extended' if reasoning_mode else 'πŸ“„ Strict Document'}")
152
-
153
- with st.spinner("πŸ’­ Generating answer..."):
154
- retrieved = retrieve_chunks(
155
- user_query, index, chunks, top_k=top_k, embeddings=embeddings
156
- )
157
- answer = generate_answer(
158
- user_query, retrieved, reasoning_mode=reasoning_mode
159
- )
160
-
161
- st.markdown("### Assistant’s Answer")
162
- st.markdown(
163
- f"""
164
- <div style='background:#0f172a;border-left:4px solid #22c55e;
165
- padding:12px;border-radius:8px;color:#f1f5f9'>{answer}</div>
166
- """,
167
- unsafe_allow_html=True,
168
- )
169
-
170
- with st.expander("πŸ“˜ Supporting Context", expanded=False):
171
- for i, chunk in enumerate(retrieved, 1):
172
- st.markdown(f"**Chunk {i}:** {chunk.strip()}")
173
-
174
- # ----------------------------------------------------------
175
- # 📑 Document Preview
176
- # ----------------------------------------------------------
177
- if temp_path:
178
- st.markdown("---")
179
- with st.expander("πŸ“„ Document Preview", expanded=False):
180
- st.text_area("Extracted text (first 1000 chars):", text[:1000], height=180)
181
- st.caption(f"πŸ“¦ {len(chunks)} chunks processed.")
 
1
  # ==========================================================
2
+ # streamlit_app.py — Stable Layout + Latest Backend Improvements
3
  # ==========================================================
4
  import os
5
  import re
6
  import streamlit as st
7
+ import torch
8
+
9
+ # ==========================================================
10
+ # ✅ PAGE CONFIG
11
+ # ==========================================================
12
+ st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
13
+ print("CUDA available:", torch.cuda.is_available())
14
+
15
+ # ==========================================================
16
+ # ⚙️ CACHE SETUP
17
+ # ==========================================================
18
+ CACHE_DIR = "/tmp/hf_cache"
19
+ os.makedirs(CACHE_DIR, exist_ok=True)
20
+ os.environ.update({
21
+ "HF_HOME": CACHE_DIR,
22
+ "TRANSFORMERS_CACHE": CACHE_DIR,
23
+ "HF_DATASETS_CACHE": CACHE_DIR,
24
+ "HF_MODULES_CACHE": CACHE_DIR,
25
+ })
26
+
27
+ # ==========================================================
28
+ # 📦 IMPORTS
29
+ # ==========================================================
30
  from ingestion import extract_text_from_pdf, chunk_text
31
  from vectorstore import build_faiss_index
32
  from qa import retrieve_chunks, generate_answer, cache_embeddings, embed_chunks, genai_generate
33
 
34
+ # ==========================================================
35
+ # 🧠 SMART SUGGESTION GENERATOR
36
+ # ==========================================================
37
def generate_dynamic_suggestions_from_toc(toc, chunks, doc_name="Document"):
    """Generate suggested user questions from a document's TOC and opening text.

    A prompt is built from up to eight cleaned table-of-contents titles plus a
    sample of the first chunks, and sent to ``genai_generate`` (imported from
    ``qa``). The reply is parsed for question sentences.

    Args:
        toc: Iterable of ``(section, title)`` pairs; only the title is used.
        chunks: Sequence of text chunks. The first three are joined and
            truncated to 4000 characters as sample context for the prompt.
        doc_name: Document name interpolated into the prompt.

    Returns:
        Up to seven de-duplicated questions, each ending in ``?``. An empty
        list when ``toc`` or ``chunks`` is empty. Two generic fallback
        questions when generation fails or yields no usable question.
    """
    fallback = [
        "How do I start using this guide?",
        "What does this document cover?",
    ]
    if not toc or not chunks:
        return []

    titles = []
    for _sec, raw_title in toc:
        # Strip only a genuine section-number prefix ("1.2", "3.", "A.1").
        # The previous pattern matched any leading word, so a title such as
        # "Introduction to SAP" lost its first word.
        title = re.sub(
            r"^\s*(?:[A-Za-z]?\d+(?:[.\-]\d+)*|[A-Za-z](?:[.\-]\d+)+)[.)\-:]*\s+",
            "",
            raw_title,
        )
        # Drop dotted leaders followed by a page number ("Overview ..... 12").
        title = re.sub(r"\.{2,}\s*\d+$", "", title).strip()
        if 4 < len(title) < 120:
            titles.append(title)

    context_sample = " ".join(chunks[:3])[:4000]
    toc_block = "\n".join(f"- {t}" for t in titles[:8])
    prompt = "\n".join([
        "",
        "You are generating short, natural, and context-aware questions "
        f'for users reading "{doc_name}".',
        "Use the Table of Contents and some document text for inspiration.",
        "",
        "TABLE OF CONTENTS:",
        toc_block,
        "",
        "SAMPLE TEXT:",
        context_sample,
        "",
        "Generate 5–7 clear and human-like questions based strictly on this document.",
        "Each should sound natural, under 18 words, and avoid robotic phrasing.",
        "",
    ])

    # Suggestions are a best-effort UI nicety: any failure in the generator
    # degrades to the generic questions instead of breaking the page.
    try:
        ai_response = genai_generate(prompt)
    except Exception:
        return list(fallback)
    if not isinstance(ai_response, str):
        return list(fallback)

    seen = set()
    final = []
    for raw in re.findall(r"[-•]?\s*(.+?)\?", ai_response):
        # Remove list markers the model may emit: bullets, dashes, "1." / "2)".
        question = re.sub(r"^(?:\s*(?:\d+[.)]|[-•*—]))+\s*", "", raw).strip("•-— ")
        if not 8 < len(question) < 120:
            continue
        question += "?"
        key = question.casefold()
        if key not in seen:
            seen.add(key)
            final.append(question)

    # An unparseable reply gets the same fallback as a failed call.
    return final[:7] or list(fallback)
72
+
73
+ # ==========================================================
74
+ # 🎨 STYLING — MINIMAL ENTERPRISE DESIGN
75
+ # ==========================================================
76
+ st.markdown("""
77
+ <style>
78
+ div.block-container {padding-top: 1.2rem; max-width: 1080px;}
79
+ h1, h2, h3 {color: #f3f4f6; font-weight: 600;}
80
+ .suggest-chip {
81
+ background: #0f1724;
82
+ border: 1px solid #374151;
83
+ border-radius: 14px;
84
+ color: #e6eef8;
85
+ padding: 8px 12px;
86
+ cursor: pointer;
87
+ font-size: 13px;
88
+ margin: 6px 6px 6px 0;
89
+ display: inline-block;
90
+ transition: background 0.2s, transform 0.1s;
91
+ }
92
+ .suggest-chip:hover {background: #1e3a8a; transform: translateY(-2px);}
93
+ .answer-box {
94
+ background: linear-gradient(180deg,#0b1220,#071027);
95
+ border-left: 4px solid #3b82f6;
96
+ border-radius: 8px;
97
+ padding: 16px 18px;
98
+ color: #e6eef8;
99
+ margin-top: 12px;
100
+ box-shadow: 0 4px 14px rgba(0,0,0,0.35);
101
+ }
102
+ .stTextInput > div > div > input {
103
+ background-color: #0f172a !important;
104
+ color: #f1f5f9 !important;
105
+ border-radius: 6px !important;
106
+ border: 1px solid #334155 !important;
107
+ padding: 8px 10px !important;
108
+ font-size: 15px !important;
109
+ }
110
+ .stTextInput > label {font-weight: 500;}
111
+ .small-link {
112
+ font-size: 13px;
113
+ color: #60a5fa;
114
+ cursor: pointer;
115
+ }
116
+ </style>
117
+ """, unsafe_allow_html=True)
118
 
119
  # ==========================================================
120
+ # 🧭 SIDEBAR
121
  # ==========================================================
122
  with st.sidebar:
123
+ st.markdown("### 🧭 Response Style")
124
  mode = st.radio(
125
  "",
126
  ("Strict (Document-only)", "Extended (Document + general)"),
127
  index=0,
128
+ help="Strict = answers only from the uploaded document. Extended = may include related general info.",
129
  )
130
 
131
  st.markdown("---")
132
+ show_dev = st.checkbox("Show advanced settings (for developers)", value=False)
133
+ if show_dev:
134
+ st.markdown("### βš™οΈ Developer Options")
135
+ chunk_size = st.slider("Chunk Size", 200, 1500, 1000, step=50)
136
+ overlap = st.slider("Chunk Overlap", 50, 200, 120, step=10)
137
  top_k = st.slider("Top K Results", 1, 10, 5)
138
  else:
139
  chunk_size, overlap, top_k = 1000, 120, 5
 
160
  st.experimental_rerun()
161
 
162
  # ==========================================================
163
+ # 📄 MAIN SECTION
164
  # ==========================================================
165
+ st.title("πŸ“„ Enterprise Knowledge Assistant")
166
+ st.caption("Query SAP documentation and enterprise PDFs β€” powered by reasoning and retrieval.")
167
+
168
+ doc_choice = st.radio("Select a document:", ["-- Select --", "Sample PDF", "Upload Custom PDF"], index=0)
 
 
 
 
 
 
 
169
 
170
  # ==========================================================
171
+ # 📂 DOCUMENT HANDLING
172
  # ==========================================================
173
+ if doc_choice == "-- Select --":
174
+ st.info("⬅️ Select or upload a document to begin.")
175
+ else:
 
 
 
 
 
 
 
176
  if doc_choice == "Sample PDF":
177
  temp_path = os.path.join(os.path.dirname(__file__), "sample.pdf")
178
+ st.success("πŸ“˜ Sample PDF loaded successfully. You can now ask below.")
179
+ else:
180
+ uploaded_file = st.file_uploader("", type="pdf", label_visibility="collapsed")
 
181
  if uploaded_file:
182
  temp_path = os.path.join("/tmp", uploaded_file.name)
183
  with open(temp_path, "wb") as f:
184
  f.write(uploaded_file.getbuffer())
185
+ st.success(f"βœ… '{uploaded_file.name}' uploaded successfully. You can now ask below.")
186
+ else:
187
+ temp_path = None
 
188
 
 
 
 
189
  if temp_path:
190
+ with st.spinner("πŸ” Processing document..."):
191
+ text, toc = extract_text_from_pdf(temp_path)
192
+ chunks = chunk_text(text, chunk_size=chunk_size, overlap=overlap)
193
+ with st.spinner("βš™οΈ Building search index..."):
194
+ embeddings = cache_embeddings(os.path.basename(temp_path), chunks, embed_chunks)
195
+ index = build_faiss_index(embeddings)
196
+ st.success("βœ… Document ready β€” you can now ask your question below.")
197
+
198
+ doc_name = os.path.basename(temp_path)
199
+ if st.session_state["last_doc"] != doc_name:
200
+ query_suggestions = generate_dynamic_suggestions_from_toc(toc, chunks, doc_name)
201
+ st.session_state["query_suggestions_fixed"] = query_suggestions
202
+ st.session_state["last_doc"] = doc_name
203
+ else:
204
+ query_suggestions = st.session_state["query_suggestions_fixed"]
205
 
206
  # ----------------------------------------------------------
207
+ # 💬 ASK SECTION
208
  # ----------------------------------------------------------
209
+ st.markdown("### πŸ’¬ Ask the Assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
+ if query_suggestions:
212
+ visible = query_suggestions if st.session_state["show_more"] else query_suggestions[:3]
213
+ cols = st.columns(min(3, len(visible)))
214
+ for i, q in enumerate(visible):
215
+ if cols[i % 3].button(f"πŸ’¬ {q}", key=f"sugg_{i}"):
216
+ set_user_query(q, i)
217
+
218
+ toggle_text = "Show less β–²" if st.session_state["show_more"] else "Show more β–Ό"
219
+ if st.button(toggle_text, help="Show or hide more suggestions"):
220
+ st.session_state["show_more"] = not st.session_state["show_more"]
221
+ st.experimental_rerun()
222
+
223
+ user_query = st.text_input("Type your question or click one above:", key="user_query_input")
224
 
225
  # ----------------------------------------------------------
226
+ # 💡 RESPONSE SECTION
227
  # ----------------------------------------------------------
228
+ if user_query.strip():
229
  reasoning_mode = mode == "Extended (Document + general)"
230
+ with st.spinner("πŸ’­ Generating your answer..."):
231
+ retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k, embeddings=embeddings)
232
+ answer = generate_answer(user_query, retrieved, reasoning_mode=reasoning_mode)
233
+ st.markdown("### πŸ€– Assistant’s Answer")
234
+ st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
235
+
236
+ with st.expander("πŸ“˜ Supporting Context"):
237
+ for i, r in enumerate(retrieved, start=1):
238
+ st.markdown(f"**Chunk {i}:** {r}")
239
+
240
+ if toc:
241
+ with st.expander("πŸ“š Explore Document Sections"):
242
+ toc_text = "\n".join([f"{sec}. {title}" for sec, title in toc])
243
+ st.text_area("", toc_text, height=140)
244
+
245
+ with st.expander("πŸ“„ Document Preview"):
246
+ st.text_area("", text[:1000], height=140)
247
+ st.caption(f"{len(chunks)} chunks processed.")