Shubham170793 committed on
Commit
cc58d64
·
verified ·
1 Parent(s): e97699c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +47 -31
src/streamlit_app.py CHANGED
@@ -1,18 +1,20 @@
1
  import os
2
  import streamlit as st
3
 
4
- # --- Streamlit safe options (prevents upload 403 / CORS issues) ---
5
  st.set_option("client.showErrorDetails", True)
6
 
7
  # ---------------------------
8
- # Cache Fix for Hugging Face
9
  # ---------------------------
10
  CACHE_DIR = "/tmp/hf_cache"
11
  os.makedirs(CACHE_DIR, exist_ok=True)
12
- os.environ["HF_HOME"] = CACHE_DIR
13
- os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
14
- os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
15
- os.environ["HF_MODULES_CACHE"] = CACHE_DIR
 
 
16
 
17
  # ---------------------------
18
  # Imports AFTER environment setup
@@ -34,7 +36,7 @@ SAMPLE_PATH = os.path.join(BASE_DIR, "sample.pdf")
34
  # ---------------------------
35
  st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
36
  st.title("πŸ“„ Enterprise Knowledge Assistant")
37
- st.caption("Select a document from the library or upload your own, then ask questions.")
38
 
39
  # ---------------------------
40
  # Sidebar (Library + Settings + Credits)
@@ -43,7 +45,7 @@ with st.sidebar:
43
  if os.path.exists(LOGO_PATH):
44
  st.image(LOGO_PATH, width=150)
45
 
46
- # 1. Document Library
47
  st.header("πŸ“š Document Library")
48
  doc_choice = st.radio(
49
  "Choose a document:",
@@ -53,14 +55,14 @@ with st.sidebar:
53
 
54
  st.markdown("---")
55
 
56
- # 2. Settings
57
  st.header("βš™οΈ Settings")
58
- chunk_size = st.slider("Chunk Size", 200, 1000, 500, step=100)
59
- top_k = st.slider("Top K Results", 1, 5, 3)
60
 
61
  st.markdown("---")
62
 
63
- # 3. Branding
64
  st.caption("πŸ‘¨β€πŸ’» Built by Shubham Sharma")
65
  st.markdown("[πŸ“‚ GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")
66
 
@@ -70,37 +72,39 @@ with st.sidebar:
70
  text, chunks, index = None, None, None
71
 
72
  if doc_choice == "-- Select --":
73
- st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar.")
74
 
75
  elif doc_choice == "Sample PDF":
76
  temp_path = SAMPLE_PATH
77
- st.success("πŸ“˜ Sample PDF selected")
78
- text = extract_text_from_pdf(temp_path)
79
- chunks = chunk_text(text, chunk_size=chunk_size)
80
- embeddings = generate_embeddings(chunks)
81
- index = build_faiss_index(embeddings)
 
82
 
83
  elif doc_choice == "Upload Custom PDF":
84
  uploaded_file = st.file_uploader("πŸ“‚ Upload your PDF", type="pdf")
85
  if uploaded_file:
86
- # Always write to /tmp (the only guaranteed writable folder)
87
  temp_path = os.path.join("/tmp", uploaded_file.name)
88
  with open(temp_path, "wb") as f:
89
  f.write(uploaded_file.getbuffer())
90
- st.success("βœ… Document uploaded and processed!")
91
 
92
- text = extract_text_from_pdf(temp_path)
93
- chunks = chunk_text(text, chunk_size=chunk_size)
94
- embeddings = generate_embeddings(chunks)
95
- index = build_faiss_index(embeddings)
 
 
96
 
97
  # ---------------------------
98
  # Document Preview
99
  # ---------------------------
100
  if chunks:
101
  st.subheader("πŸ“‘ Document Preview")
102
- st.text_area("Extracted text (first 1000 chars)", text[:1000], height=150)
103
- st.caption(f"πŸ“¦ {len(chunks)} chunks created")
104
 
105
  # ---------------------------
106
  # Query Section
@@ -111,12 +115,24 @@ if index and chunks:
111
 
112
  user_query = st.text_input("πŸ” Your question about the document:")
113
  if user_query:
114
- retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
115
- answer = generate_answer(user_query, retrieved)
 
116
 
 
117
  st.markdown("### βœ… Assistant’s Answer")
118
- st.write(answer)
119
 
120
- with st.expander("πŸ“„ Supporting Chunks"):
 
121
  for i, r in enumerate(retrieved, start=1):
122
- st.markdown(f"**Chunk {i}:** {r}")
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import streamlit as st
3
 
4
+ # --- Streamlit Safe Options (Hugging Face Spaces upload fix) ---
5
  st.set_option("client.showErrorDetails", True)
6
 
7
  # ---------------------------
8
+ # Hugging Face Cache Fix (use /tmp, which is writable)
9
  # ---------------------------
10
  CACHE_DIR = "/tmp/hf_cache"
11
  os.makedirs(CACHE_DIR, exist_ok=True)
12
+ os.environ.update({
13
+ "HF_HOME": CACHE_DIR,
14
+ "TRANSFORMERS_CACHE": CACHE_DIR,
15
+ "HF_DATASETS_CACHE": CACHE_DIR,
16
+ "HF_MODULES_CACHE": CACHE_DIR
17
+ })
18
 
19
  # ---------------------------
20
  # Imports AFTER environment setup
 
36
  # ---------------------------
37
  st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
38
  st.title("πŸ“„ Enterprise Knowledge Assistant")
39
+ st.caption("Upload a PDF or use the sample file to explore intelligent document Q&A.")
40
 
41
  # ---------------------------
42
  # Sidebar (Library + Settings + Credits)
 
45
  if os.path.exists(LOGO_PATH):
46
  st.image(LOGO_PATH, width=150)
47
 
48
+ # 1️⃣ Document Library
49
  st.header("πŸ“š Document Library")
50
  doc_choice = st.radio(
51
  "Choose a document:",
 
55
 
56
  st.markdown("---")
57
 
58
+ # 2️⃣ Settings
59
  st.header("βš™οΈ Settings")
60
+ chunk_size = st.slider("Chunk Size (characters)", 300, 1200, 800, step=100)
61
+ top_k = st.slider("Top K Results (retrieved chunks)", 1, 10, 5)
62
 
63
  st.markdown("---")
64
 
65
+ # 3️⃣ Branding
66
  st.caption("πŸ‘¨β€πŸ’» Built by Shubham Sharma")
67
  st.markdown("[πŸ“‚ GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")
68
 
 
72
  text, chunks, index = None, None, None
73
 
74
  if doc_choice == "-- Select --":
75
+ st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar to get started.")
76
 
77
  elif doc_choice == "Sample PDF":
78
  temp_path = SAMPLE_PATH
79
+ st.success("πŸ“˜ Using built-in Sample PDF")
80
+ with st.spinner("πŸ” Extracting and processing document..."):
81
+ text = extract_text_from_pdf(temp_path)
82
+ chunks = chunk_text(text, chunk_size=chunk_size)
83
+ embeddings = generate_embeddings(chunks)
84
+ index = build_faiss_index(embeddings)
85
 
86
  elif doc_choice == "Upload Custom PDF":
87
  uploaded_file = st.file_uploader("πŸ“‚ Upload your PDF", type="pdf")
88
  if uploaded_file:
 
89
  temp_path = os.path.join("/tmp", uploaded_file.name)
90
  with open(temp_path, "wb") as f:
91
  f.write(uploaded_file.getbuffer())
92
+ st.success(f"βœ… File '{uploaded_file.name}' uploaded and saved to /tmp")
93
 
94
+ with st.spinner("βš™οΈ Extracting and processing your document..."):
95
+ text = extract_text_from_pdf(temp_path)
96
+ chunks = chunk_text(text, chunk_size=chunk_size)
97
+ embeddings = generate_embeddings(chunks)
98
+ index = build_faiss_index(embeddings)
99
+ st.success("πŸš€ Document processed successfully!")
100
 
101
  # ---------------------------
102
  # Document Preview
103
  # ---------------------------
104
  if chunks:
105
  st.subheader("πŸ“‘ Document Preview")
106
+ st.text_area("Extracted text (first 1000 chars)", text[:1000], height=200)
107
+ st.caption(f"πŸ“¦ {len(chunks)} chunks created | Avg chunk length: {int(sum(len(c) for c in chunks) / len(chunks))} chars")
108
 
109
  # ---------------------------
110
  # Query Section
 
115
 
116
  user_query = st.text_input("πŸ” Your question about the document:")
117
  if user_query:
118
+ with st.spinner("🧠 Thinking... retrieving context and generating answer..."):
119
+ retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
120
+ answer = generate_answer(user_query, retrieved)
121
 
122
+ # Answer Section
123
  st.markdown("### βœ… Assistant’s Answer")
124
+ st.markdown(f"<div style='background-color:#0E1117;padding:12px;border-radius:10px;'>{answer}</div>", unsafe_allow_html=True)
125
 
126
+ # Supporting Chunks Section
127
+ with st.expander("πŸ“„ Supporting Chunks (Context Used)"):
128
  for i, r in enumerate(retrieved, start=1):
129
+ st.markdown(
130
+ f"""
131
+ <div style='background-color:#111827;padding:10px;border-radius:8px;margin-bottom:6px;'>
132
+ <b>Chunk {i}:</b><br>{r}
133
+ </div>
134
+ """,
135
+ unsafe_allow_html=True,
136
+ )
137
+ else:
138
+ st.info("πŸ“₯ Upload or select a document to start exploring.")