Update src/streamlit_app.py
Browse files- src/streamlit_app.py +24 -86
src/streamlit_app.py
CHANGED
|
@@ -1,98 +1,36 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
from ingestion import extract_text_from_pdf, chunk_text
|
| 5 |
from embeddings import generate_embeddings
|
| 6 |
from vectorstore import build_faiss_index
|
| 7 |
from qa import retrieve_chunks, generate_answer
|
| 8 |
-
import faiss
|
| 9 |
|
| 10 |
-
#
|
| 11 |
# App Config
|
| 12 |
-
#
|
| 13 |
st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
|
| 14 |
st.title("π Enterprise Knowledge Assistant")
|
| 15 |
-
st.caption("Select a document from the library or upload your own, then ask questions.")
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
#
|
|
|
|
|
|
|
| 20 |
with st.sidebar:
|
| 21 |
st.image("app/logo.png", width=150)
|
| 22 |
-
|
| 23 |
-
# 1. Document Library FIRST
|
| 24 |
-
st.header("π Document Library")
|
| 25 |
-
doc_choice = st.radio(
|
| 26 |
-
"Choose a document:",
|
| 27 |
-
["-- Select --", "Sample PDF", "Upload Custom PDF"],
|
| 28 |
-
index=0
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
-
st.markdown("---")
|
| 32 |
-
|
| 33 |
-
# 2. Settings SECOND
|
| 34 |
-
st.header("⚙️ Settings")
|
| 35 |
-
chunk_size = st.slider("Chunk Size", 200, 1000, 500, step=100)
|
| 36 |
-
top_k = st.slider("Top K Results", 1, 5, 3)
|
| 37 |
-
|
| 38 |
-
st.markdown("---")
|
| 39 |
-
|
| 40 |
-
# 3. Branding / Credits LAST
|
| 41 |
-
st.caption("👨‍💻 Built by Shubham Sharma")
|
| 42 |
-
st.markdown("[π GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")
|
| 43 |
-
|
| 44 |
-
# ---------------------------
|
| 45 |
-
# Document Handling
|
| 46 |
-
# ---------------------------
|
| 47 |
-
text, chunks, index = None, None, None
|
| 48 |
-
|
| 49 |
-
if doc_choice == "-- Select --":
|
| 50 |
-
st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar to continue.")
|
| 51 |
-
|
| 52 |
-
elif doc_choice == "Sample PDF":
|
| 53 |
-
temp_path = os.path.join("app", "sample.pdf")
|
| 54 |
-
st.success("π Sample PDF selected")
|
| 55 |
-
text = extract_text_from_pdf(temp_path)
|
| 56 |
-
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 57 |
-
embeddings = generate_embeddings(chunks)
|
| 58 |
-
index = build_faiss_index(embeddings)
|
| 59 |
-
|
| 60 |
-
elif doc_choice == "Upload Custom PDF":
|
| 61 |
-
uploaded_file = st.file_uploader("π Upload your PDF", type="pdf")
|
| 62 |
-
if uploaded_file:
|
| 63 |
-
temp_path = "temp.pdf"
|
| 64 |
-
with open(temp_path, "wb") as f:
|
| 65 |
-
f.write(uploaded_file.getbuffer())
|
| 66 |
-
st.success("✅ Document uploaded and processed!")
|
| 67 |
-
|
| 68 |
-
text = extract_text_from_pdf(temp_path)
|
| 69 |
-
chunks = chunk_text(text, chunk_size=chunk_size)
|
| 70 |
-
embeddings = generate_embeddings(chunks)
|
| 71 |
-
index = build_faiss_index(embeddings)
|
| 72 |
-
|
| 73 |
-
# ---------------------------
|
| 74 |
-
# Document Preview
|
| 75 |
-
# ---------------------------
|
| 76 |
-
if chunks:
|
| 77 |
-
st.subheader("π Document Preview")
|
| 78 |
-
st.text_area("Extracted text (first 1000 chars)", text[:1000], height=150)
|
| 79 |
-
st.caption(f"📦 {len(chunks)} chunks created")
|
| 80 |
-
|
| 81 |
-
# ---------------------------
|
| 82 |
-
# Query Section
|
| 83 |
-
# ---------------------------
|
| 84 |
-
if index and chunks:
|
| 85 |
-
st.markdown("---")
|
| 86 |
-
st.subheader("π€ Ask a Question")
|
| 87 |
-
|
| 88 |
-
user_query = st.text_input("π Your question about the document:")
|
| 89 |
-
if user_query:
|
| 90 |
-
retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
|
| 91 |
-
answer = generate_answer(user_query, retrieved)
|
| 92 |
-
|
| 93 |
-
st.markdown("### ✅ Assistant’s Answer")
|
| 94 |
-
st.write(answer)
|
| 95 |
-
|
| 96 |
-
with st.expander("π Supporting Chunks"):
|
| 97 |
-
for i, r in enumerate(retrieved, start=1):
|
| 98 |
-
st.markdown(f"**Chunk {i}:** {r}")
|
|
|
|
| 1 |
+
# ----------------------------
|
| 2 |
+
# Hugging Face cache bootstrap
|
| 3 |
+
# ----------------------------
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
CACHE_DIR = "/tmp/huggingface"
|
| 7 |
+
os.makedirs(CACHE_DIR, exist_ok=True)
|
| 8 |
+
|
| 9 |
+
os.environ["HF_HOME"] = CACHE_DIR
|
| 10 |
+
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
|
| 11 |
+
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
|
| 12 |
+
os.environ["HF_MODULES_CACHE"] = CACHE_DIR
|
| 13 |
+
|
| 14 |
+
# ----------------------------
|
| 15 |
+
# Imports AFTER cache bootstrap
|
| 16 |
+
# ----------------------------
|
| 17 |
import streamlit as st
|
| 18 |
from ingestion import extract_text_from_pdf, chunk_text
|
| 19 |
from embeddings import generate_embeddings
|
| 20 |
from vectorstore import build_faiss_index
|
| 21 |
from qa import retrieve_chunks, generate_answer
|
|
|
|
| 22 |
|
| 23 |
+
# ----------------------------
|
| 24 |
# App Config
|
| 25 |
+
# ----------------------------
|
| 26 |
st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
|
| 27 |
st.title("π Enterprise Knowledge Assistant")
|
|
|
|
| 28 |
|
| 29 |
+
st.write("Upload a PDF **or try the sample file** to explore this assistant.")
|
| 30 |
+
|
| 31 |
+
# ----------------------------
|
| 32 |
+
# Sidebar (Settings + Credits)
|
| 33 |
+
# ----------------------------
|
| 34 |
with st.sidebar:
|
| 35 |
st.image("app/logo.png", width=150)
|
| 36 |
+
st.header("βοΈ Se
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|