|
|
import os


import streamlit as st


# Surface full tracebacks in the browser — useful while debugging deployment
# issues, but consider disabling for production users.
# NOTE(review): called before st.set_page_config further down — confirm the
# deployed Streamlit version accepts this ordering.
st.set_option("client.showErrorDetails", True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face caches must live somewhere writable on the deployment host,
# so point every HF cache location at one /tmp directory *before* the
# model-loading modules below are imported.
CACHE_DIR = "/tmp/hf_cache"

os.makedirs(CACHE_DIR, exist_ok=True)

for _hf_cache_var in (
    "HF_HOME",
    "TRANSFORMERS_CACHE",
    "HF_DATASETS_CACHE",
    "HF_MODULES_CACHE",
):
    os.environ[_hf_cache_var] = CACHE_DIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from ingestion import extract_text_from_pdf, chunk_text |
|
|
from embeddings import generate_embeddings |
|
|
from vectorstore import build_faiss_index |
|
|
from qa import retrieve_chunks, generate_answer |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Resolve bundled assets relative to this file so the app works regardless
# of the working directory it is launched from.
BASE_DIR = os.path.dirname(__file__)
LOGO_PATH = os.path.join(BASE_DIR, "logo.png")  # optional sidebar logo
SAMPLE_PATH = os.path.join(BASE_DIR, "sample.pdf")  # built-in demo document
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page chrome -----------------------------------------------------------
# NOTE(review): the title emoji was mojibake ("π…" = UTF-8 bytes decoded as
# cp1253); restored to a plausible glyph — confirm the intended one.
st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
st.title("📚 Enterprise Knowledge Assistant")
st.caption("Select a document from the library or upload your own, then ask questions.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Sidebar: document picker, retrieval settings, credits -----------------
# NOTE(review): the sidebar labels contained mojibake'd emoji (UTF-8 decoded
# as cp1253); restored to plausible glyphs — confirm the originals.
with st.sidebar:
    # Logo is optional; skip silently when the asset is not bundled.
    if os.path.exists(LOGO_PATH):
        st.image(LOGO_PATH, width=150)

    st.header("📚 Document Library")
    doc_choice = st.radio(
        "Choose a document:",
        ["-- Select --", "Sample PDF", "Upload Custom PDF"],
        index=0,
    )

    st.markdown("---")

    st.header("⚙️ Settings")
    # chunk_size and top_k feed the ingestion / retrieval pipeline below.
    chunk_size = st.slider("Chunk Size", 200, 1000, 500, step=100)
    top_k = st.slider("Top K Results", 1, 5, 3)

    st.markdown("---")

    st.caption("👨‍💻 Built by Shubham Sharma")
    st.markdown("[🔗 GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_pipeline(pdf_path, size):
    """Extract text from *pdf_path*, chunk it, embed the chunks and build
    the FAISS index.

    Returns a ``(text, chunks, index)`` triple ready for the preview and
    Q&A sections below. Factored out because the Sample and Upload branches
    previously duplicated this four-step pipeline verbatim.
    """
    doc_text = extract_text_from_pdf(pdf_path)
    doc_chunks = chunk_text(doc_text, chunk_size=size)
    doc_index = build_faiss_index(generate_embeddings(doc_chunks))
    return doc_text, doc_chunks, doc_index


# Pipeline state for the rest of the page; stays None until a document is
# chosen and successfully processed.
text, chunks, index = None, None, None

if doc_choice == "-- Select --":
    st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar.")

elif doc_choice == "Sample PDF":
    if os.path.exists(SAMPLE_PATH):
        st.success("📄 Sample PDF selected")
        text, chunks, index = _build_pipeline(SAMPLE_PATH, chunk_size)
    else:
        # Guard: previously a missing bundled sample crashed the app.
        st.error("Sample PDF is missing from this deployment.")

elif doc_choice == "Upload Custom PDF":
    uploaded_file = st.file_uploader("📎 Upload your PDF", type="pdf")
    if uploaded_file:
        # basename() strips any path components from the client-supplied
        # filename so the upload cannot escape /tmp (path traversal).
        temp_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
        with open(temp_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        text, chunks, index = _build_pipeline(temp_path, chunk_size)
        # Report success only after processing actually finished (the
        # original showed this message before the pipeline had run).
        st.success("✅ Document uploaded and processed!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Short preview so the user can sanity-check that extraction succeeded.
# NOTE(review): emoji restored from mojibake ("π…") — confirm intended glyphs.
if chunks:
    st.subheader("📄 Document Preview")
    st.text_area("Extracted text (first 1000 chars)", text[:1000], height=150)
    st.caption(f"📦 {len(chunks)} chunks created")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Q&A: retrieve the top-k chunks for the query and generate an answer ---
# `index is not None` avoids relying on the truthiness of an opaque FAISS
# index object (the original tested `if index`, which assumes the object is
# truthy). Emoji labels restored from mojibake — confirm intended glyphs.
if index is not None and chunks:
    st.markdown("---")
    st.subheader("🤖 Ask a Question")

    user_query = st.text_input("🔎 Your question about the document:")
    if user_query:
        retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
        answer = generate_answer(user_query, retrieved)

        st.markdown("### ✅ Assistant’s Answer")
        st.write(answer)

        # Show provenance: the chunks the answer was grounded on.
        with st.expander("📑 Supporting Chunks"):
            for i, r in enumerate(retrieved, start=1):
                st.markdown(f"**Chunk {i}:** {r}")
|
|
|