"""Streamlit app: Enterprise Knowledge Assistant.

Lets the user pick a bundled sample PDF or upload their own, builds a
FAISS index over embedded text chunks, and answers questions via
retrieval-augmented generation.
"""

import os

import streamlit as st

# --- Streamlit safe options (prevents upload 403 / CORS issues) ---
st.set_option("client.showErrorDetails", True)

# ---------------------------
# Cache Fix for Hugging Face
# ---------------------------
# On Hugging Face Spaces only /tmp is guaranteed writable, so every HF
# cache location is redirected there. This MUST happen before importing
# any module that pulls in transformers/datasets, because those libraries
# read the env vars at import time.
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR

# ---------------------------
# Imports AFTER environment setup (see cache note above)
# ---------------------------
from ingestion import extract_text_from_pdf, chunk_text
from embeddings import generate_embeddings
from vectorstore import build_faiss_index
from qa import retrieve_chunks, generate_answer

# ---------------------------
# Paths
# ---------------------------
BASE_DIR = os.path.dirname(__file__)  # /app/src
LOGO_PATH = os.path.join(BASE_DIR, "logo.png")
SAMPLE_PATH = os.path.join(BASE_DIR, "sample.pdf")


def _process_document(pdf_path, chunk_size):
    """Run the full ingestion pipeline for one PDF.

    Extracts text, splits it into chunks of ``chunk_size``, embeds the
    chunks, and builds a FAISS index over the embeddings.

    Returns:
        (text, chunks, index) on success; (text, None, None) when the
        PDF yields no chunks (e.g. an empty or scanned-image PDF).
    """
    text = extract_text_from_pdf(pdf_path)
    chunks = chunk_text(text, chunk_size=chunk_size)
    if not chunks:
        # Nothing to embed — avoid passing an empty list downstream.
        return text, None, None
    embeddings = generate_embeddings(chunks)
    index = build_faiss_index(embeddings)
    return text, chunks, index


# ---------------------------
# App Config
# ---------------------------
st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
st.title("📄 Enterprise Knowledge Assistant")
st.caption("Select a document from the library or upload your own, then ask questions.")

# ---------------------------
# Sidebar (Library + Settings + Credits)
# ---------------------------
with st.sidebar:
    if os.path.exists(LOGO_PATH):
        st.image(LOGO_PATH, width=150)

    # 1. Document Library
    st.header("📚 Document Library")
    doc_choice = st.radio(
        "Choose a document:",
        ["-- Select --", "Sample PDF", "Upload Custom PDF"],
        index=0,
    )

    st.markdown("---")

    # 2. Settings
    st.header("⚙️ Settings")
    chunk_size = st.slider("Chunk Size", 200, 1000, 500, step=100)
    top_k = st.slider("Top K Results", 1, 5, 3)

    st.markdown("---")

    # 3. Branding
    st.caption("👨‍💻 Built by Shubham Sharma")
    st.markdown("[📂 GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")

# ---------------------------
# Document Handling
# ---------------------------
text, chunks, index = None, None, None

if doc_choice == "-- Select --":
    st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar.")

elif doc_choice == "Sample PDF":
    st.success("📘 Sample PDF selected")
    text, chunks, index = _process_document(SAMPLE_PATH, chunk_size)
    if chunks is None:
        st.warning("⚠️ No text could be extracted from this PDF.")

elif doc_choice == "Upload Custom PDF":
    uploaded_file = st.file_uploader("📂 Upload your PDF", type="pdf")
    if uploaded_file:
        # Always write to /tmp (the only guaranteed writable folder).
        # basename() strips any directory components from the client-supplied
        # filename so it cannot escape /tmp.
        safe_name = os.path.basename(uploaded_file.name)
        temp_path = os.path.join("/tmp", safe_name)
        with open(temp_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success("✅ Document uploaded and processed!")
        text, chunks, index = _process_document(temp_path, chunk_size)
        if chunks is None:
            st.warning("⚠️ No text could be extracted from this PDF.")

# ---------------------------
# Document Preview
# ---------------------------
if chunks:
    st.subheader("📑 Document Preview")
    st.text_area("Extracted text (first 1000 chars)", text[:1000], height=150)
    st.caption(f"📦 {len(chunks)} chunks created")

# ---------------------------
# Query Section
# ---------------------------
# Explicit None check: a FAISS index object's truthiness is not a
# reliable signal of "ready to query".
if index is not None and chunks:
    st.markdown("---")
    st.subheader("🤖 Ask a Question")

    user_query = st.text_input("🔍 Your question about the document:")

    if user_query:
        retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
        answer = generate_answer(user_query, retrieved)

        st.markdown("### ✅ Assistant’s Answer")
        st.write(answer)

        with st.expander("📄 Supporting Chunks"):
            for i, r in enumerate(retrieved, start=1):
                st.markdown(f"**Chunk {i}:** {r}")