File size: 4,107 Bytes
4687fa9
6944855
e9c70f2
 
 
4687fa9
6944855
 
 
dbc6ce8
4687fa9
 
 
 
 
 
e9c70f2
 
 
 
 
 
 
 
6944855
 
 
 
 
 
54be71f
6944855
e152803
6944855
e152803
 
6944855
e152803
6944855
 
 
e152803
6944855
 
 
 
 
 
 
 
 
 
 
 
 
 
24deec1
 
 
 
 
6944855
 
24deec1
 
 
6944855
 
 
 
24deec1
6944855
 
24deec1
6944855
 
 
 
24deec1
 
 
 
6944855
 
 
0cacffd
 
6944855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24deec1
6944855
24deec1
 
 
 
6944855
24deec1
 
6944855
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import streamlit as st

# --- Streamlit safe options (prevents upload 403 / CORS issues) ---
# showErrorDetails surfaces full tracebacks in the browser instead of a
# generic error banner — useful while debugging hosted deployments.
st.set_option("client.showErrorDetails", True)

# ---------------------------
# Cache Fix for Hugging Face
# ---------------------------
# Redirect every Hugging Face cache location to /tmp, which is writable on
# restricted hosts (e.g. HF Spaces / containers with a read-only home dir).
# IMPORTANT: these environment variables are read at import time by
# transformers/datasets, so this must run BEFORE the project imports below.
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ["HF_HOME"] = CACHE_DIR
# NOTE(review): TRANSFORMERS_CACHE is deprecated in newer transformers
# releases (HF_HOME covers it) — kept for older versions; confirm the
# pinned transformers version before removing.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR

# ---------------------------
# Imports AFTER environment setup
# ---------------------------
from ingestion import extract_text_from_pdf, chunk_text
from embeddings import generate_embeddings
from vectorstore import build_faiss_index
from qa import retrieve_chunks, generate_answer

# ---------------------------
# Paths
# ---------------------------
# ---------------------------
# Paths
# ---------------------------
# Resolve bundled assets relative to this file so the app works no matter
# which working directory it is launched from.
BASE_DIR = os.path.dirname(__file__)         # /app/src


def _asset(filename: str) -> str:
    """Return the absolute path of a file shipped alongside this script."""
    return os.path.join(BASE_DIR, filename)


LOGO_PATH = _asset("logo.png")
SAMPLE_PATH = _asset("sample.pdf")

# ---------------------------
# App Config
# ---------------------------
st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
st.title("πŸ“„ Enterprise Knowledge Assistant")
st.caption("Select a document from the library or upload your own, then ask questions.")

# ---------------------------
# Sidebar (Library + Settings + Credits)
# ---------------------------
with st.sidebar:
    if os.path.exists(LOGO_PATH):
        st.image(LOGO_PATH, width=150)

    # 1. Document Library
    st.header("πŸ“š Document Library")
    doc_choice = st.radio(
        "Choose a document:",
        ["-- Select --", "Sample PDF", "Upload Custom PDF"],
        index=0
    )

    st.markdown("---")

    # 2. Settings
    st.header("βš™οΈ Settings")
    chunk_size = st.slider("Chunk Size", 200, 1000, 500, step=100)
    top_k = st.slider("Top K Results", 1, 5, 3)

    st.markdown("---")

    # 3. Branding
    st.caption("πŸ‘¨β€πŸ’» Built by Shubham Sharma")
    st.markdown("[πŸ“‚ GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")

# ---------------------------
# Document Handling
# ---------------------------
# ---------------------------
# Document Handling
# ---------------------------
def _build_corpus(pdf_path, size):
    """Run the ingestion pipeline for one PDF.

    Extracts the raw text, splits it into chunks of ``size`` characters,
    embeds the chunks, and builds a FAISS index over the embeddings.

    Args:
        pdf_path: filesystem path of the PDF to ingest.
        size: chunk length forwarded to ``chunk_text``.

    Returns:
        Tuple ``(text, chunks, index)`` ready for retrieval.
    """
    # NOTE(review): this re-runs extraction + embedding on every Streamlit
    # rerun; consider st.cache_data/st.cache_resource once the return types
    # of generate_embeddings/build_faiss_index are confirmed cacheable.
    doc_text = extract_text_from_pdf(pdf_path)
    doc_chunks = chunk_text(doc_text, chunk_size=size)
    vectors = generate_embeddings(doc_chunks)
    return doc_text, doc_chunks, build_faiss_index(vectors)


# Module-level results consumed by the preview and query sections below.
text, chunks, index = None, None, None

if doc_choice == "-- Select --":
    st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar.")

elif doc_choice == "Sample PDF":
    # Guard against a missing bundled asset instead of crashing mid-render.
    if os.path.exists(SAMPLE_PATH):
        st.success("📘 Sample PDF selected")
        text, chunks, index = _build_corpus(SAMPLE_PATH, chunk_size)
    else:
        st.error("Sample PDF not found on the server.")

elif doc_choice == "Upload Custom PDF":
    uploaded_file = st.file_uploader("📂 Upload your PDF", type="pdf")
    if uploaded_file:
        # Always write to /tmp (the only guaranteed writable folder).
        # basename() strips any path components from the client-supplied
        # filename so it cannot escape /tmp (path-traversal hardening).
        temp_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
        with open(temp_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success("✅ Document uploaded and processed!")

        text, chunks, index = _build_corpus(temp_path, chunk_size)

# ---------------------------
# Document Preview
# ---------------------------
# ---------------------------
# Document Preview
# ---------------------------
if chunks:
    st.subheader("📑 Document Preview")
    # Show only the head of the document to keep the widget light.
    preview_text = text[:1000]
    st.text_area("Extracted text (first 1000 chars)", preview_text, height=150)
    st.caption(f"📦 {len(chunks)} chunks created")

# ---------------------------
# Query Section
# ---------------------------
# Only offer Q&A once an index has been built for the selected document.
if index and chunks:
    st.markdown("---")
    st.subheader("🤖 Ask a Question")

    # Walrus: render the input and branch on a non-empty query in one step.
    if user_query := st.text_input("🔍 Your question about the document:"):
        retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
        answer = generate_answer(user_query, retrieved)

        st.markdown("### ✅ Assistant’s Answer")
        st.write(answer)

        # Surface the retrieved evidence so users can audit the answer.
        with st.expander("📄 Supporting Chunks"):
            for rank, passage in enumerate(retrieved, start=1):
                st.markdown(f"**Chunk {rank}:** {passage}")