Shubham170793 committed on
Commit
4687fa9
·
verified ·
1 Parent(s): 83f6641

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +24 -86
src/streamlit_app.py CHANGED
@@ -1,98 +1,36 @@
1
- # app/main.py
2
- import os, json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import streamlit as st
4
  from ingestion import extract_text_from_pdf, chunk_text
5
  from embeddings import generate_embeddings
6
  from vectorstore import build_faiss_index
7
  from qa import retrieve_chunks, generate_answer
8
- import faiss
9
 
10
- # ---------------------------
11
  # App Config
12
- # ---------------------------
13
  st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
14
  st.title("📄 Enterprise Knowledge Assistant")
15
- st.caption("Select a document from the library or upload your own, then ask questions.")
16
 
17
- # ---------------------------
18
- # Sidebar (Settings + Docs + Credits)
19
- # ---------------------------
 
 
20
  with st.sidebar:
21
  st.image("app/logo.png", width=150)
22
-
23
- # 1. Document Library FIRST
24
- st.header("📚 Document Library")
25
- doc_choice = st.radio(
26
- "Choose a document:",
27
- ["-- Select --", "Sample PDF", "Upload Custom PDF"],
28
- index=0
29
- )
30
-
31
- st.markdown("---")
32
-
33
- # 2. Settings SECOND
34
- st.header("⚙️ Settings")
35
- chunk_size = st.slider("Chunk Size", 200, 1000, 500, step=100)
36
- top_k = st.slider("Top K Results", 1, 5, 3)
37
-
38
- st.markdown("---")
39
-
40
- # 3. Branding / Credits LAST
41
- st.caption("👨‍💻 Built by Shubham Sharma")
42
- st.markdown("[📂 GitHub Repo](https://github.com/shubhamsharma170793-cpu/enterprise-knowledge-assistant)")
43
-
44
- # ---------------------------
45
- # Document Handling
46
- # ---------------------------
47
- text, chunks, index = None, None, None
48
-
49
- if doc_choice == "-- Select --":
50
- st.info("⬅️ Please choose **Sample PDF** or **Upload Custom PDF** from the sidebar to continue.")
51
-
52
- elif doc_choice == "Sample PDF":
53
- temp_path = os.path.join("app", "sample.pdf")
54
- st.success("📘 Sample PDF selected")
55
- text = extract_text_from_pdf(temp_path)
56
- chunks = chunk_text(text, chunk_size=chunk_size)
57
- embeddings = generate_embeddings(chunks)
58
- index = build_faiss_index(embeddings)
59
-
60
- elif doc_choice == "Upload Custom PDF":
61
- uploaded_file = st.file_uploader("📂 Upload your PDF", type="pdf")
62
- if uploaded_file:
63
- temp_path = "temp.pdf"
64
- with open(temp_path, "wb") as f:
65
- f.write(uploaded_file.getbuffer())
66
- st.success("✅ Document uploaded and processed!")
67
-
68
- text = extract_text_from_pdf(temp_path)
69
- chunks = chunk_text(text, chunk_size=chunk_size)
70
- embeddings = generate_embeddings(chunks)
71
- index = build_faiss_index(embeddings)
72
-
73
- # ---------------------------
74
- # Document Preview
75
- # ---------------------------
76
- if chunks:
77
- st.subheader("📑 Document Preview")
78
- st.text_area("Extracted text (first 1000 chars)", text[:1000], height=150)
79
- st.caption(f"📦 {len(chunks)} chunks created")
80
-
81
- # ---------------------------
82
- # Query Section
83
- # ---------------------------
84
- if index and chunks:
85
- st.markdown("---")
86
- st.subheader("🤖 Ask a Question")
87
-
88
- user_query = st.text_input("🔍 Your question about the document:")
89
- if user_query:
90
- retrieved = retrieve_chunks(user_query, index, chunks, top_k=top_k)
91
- answer = generate_answer(user_query, retrieved)
92
-
93
- st.markdown("### ✅ Assistant’s Answer")
94
- st.write(answer)
95
-
96
- with st.expander("📄 Supporting Chunks"):
97
- for i, r in enumerate(retrieved, start=1):
98
- st.markdown(f"**Chunk {i}:** {r}")
 
1
+ # ----------------------------
2
+ # Hugging Face cache bootstrap
3
+ # ----------------------------
4
+ import os
5
+
6
+ CACHE_DIR = "/tmp/huggingface"
7
+ os.makedirs(CACHE_DIR, exist_ok=True)
8
+
9
+ os.environ["HF_HOME"] = CACHE_DIR
10
+ os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
11
+ os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
12
+ os.environ["HF_MODULES_CACHE"] = CACHE_DIR
13
+
14
+ # ----------------------------
15
+ # Imports AFTER cache bootstrap
16
+ # ----------------------------
17
  import streamlit as st
18
  from ingestion import extract_text_from_pdf, chunk_text
19
  from embeddings import generate_embeddings
20
  from vectorstore import build_faiss_index
21
  from qa import retrieve_chunks, generate_answer
 
22
 
23
+ # ----------------------------
24
  # App Config
25
+ # ----------------------------
26
  st.set_page_config(page_title="Enterprise Knowledge Assistant", layout="wide")
27
  st.title("📄 Enterprise Knowledge Assistant")
 
28
 
29
+ st.write("Upload a PDF **or try the sample file** to explore this assistant.")
30
+
31
+ # ----------------------------
32
+ # Sidebar (Settings + Credits)
33
+ # ----------------------------
34
  with st.sidebar:
35
  st.image("app/logo.png", width=150)
36
+ st.header("βš™οΈ Se