sourize committed on
Commit
e07c00d
·
1 Parent(s): 13eba5e

Updated main.py

Browse files
Files changed (3) hide show
  1. .streamlit/secrets.toml +0 -8
  2. app.py +83 -54
  3. requirements.txt +6 -3
.streamlit/secrets.toml DELETED
@@ -1,8 +0,0 @@
1
- # .streamlit/secrets.toml
2
-
3
- # Your FastAPI backend (if you host it separately, e.g. on Railway or Render)
4
- backend_url = "https://rag-pathway.onrender.com"
5
-
6
- # Supabase credentials (if you call Supabase directly from Streamlit)
7
- SUPABASE_URL = "https://iddmmovzjstbinuptpit.supabase.co"
8
- SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImlkZG1tb3Z6anN0YmludXB0cGl0Iiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTc0MzQxMDgxMSwiZXhwIjoyMDU4OTg2ODExfQ.MQUoU3JhDSWofJ7Z3zmytbKVF8DOJ9yLBYraDI_YIFw"
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,57 +1,86 @@
1
  import streamlit as st
2
- import requests
3
-
4
- st.set_page_config(page_title="RagBot", layout="wide")
5
-
6
- # --- Sidebar: Upload files ---
7
- st.sidebar.header("📂 Upload Documents")
8
- uploaded = st.sidebar.file_uploader(
9
- "Choose PDF/TXT files",
10
- type=["pdf", "txt"],
11
- accept_multiple_files=True
12
- )
13
-
14
- if uploaded:
15
- # Show a little notification
16
- with st.spinner("Uploading and re-indexing…"):
17
- for f in uploaded:
18
- files = {"file": (f.name, f.getvalue())}
19
- res = requests.post(
20
- f"{st.secrets.backend_url}/upload",
21
- files=files
22
- )
23
- res.raise_for_status()
24
- # kick off reindex
25
- requests.post(f"{st.secrets.backend_url}/reindex")
26
- st.sidebar.success(f"Uploaded {len(uploaded)} file(s) and reindexed.")
27
-
28
- # --- Main chat interface ---
29
- st.title("🤖 RagBot")
30
-
31
- # keep chat history in session state
32
- if "history" not in st.session_state:
33
- st.session_state.history = []
34
-
35
- # show previous messages
36
- for role, text in st.session_state.history:
37
- align = "→" if role=="user" else "←"
38
- st.markdown(f"**{align} {role.capitalize()}**: {text}")
39
-
40
- # input your question
41
- question = st.text_input("Ask a question about your documents:")
42
-
43
- if st.button("Send") and question:
44
- st.session_state.history.append(("user", question))
45
- with st.spinner("Thinking…"):
46
- resp = requests.post(
47
- f"{st.secrets.backend_url}/qa",
48
- json={"question": question}
49
- )
50
- if resp.status_code == 200:
51
- answer = resp.json().get("answer")
52
- st.session_state.history.append(("assistant", answer))
53
  else:
54
- st.error("Failed to fetch answer.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # auto-scroll to bottom (hack)
57
- st.write("") # ensures the last message is visible
 
1
  import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ import docx
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+ from transformers import pipeline
7
+
8
# Caching heavy resources
@st.cache_resource
def load_models():
    """Load and cache the embedding and question-answering models.

    Returns:
        tuple: (SentenceTransformer embedding model, transformers QA pipeline).
    """
    # Lightweight sentence-embedding model used for passage retrieval.
    embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    # Distilled extractive-QA model fine-tuned on SQuAD.
    qa_pipeline = pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')
    return embedding_model, qa_pipeline
16
+
17
# Extract text from uploaded file
def extract_text_from_file(uploaded_file):
    """Return the plain-text content of an uploaded PDF, DOCX, or text file.

    Args:
        uploaded_file: file-like object exposing ``name`` and ``getvalue()``
            (e.g. a Streamlit UploadedFile).

    Returns:
        str: the extracted text. Unknown extensions fall back to a
        best-effort UTF-8 decode of the raw bytes.
    """
    name = uploaded_file.name.lower()
    if name.endswith('.pdf'):
        reader = PdfReader(uploaded_file)
        # Join pages with a newline so the last word of one page is not
        # glued to the first word of the next (which would corrupt the
        # word-based chunking downstream).
        text = '\n'.join(page.extract_text() or '' for page in reader.pages)
    elif name.endswith('.docx'):
        doc = docx.Document(uploaded_file)
        text = '\n'.join(para.text for para in doc.paragraphs)
    else:
        # .txt and anything else: decode as UTF-8, silently dropping
        # undecodable bytes rather than failing the whole upload.
        text = uploaded_file.getvalue().decode('utf-8', errors='ignore')
    return text
29
+
30
# Split text into chunks
def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into word-based chunks of at most *chunk_size* words.

    Consecutive chunks share *overlap* words of context so answers that
    span a chunk boundary remain retrievable.

    Args:
        text: source text to split (whitespace-tokenized).
        chunk_size: maximum number of words per chunk.
        overlap: number of words shared between consecutive chunks.

    Returns:
        list[str]: the chunks in document order; empty list for empty text.
    """
    words = text.split()
    # Clamp the stride to at least 1: with overlap >= chunk_size the
    # original `start += chunk_size - overlap` never advanced → infinite loop.
    step = max(1, chunk_size - overlap)
    chunks = []
    start = 0
    while start < len(words):
        end = min(start + chunk_size, len(words))
        chunks.append(' '.join(words[start:end]))
        if end == len(words):
            # The tail is consumed. Advancing by `step` here could emit one
            # more chunk wholly contained in this one (pure duplicate).
            break
        start += step
    return chunks
41
+
42
# Build FAISS index from chunks
@st.cache_resource
def build_faiss_index(chunks, _embedder):
    """Embed *chunks* and build an in-memory FAISS L2 index over them.

    Args:
        chunks: list of text chunks; FAISS row ids match list positions.
        _embedder: SentenceTransformer model used for encoding. The leading
            underscore tells ``st.cache_resource`` to skip hashing this
            argument — model objects are unhashable by Streamlit's hasher
            and would raise UnhashableParamError otherwise. Callers pass it
            positionally, so the rename is backward-compatible.

    Returns:
        faiss.IndexFlatL2: exact L2 index over the chunk embeddings.
    """
    # encode() returns a float32 numpy array, which faiss accepts directly.
    embeddings = _embedder.encode(chunks)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index
50
+
51
# Main Streamlit app
def main():
    """Render the document-QA page: upload a file, index it, answer questions."""
    st.title('📄 Streamlit RAG: Document QA')
    st.markdown('Upload a PDF or DOCX and ask questions about its content.')

    uploaded = st.file_uploader('Upload Document', type=['pdf', 'docx', 'txt'], accept_multiple_files=False)
    if uploaded:
        with st.spinner('Extracting text...'):
            text = extract_text_from_file(uploaded)
        st.success('Text extracted!')

        # Chunk and index
        chunks = chunk_text(text)
        if not chunks:
            # Scanned PDFs and empty files yield no extractable text; bail
            # out before building an empty FAISS index (searching it would
            # only return -1 ids).
            st.warning('No text could be extracted from this document.')
            return
        embedder, qa = load_models()
        index = build_faiss_index(chunks, embedder)

        # Ask questions
        question = st.text_input('Ask a question:')
        if question:
            with st.spinner('Searching relevant passages...'):
                q_emb = embedder.encode([question])
                # Never request more neighbours than indexed chunks: FAISS
                # pads missing results with id -1, and chunks[-1] would
                # silently select the last chunk instead of failing.
                k = min(3, len(chunks))
                D, I = index.search(q_emb, k=k)
                hits = [i for i in I[0] if i >= 0]
                context = '\n\n'.join(chunks[i] for i in hits)

            with st.spinner('Answering...'):
                result = qa({'question': question, 'context': context})
                answer = result.get('answer', 'Sorry, could not find an answer.')

            st.write('**Answer:**', answer)
            st.write('---')
            st.write('**Context snippets:**')
            for idx in hits:
                st.write('- ', chunks[idx][:200].replace('\n', ' '), '...')

if __name__ == '__main__':
    main()
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- streamlit
2
- requests
3
- python-multipart # to handle uploads if you ever embed FastAPI inside Streamlit
 
 
 
 
1
+ streamlit
2
+ faiss-cpu
3
+ sentence-transformers
4
+ transformers
5
+ PyPDF2
6
+ python-docx