Spaces:

sourize
/

RagBot

Sleeping

App Files Community

sourize committed on Apr 20

Commit

06b2acd

1 Parent(s): e07c00d

Updated main.py

Browse files

Files changed (1) hide show

app.py +37 -24

app.py CHANGED Viewed

@@ -34,8 +34,7 @@ def chunk_text(text, chunk_size=500, overlap=50):
     start = 0
     while start < len(words):
         end = min(start + chunk_size, len(words))
-        chunk = ' '.join(words[start:end])
-        chunks.append(chunk)
         start += chunk_size - overlap
     return chunks
@@ -50,37 +49,51 @@ def build_faiss_index(chunks, embedder):
 # Main Streamlit app
 def main():
-    st.title('📄 Streamlit RAG: Document QA')
-    st.markdown('Upload a PDF or DOCX and ask questions about its content.')
-    uploaded = st.file_uploader('Upload Document', type=['pdf', 'docx', 'txt'], accept_multiple_files=False)
     if uploaded:
-        with st.spinner('Extracting text...'):
-            text = extract_text_from_file(uploaded)
-        st.success('Text extracted!')
-        # Chunk and index
         chunks = chunk_text(text)
         embedder, qa = load_models()
         index = build_faiss_index(chunks, embedder)
-        # Ask questions
-        question = st.text_input('Ask a question:')
         if question:
-            with st.spinner('Searching relevant passages...'):
-                q_emb = embedder.encode([question])
-                D, I = index.search(q_emb, k=3)
-                context = '\n\n'.join(chunks[i] for i in I[0])
-            with st.spinner('Answering...'):
-                result = qa({'question': question, 'context': context})
-                answer = result.get('answer', 'Sorry, could not find an answer.')
-            st.write('**Answer:**', answer)
-            st.write('---')
-            st.write('**Context snippets:**')
-            for idx in I[0]:
-                st.write('- ', chunks[idx][:200].replace('\n', ' '), '...')
 if __name__ == '__main__':
     main()

     start = 0
     while start < len(words):
         end = min(start + chunk_size, len(words))
+        chunks.append(' '.join(words[start:end]))
         start += chunk_size - overlap
     return chunks
 # Main Streamlit app
 def main():
+    st.set_page_config(page_title='📄 RAGbot', layout='wide')
+    st.title('🤖 RagBot')
+    st.sidebar.header('Upload Documents')
+    # Initialize chat history in session state
+    if 'history' not in st.session_state:
+        st.session_state.history = []
+    uploaded = st.sidebar.file_uploader('Upload PDF/DOCX/TXT', type=['pdf', 'docx', 'txt'])
     if uploaded:
+        text = extract_text_from_file(uploaded)
         chunks = chunk_text(text)
         embedder, qa = load_models()
         index = build_faiss_index(chunks, embedder)
+        # Display existing chat history
+        for chat in st.session_state.history:
+            with st.chat_message('user'):
+                st.markdown(f"**You:** {chat['question']}")
+            with st.chat_message('assistant'):
+                st.markdown(f"**RagBot:** {chat['answer']}")
+        # Chat input
+        question = st.chat_input('Ask a question about the document...')
         if question:
+            # Retrieve top-k relevant chunks
+            q_emb = embedder.encode([question])
+            D, I = index.search(q_emb, k=3)
+            context = '\n\n'.join(chunks[i] for i in I[0])
+            # Get answer
+            result = qa({'question': question, 'context': context})
+            answer = result.get('answer', 'Sorry, could not find an answer.')
+            # Save to history
+            st.session_state.history.append({'question': question, 'answer': answer})
+            # Display new messages
+            with st.chat_message('user'):
+                st.markdown(f"**You:** {question}")
+            with st.chat_message('assistant'):
+                st.markdown(f"**RagBot:** {answer}")
+    else:
+        st.info('Please upload a document in the sidebar to begin.')
 if __name__ == '__main__':
     main()