sourize committed on
Commit
06b2acd
·
1 Parent(s): e07c00d

Updated main.py

Browse files
Files changed (1) hide show
  1. app.py +37 -24
app.py CHANGED
@@ -34,8 +34,7 @@ def chunk_text(text, chunk_size=500, overlap=50):
34
  start = 0
35
  while start < len(words):
36
  end = min(start + chunk_size, len(words))
37
- chunk = ' '.join(words[start:end])
38
- chunks.append(chunk)
39
  start += chunk_size - overlap
40
  return chunks
41
 
@@ -50,37 +49,51 @@ def build_faiss_index(chunks, embedder):
50
 
51
  # Main Streamlit app
52
  def main():
53
- st.title('πŸ“„ Streamlit RAG: Document QA')
54
- st.markdown('Upload a PDF or DOCX and ask questions about its content.')
 
 
 
 
 
55
 
56
- uploaded = st.file_uploader('Upload Document', type=['pdf', 'docx', 'txt'], accept_multiple_files=False)
57
  if uploaded:
58
- with st.spinner('Extracting text...'):
59
- text = extract_text_from_file(uploaded)
60
- st.success('Text extracted!')
61
-
62
- # Chunk and index
63
  chunks = chunk_text(text)
64
  embedder, qa = load_models()
65
  index = build_faiss_index(chunks, embedder)
66
 
67
- # Ask questions
68
- question = st.text_input('Ask a question:')
 
 
 
 
 
 
 
69
  if question:
70
- with st.spinner('Searching relevant passages...'):
71
- q_emb = embedder.encode([question])
72
- D, I = index.search(q_emb, k=3)
73
- context = '\n\n'.join(chunks[i] for i in I[0])
 
 
 
 
74
 
75
- with st.spinner('Answering...'):
76
- result = qa({'question': question, 'context': context})
77
- answer = result.get('answer', 'Sorry, could not find an answer.')
78
 
79
- st.write('**Answer:**', answer)
80
- st.write('---')
81
- st.write('**Context snippets:**')
82
- for idx in I[0]:
83
- st.write('- ', chunks[idx][:200].replace('\n', ' '), '...')
 
 
 
84
 
85
  if __name__ == '__main__':
86
  main()
 
34
  start = 0
35
  while start < len(words):
36
  end = min(start + chunk_size, len(words))
37
+ chunks.append(' '.join(words[start:end]))
 
38
  start += chunk_size - overlap
39
  return chunks
40
 
 
49
 
50
  # Main Streamlit app
51
  def main():
52
+ st.set_page_config(page_title='πŸ“„ RAGbot', layout='wide')
53
+ st.title('πŸ€– RagBot')
54
+ st.sidebar.header('Upload Documents')
55
+
56
+ # Initialize chat history in session state
57
+ if 'history' not in st.session_state:
58
+ st.session_state.history = []
59
 
60
+ uploaded = st.sidebar.file_uploader('Upload PDF/DOCX/TXT', type=['pdf', 'docx', 'txt'])
61
  if uploaded:
62
+ text = extract_text_from_file(uploaded)
 
 
 
 
63
  chunks = chunk_text(text)
64
  embedder, qa = load_models()
65
  index = build_faiss_index(chunks, embedder)
66
 
67
+ # Display existing chat history
68
+ for chat in st.session_state.history:
69
+ with st.chat_message('user'):
70
+ st.markdown(f"**You:** {chat['question']}")
71
+ with st.chat_message('assistant'):
72
+ st.markdown(f"**RagBot:** {chat['answer']}")
73
+
74
+ # Chat input
75
+ question = st.chat_input('Ask a question about the document...')
76
  if question:
77
+ # Retrieve top-k relevant chunks
78
+ q_emb = embedder.encode([question])
79
+ D, I = index.search(q_emb, k=3)
80
+ context = '\n\n'.join(chunks[i] for i in I[0])
81
+
82
+ # Get answer
83
+ result = qa({'question': question, 'context': context})
84
+ answer = result.get('answer', 'Sorry, could not find an answer.')
85
 
86
+ # Save to history
87
+ st.session_state.history.append({'question': question, 'answer': answer})
 
88
 
89
+ # Display new messages
90
+ with st.chat_message('user'):
91
+ st.markdown(f"**You:** {question}")
92
+ with st.chat_message('assistant'):
93
+ st.markdown(f"**RagBot:** {answer}")
94
+
95
+ else:
96
+ st.info('Please upload a document in the sidebar to begin.')
97
 
98
  if __name__ == '__main__':
99
  main()