udituen committed on
Commit
8f83418
·
verified ·
1 Parent(s): 090acc7

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +144 -62
src/streamlit_app.py CHANGED
@@ -1,11 +1,17 @@
1
  import streamlit as st
2
- from PyPDF2 import PdfReader
3
- import io
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
- from langchain.chains import RetrievalQA
7
  from langchain_community.llms import HuggingFacePipeline
 
8
  from transformers import pipeline
 
 
 
 
 
 
 
9
 
10
  # ----------------------
11
  # Sample Text Content
@@ -23,6 +29,7 @@ EXAMPLE_QUESTIONS = [
23
  "How does composting help farming?",
24
  ]
25
 
 
26
  def read_uploaded_file(uploaded_file):
27
  uploaded_file.seek(0)
28
 
@@ -41,14 +48,13 @@ def read_uploaded_file(uploaded_file):
41
  docs = [doc.strip() for doc in docs if doc.strip()]
42
  return docs
43
 
44
- # Load lightweight LLM - FIXED VERSION
45
  @st.cache_resource
46
  def load_llm():
47
- # Use text2text-generation for FLAN-T5
48
  pipe = pipeline(
49
- "text2text-generation", # ← Changed from text-generation
50
  model="google/flan-t5-small",
51
- max_length=256, # ← Changed from max_new_tokens
52
  temperature=0.7,
53
  top_p=0.95
54
  )
@@ -60,64 +66,140 @@ def build_retriever(docs):
60
  db = FAISS.from_texts(docs, embeddings)
61
  return db.as_retriever()
62
 
63
- # Streamlit UI
64
- st.title("DocsQA: Upload & Ask")
65
-
66
- st.markdown("Upload a text file and ask questions about its contents.")
67
-
68
- # Add sample file download button
69
- st.download_button(
70
- label="📄 Download Sample File",
71
- data=SAMPLE_TEXT,
72
- file_name="sample_agri.txt",
73
- mime="text/plain"
74
- )
75
 
76
- # Show example questions
77
- with st.expander("💡 Try example questions"):
78
- for q in EXAMPLE_QUESTIONS:
79
- st.markdown(f"- {q}")
80
 
81
- uploaded_file = st.file_uploader("Upload your file", type=["txt", "pdf"])
82
 
83
- if uploaded_file is not None:
84
- st.write("📁 Filename:", uploaded_file.name)
85
- st.write("📋 File type:", uploaded_file.type)
86
 
87
- # Show preview for text files only
88
- if uploaded_file.type == "text/plain":
89
- uploaded_file.seek(0)
90
- file_content = uploaded_file.read()
91
- st.text_area("Content Preview", file_content.decode("utf-8"), height=200)
92
- else:
93
- st.info(f"📄 PDF uploaded: {uploaded_file.name}")
94
-
95
- query = st.text_input("Ask a question")
96
-
97
- if uploaded_file is not None:
98
- docs = read_uploaded_file(uploaded_file)
99
-
100
- if len(docs) > 0:
101
- retriever = build_retriever(docs)
102
- llm = load_llm()
103
- qa_chain = RetrievalQA.from_chain_type(
104
- llm=llm,
105
- retriever=retriever,
106
- return_source_documents=True # Optional: see source docs
107
- )
108
 
109
- if query:
110
- with st.spinner("Generating answer..."):
111
- result = qa_chain({"query": query})
112
-
113
- st.success(f"Answer: {result['result']}")
114
- st.write()
115
-
116
- # Show source documents
117
- with st.expander("📄 View source documents"):
118
- for i, doc in enumerate(result["source_documents"]):
119
- st.write(f"**Source {i+1}:** {doc.page_content}")
120
- else:
121
- st.error("No content found in file. Please check your file.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  else:
123
- st.info("Please upload a `.txt, .pdf` file or use the sample provided.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
 
2
  from langchain_community.vectorstores import FAISS
3
  from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain.chains import ConversationalRetrievalChain
5
  from langchain_community.llms import HuggingFacePipeline
6
+ from langchain.memory import ConversationBufferMemory
7
  from transformers import pipeline
8
+ import io
9
+
10
+ # For PDF processing
11
+ try:
12
+ from pypdf import PdfReader
13
+ except ImportError:
14
+ from PyPDF2 import PdfReader
15
 
16
  # ----------------------
17
  # Sample Text Content
 
29
  "How does composting help farming?",
30
  ]
31
 
32
+ # Helper: Read uploaded file (TXT or PDF)
33
  def read_uploaded_file(uploaded_file):
34
  uploaded_file.seek(0)
35
 
 
48
  docs = [doc.strip() for doc in docs if doc.strip()]
49
  return docs
50
 
51
+ # Load lightweight LLM
52
  @st.cache_resource
53
  def load_llm():
 
54
  pipe = pipeline(
55
+ "text2text-generation",
56
  model="google/flan-t5-small",
57
+ max_length=256,
58
  temperature=0.7,
59
  top_p=0.95
60
  )
 
66
  db = FAISS.from_texts(docs, embeddings)
67
  return db.as_retriever()
68
 
69
+ # Initialize session state
70
+ if 'chat_history' not in st.session_state:
71
+ st.session_state.chat_history = []
72
+ if 'qa_chain' not in st.session_state:
73
+ st.session_state.qa_chain = None
74
+ if 'document_processed' not in st.session_state:
75
+ st.session_state.document_processed = False
 
 
 
 
 
76
 
77
+ # Streamlit UI
78
+ st.title("💬 DocsQA: Chat with Your Document")
 
 
79
 
80
+ st.markdown("Upload a document and have a conversation about its contents!")
81
 
82
+ # Sidebar for document upload
83
+ with st.sidebar:
84
+ st.header("📄 Document Upload")
85
 
86
+ # Add sample file download button
87
+ st.download_button(
88
+ label="📥 Download Sample File",
89
+ data=SAMPLE_TEXT,
90
+ file_name="sample_agri.txt",
91
+ mime="text/plain"
92
+ )
93
+
94
+ uploaded_file = st.file_uploader("Upload your file", type=["txt", "pdf"])
95
+
96
+ if uploaded_file is not None:
97
+ st.success(f"✅ {uploaded_file.name}")
 
 
 
 
 
 
 
 
 
98
 
99
+ # Process document button
100
+ if st.button("🔄 Process Document", type="primary"):
101
+ with st.spinner("Processing document..."):
102
+ try:
103
+ docs = read_uploaded_file(uploaded_file)
104
+
105
+ if len(docs) > 0:
106
+ retriever = build_retriever(docs)
107
+ llm = load_llm()
108
+
109
+ # Create conversational chain with memory
110
+ memory = ConversationBufferMemory(
111
+ memory_key="chat_history",
112
+ return_messages=True,
113
+ output_key="answer"
114
+ )
115
+
116
+ st.session_state.qa_chain = ConversationalRetrievalChain.from_llm(
117
+ llm=llm,
118
+ retriever=retriever,
119
+ memory=memory,
120
+ return_source_documents=True
121
+ )
122
+
123
+ st.session_state.document_processed = True
124
+ st.session_state.chat_history = []
125
+ st.success(f"✅ Processed {len(docs)} text chunks!")
126
+ st.rerun()
127
+ else:
128
+ st.error("No content found in file.")
129
+
130
+ except Exception as e:
131
+ st.error(f"Error: {str(e)}")
132
+
133
+ # Show example questions
134
+ if st.session_state.document_processed:
135
+ st.markdown("---")
136
+ st.subheader("💡 Example Questions")
137
+ for q in EXAMPLE_QUESTIONS:
138
+ if st.button(q, key=f"example_{q}"):
139
+ st.session_state.user_input = q
140
+ st.rerun()
141
+
142
+ # Clear chat button
143
+ if st.session_state.chat_history:
144
+ st.markdown("---")
145
+ if st.button("🗑️ Clear Chat History"):
146
+ st.session_state.chat_history = []
147
+ st.rerun()
148
+
149
+ # Main chat interface
150
+ if not st.session_state.document_processed:
151
+ st.info("👈 Please upload a document in the sidebar and click 'Process Document' to start chatting!")
152
  else:
153
+ # Display chat history
154
+ for message in st.session_state.chat_history:
155
+ with st.chat_message(message["role"]):
156
+ st.markdown(message["content"])
157
+
158
+ # Show sources if available
159
+ if message["role"] == "assistant" and "sources" in message:
160
+ with st.expander("📚 View Sources"):
161
+ for i, source in enumerate(message["sources"]):
162
+ st.markdown(f"**Source {i+1}:** {source}")
163
+
164
+ # Chat input
165
+ if prompt := st.chat_input("Ask a question about your document..."):
166
+ # Add user message to chat history
167
+ st.session_state.chat_history.append({"role": "user", "content": prompt})
168
+
169
+ # Display user message
170
+ with st.chat_message("user"):
171
+ st.markdown(prompt)
172
+
173
+ # Generate response
174
+ with st.chat_message("assistant"):
175
+ with st.spinner("Thinking..."):
176
+ try:
177
+ result = st.session_state.qa_chain({
178
+ "question": prompt
179
+ })
180
+
181
+ answer = result["answer"]
182
+ sources = [doc.page_content for doc in result.get("source_documents", [])]
183
+
184
+ st.markdown(answer)
185
+
186
+ # Show sources
187
+ if sources:
188
+ with st.expander("📚 View Sources"):
189
+ for i, source in enumerate(sources):
190
+ st.markdown(f"**Source {i+1}:** {source}")
191
+
192
+ # Add assistant message to chat history
193
+ st.session_state.chat_history.append({
194
+ "role": "assistant",
195
+ "content": answer,
196
+ "sources": sources
197
+ })
198
+
199
+ except Exception as e:
200
+ error_msg = f"Sorry, I encountered an error: {str(e)}"
201
+ st.error(error_msg)
202
+ st.session_state.chat_history.append({
203
+ "role": "assistant",
204
+ "content": error_msg
205
+ })