udituen commited on
Commit
ce50c84
·
2 Parent(s): 362de84 c1d3591

modify dockerfile

Browse files
Files changed (2) hide show
  1. README.md +17 -0
  2. app_archive.py +0 -222
README.md CHANGED
@@ -13,3 +13,20 @@ short_description: Upload a document and ask questions based on its content
13
 
14
  # Welcome to DocsQA!
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # Welcome to DocsQA!
15
 
16
+
17
+ ```
18
+ project/
19
+ ├── app.py # Main entry point
20
+ ├── config.py # Configuration settings
21
+ ├── utils/
22
+ │ └── document_processor.py # Document reading & processing
23
+ ├── models/
24
+ │ ├── llm_loader.py # Qwen LLM loading
25
+ │ └── retriever.py # FAISS retriever setup
26
+ ├── chains/
27
+ │ └── qa_chain.py # QA chain creation
28
+ └── ui/
29
+ ├── sidebar.py # Sidebar components
30
+ └── chat.py # Chat interface
31
+ ```
32
+
app_archive.py DELETED
@@ -1,222 +0,0 @@
1
- import streamlit as st
2
- from langchain_community.vectorstores import FAISS
3
- from langchain_community.embeddings import HuggingFaceEmbeddings
4
- from langchain.chains import ConversationalRetrievalChain
5
- from langchain_community.llms import HuggingFacePipeline
6
- from langchain.memory import ConversationBufferMemory
7
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
8
- import torch
9
- import io
10
-
11
- # For PDF processing
12
- try:
13
- from pypdf import PdfReader
14
- except ImportError:
15
- from PyPDF2 import PdfReader
16
-
17
- # ----------------------
18
- # Sample Text Content
19
- # ----------------------
20
- SAMPLE_TEXT = """Fertilizers help improve soil nutrients and crop yield.
21
- Irrigation methods vary depending on climate and crop type.
22
- Crop rotation can enhance soil health and reduce pests.
23
- Composting is an organic way to enrich the soil.
24
- Weed management is essential for higher productivity."""
25
-
26
- EXAMPLE_QUESTIONS = [
27
- "What is this document about?",
28
- "What is the role of fertilizers in agriculture?",
29
- "Why is crop rotation important?",
30
- "How does composting help farming?",
31
- ]
32
-
33
- # Helper: Read uploaded file (TXT or PDF)
34
- def read_uploaded_file(uploaded_file):
35
- uploaded_file.seek(0)
36
-
37
- if uploaded_file.type == "application/pdf":
38
- # Handle PDF files
39
- pdf_reader = PdfReader(io.BytesIO(uploaded_file.read()))
40
- text = ""
41
- for page in pdf_reader.pages:
42
- text += page.extract_text() + "\n"
43
- else:
44
- # Handle text files
45
- text = uploaded_file.read().decode("utf-8")
46
-
47
- # Split into chunks by lines
48
- docs = text.split("\n")
49
- docs = [doc.strip() for doc in docs if doc.strip()]
50
- return docs
51
-
52
- # Load Qwen LLM
53
- @st.cache_resource
54
- def load_llm():
55
- model_name = "Qwen/Qwen2.5-1.5B-Instruct" # Using smaller Qwen model for efficiency
56
-
57
- # Load tokenizer and model
58
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
59
- model = AutoModelForCausalLM.from_pretrained(
60
- model_name,
61
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
62
- device_map="auto" if torch.cuda.is_available() else None,
63
- trust_remote_code=True
64
- )
65
-
66
- # Create pipeline
67
- pipe = pipeline(
68
- "text-generation",
69
- model=model,
70
- tokenizer=tokenizer,
71
- max_new_tokens=256,
72
- temperature=0.7,
73
- top_p=0.95,
74
- do_sample=True,
75
- return_full_text=False
76
- )
77
-
78
- return HuggingFacePipeline(pipeline=pipe)
79
-
80
- # Build retriever from uploaded content
81
- def build_retriever(docs):
82
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
83
- db = FAISS.from_texts(docs, embeddings)
84
- return db.as_retriever()
85
-
86
- # Initialize session state
87
- if 'chat_history' not in st.session_state:
88
- st.session_state.chat_history = []
89
- if 'qa_chain' not in st.session_state:
90
- st.session_state.qa_chain = None
91
- if 'document_processed' not in st.session_state:
92
- st.session_state.document_processed = False
93
-
94
- # Streamlit UI
95
- st.title("DocsQA: Chat with Your Document")
96
-
97
- st.markdown("Upload a document and have a conversation about its contents! (Powered by Qwen)")
98
-
99
- # Sidebar for document upload
100
- with st.sidebar:
101
- st.header("📄 Document Upload")
102
-
103
- # Add sample file download button
104
- st.download_button(
105
- label="📄 Download Sample File",
106
- data=SAMPLE_TEXT,
107
- file_name="sample_agri.txt",
108
- mime="text/plain"
109
- )
110
-
111
- uploaded_file = st.file_uploader("Upload your file", type=["txt", "pdf"])
112
-
113
- if uploaded_file is not None:
114
- st.success(f"{uploaded_file.name}")
115
-
116
- # Process document button
117
- if st.button("Process Document", type="primary"):
118
- with st.spinner("Processing document..."):
119
- try:
120
- docs = read_uploaded_file(uploaded_file)
121
-
122
- if len(docs) > 0:
123
- retriever = build_retriever(docs)
124
- llm = load_llm()
125
-
126
- # Create conversational chain with memory
127
- memory = ConversationBufferMemory(
128
- memory_key="chat_history",
129
- return_messages=True,
130
- output_key="answer"
131
- )
132
-
133
- st.session_state.qa_chain = ConversationalRetrievalChain.from_llm(
134
- llm=llm,
135
- retriever=retriever,
136
- memory=memory,
137
- return_source_documents=True
138
- )
139
-
140
- st.session_state.document_processed = True
141
- st.session_state.chat_history = []
142
- st.success(f"Processed {len(docs)} text chunks!")
143
- st.rerun()
144
- else:
145
- st.error("No content found in file.")
146
-
147
- except Exception as e:
148
- st.error(f"Error: {str(e)}")
149
-
150
- # Show example questions
151
- if st.session_state.document_processed:
152
- st.markdown("---")
153
- st.subheader("💡 Example Questions")
154
- for q in EXAMPLE_QUESTIONS:
155
- if st.button(q, key=f"example_{q}"):
156
- st.session_state.user_input = q
157
- st.rerun()
158
-
159
- # Clear chat button
160
- if st.session_state.chat_history:
161
- st.markdown("---")
162
- if st.button("🗑️ Clear Chat History"):
163
- st.session_state.chat_history = []
164
- st.rerun()
165
-
166
- # Main chat interface
167
- if not st.session_state.document_processed:
168
- st.info("👈 Please upload a document in the sidebar and click 'Process Document' to start chatting!")
169
- else:
170
- # Display chat history
171
- for message in st.session_state.chat_history:
172
- with st.chat_message(message["role"]):
173
- st.markdown(message["content"])
174
-
175
- # Show sources if available
176
- if message["role"] == "assistant" and "sources" in message:
177
- with st.expander("📚 View Sources"):
178
- for i, source in enumerate(message["sources"]):
179
- st.markdown(f"**Source {i+1}:** {source}")
180
-
181
- # Chat input
182
- if prompt := st.chat_input("Ask a question about your document..."):
183
- # Add user message to chat history
184
- st.session_state.chat_history.append({"role": "user", "content": prompt})
185
-
186
- # Display user message
187
- with st.chat_message("user"):
188
- st.markdown(prompt)
189
-
190
- # Generate response
191
- with st.chat_message("assistant"):
192
- with st.spinner("Thinking..."):
193
- try:
194
- result = st.session_state.qa_chain({
195
- "question": prompt
196
- })
197
-
198
- answer = result["answer"]
199
- sources = [doc.page_content for doc in result.get("source_documents", [])]
200
-
201
- st.markdown(answer)
202
-
203
- # Show sources
204
- if sources:
205
- with st.expander("📚 View Sources"):
206
- for i, source in enumerate(sources):
207
- st.markdown(f"**Source {i+1}:** {source}")
208
-
209
- # Add assistant message to chat history
210
- st.session_state.chat_history.append({
211
- "role": "assistant",
212
- "content": answer,
213
- "sources": sources
214
- })
215
-
216
- except Exception as e:
217
- error_msg = f"Sorry, I encountered an error: {str(e)}"
218
- st.error(error_msg)
219
- st.session_state.chat_history.append({
220
- "role": "assistant",
221
- "content": error_msg
222
- })