MnM369 committed on
Commit
a66b1aa
·
verified ·
1 Parent(s): ec4354c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +334 -334
app.py CHANGED
@@ -1,335 +1,335 @@
1
- # ========== Standard Library ==========
2
- import os
3
- import tempfile
4
- import zipfile
5
- from typing import List, Optional, Tuple, Union
6
- import collections
7
-
8
-
9
- # ========== Third-Party Libraries ==========
10
- import gradio as gr
11
- from groq import Groq
12
- from langchain.text_splitter import RecursiveCharacterTextSplitter
13
- from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
14
- from langchain_core.output_parsers import StrOutputParser
15
- from langchain_core.prompts import PromptTemplate
16
- from langchain_core.runnables import RunnablePassthrough
17
- from langchain_core.vectorstores import InMemoryVectorStore
18
- from langchain_groq import ChatGroq
19
- from langchain_huggingface import HuggingFaceEmbeddings
20
-
21
- # ========== Configs ==========
22
- TITLE = """<h1 align="center">🗨️🦙 Llama 4 Docx Chatter</h1>"""
23
- AVATAR_IMAGES = (
24
- None,
25
- "./logo.jpg",
26
- )
27
-
28
- # Acceptable file extensions
29
- TEXT_EXTENSIONS = [".docx", ".zip"]
30
-
31
- # ========== Models & Clients ==========
32
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
33
- #GROQ_API_KEY = "Your API Key Here
34
- client = Groq(api_key=GROQ_API_KEY)
35
- llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", api_key=GROQ_API_KEY)
36
- embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
37
-
38
- # ========== Core Components ==========
39
- text_splitter = RecursiveCharacterTextSplitter(
40
- chunk_size=1000,
41
- chunk_overlap=100,
42
- separators=["\n\n", "\n"],
43
- )
44
-
45
- rag_template = """You are an expert assistant tasked with answering questions based on the provided documents.
46
- Use only the given context to generate your answer.
47
- If the answer cannot be found in the context, clearly state that you do not know.
48
- Be detailed and precise in your response, but avoid mentioning or referencing the context itself.
49
- Context:
50
- {context}
51
- Question:
52
- {question}
53
- Answer:"""
54
- rag_prompt = PromptTemplate.from_template(rag_template)
55
-
56
-
57
- # ========== App State ==========
58
- class AppState:
59
- vectorstore: Optional[InMemoryVectorStore] = None
60
- rag_chain = None
61
-
62
-
63
- state = AppState()
64
-
65
- # ========== Utility Functions ==========
66
-
67
-
68
- def load_documents_from_files(files: List[str]) -> List:
69
- """Load documents from uploaded files directly without moving."""
70
- all_documents = []
71
-
72
- # Temporary directory if ZIP needs extraction
73
- with tempfile.TemporaryDirectory() as temp_dir:
74
- for file_path in files:
75
- ext = os.path.splitext(file_path)[1].lower()
76
-
77
- if ext == ".zip":
78
- # Extract ZIP inside temp_dir
79
- with zipfile.ZipFile(file_path, "r") as zip_ref:
80
- zip_ref.extractall(temp_dir)
81
-
82
- # Load all docx from extracted zip
83
- loader = DirectoryLoader(
84
- path=temp_dir,
85
- glob="**/*.docx",
86
- use_multithreading=True,
87
- )
88
- docs = loader.load()
89
- all_documents.extend(docs)
90
-
91
- elif ext == ".docx":
92
- # Load single docx directly
93
- loader = UnstructuredFileLoader(file_path)
94
- docs = loader.load()
95
- all_documents.extend(docs)
96
-
97
- return all_documents
98
-
99
-
100
- def get_last_user_message(chatbot: List[Union[gr.ChatMessage, dict]]) -> Optional[str]:
101
- """Get last user prompt."""
102
- for message in reversed(chatbot):
103
- content = (
104
- message.get("content") if isinstance(message, dict) else message.content
105
- )
106
- if (
107
- message.get("role") if isinstance(message, dict) else message.role
108
- ) == "user":
109
- return content
110
- return None
111
-
112
-
113
- # ========== Main Logic ==========
114
-
115
-
116
-
117
-
118
- def upload_files(
119
- files: Optional[List[str]], chatbot: List[Union[gr.ChatMessage, dict]]
120
- ):
121
- """Handle file upload - .docx or .zip containing docx."""
122
- if not files:
123
- return chatbot
124
-
125
- file_summaries = [] # <-- Collect formatted file/folder info
126
- documents = []
127
-
128
- with tempfile.TemporaryDirectory() as temp_dir:
129
- for file_path in files:
130
- filename = os.path.basename(file_path)
131
- ext = os.path.splitext(file_path)[1].lower()
132
-
133
- if ext == ".zip":
134
- file_summaries.append(f"📦 **{filename}** (ZIP file) contains:")
135
- try:
136
- with zipfile.ZipFile(file_path, "r") as zip_ref:
137
- zip_ref.extractall(temp_dir)
138
- zip_contents = zip_ref.namelist()
139
-
140
- # Group files by folder
141
- folder_map = collections.defaultdict(list)
142
- for item in zip_contents:
143
- if item.endswith("/"):
144
- continue # skip folder entries themselves
145
- folder = os.path.dirname(item)
146
- file_name = os.path.basename(item)
147
- folder_map[folder].append(file_name)
148
-
149
- # Format nicely
150
- for folder, files_in_folder in folder_map.items():
151
- if folder:
152
- file_summaries.append(f"📂 {folder}/")
153
- else:
154
- file_summaries.append(f"📄 (root)")
155
- for f in files_in_folder:
156
- file_summaries.append(f" - {f}")
157
-
158
- # Load docx files extracted from ZIP
159
- loader = DirectoryLoader(
160
- path=temp_dir,
161
- glob="**/*.docx",
162
- use_multithreading=True,
163
- )
164
- docs = loader.load()
165
- documents.extend(docs)
166
-
167
- except zipfile.BadZipFile:
168
- chatbot.append(
169
- gr.ChatMessage(
170
- role="assistant",
171
- content=f"❌ Failed to open ZIP file: {filename}",
172
- )
173
- )
174
-
175
- elif ext == ".docx":
176
- file_summaries.append(f"📄 **{filename}**")
177
- loader = UnstructuredFileLoader(file_path)
178
- docs = loader.load()
179
- documents.extend(docs)
180
-
181
- else:
182
- file_summaries.append(f"❌ Unsupported file type: {filename}")
183
-
184
- if not documents:
185
- chatbot.append(
186
- gr.ChatMessage(
187
- role="assistant", content="No valid .docx files found in upload."
188
- )
189
- )
190
- return chatbot
191
-
192
- # Split documents
193
- chunks = text_splitter.split_documents(documents)
194
- if not chunks:
195
- chatbot.append(
196
- gr.ChatMessage(
197
- role="assistant", content="Failed to split documents into chunks."
198
- )
199
- )
200
- return chatbot
201
-
202
- # Create Vectorstore
203
- state.vectorstore = InMemoryVectorStore.from_documents(
204
- documents=chunks,
205
- embedding=embed_model,
206
- )
207
- retriever = state.vectorstore.as_retriever()
208
-
209
- # Build RAG Chain
210
- state.rag_chain = (
211
- {"context": retriever, "question": RunnablePassthrough()}
212
- | rag_prompt
213
- | llm
214
- | StrOutputParser()
215
- )
216
-
217
- # Final display
218
- chatbot.append(
219
- gr.ChatMessage(
220
- role="assistant",
221
- content="**Uploaded Files:**\n"
222
- + "\n".join(file_summaries)
223
- + "\n\n✅ Ready to chat!",
224
- )
225
- )
226
- return chatbot
227
-
228
-
229
- def user_message(
230
- text_prompt: str, chatbot: List[Union[gr.ChatMessage, dict]]
231
- ) -> Tuple[str, List[Union[gr.ChatMessage, dict]]]:
232
- """Add user's text input to conversation."""
233
- if text_prompt.strip():
234
- chatbot.append(gr.ChatMessage(role="user", content=text_prompt))
235
- return "", chatbot
236
-
237
-
238
- def process_query(
239
- chatbot: List[Union[gr.ChatMessage, dict]],
240
- ) -> List[Union[gr.ChatMessage, dict]]:
241
- """Process user's query through RAG pipeline."""
242
- prompt = get_last_user_message(chatbot)
243
- if not prompt:
244
- chatbot.append(
245
- gr.ChatMessage(role="assistant", content="Please type a question first.")
246
- )
247
- return chatbot
248
-
249
- if state.rag_chain is None:
250
- chatbot.append(
251
- gr.ChatMessage(role="assistant", content="Please upload documents first.")
252
- )
253
- return chatbot
254
-
255
- chatbot.append(gr.ChatMessage(role="assistant", content="Thinking..."))
256
-
257
- try:
258
- response = state.rag_chain.invoke(prompt)
259
- chatbot[-1].content = response
260
- except Exception as e:
261
- chatbot[-1].content = f"Error: {str(e)}"
262
-
263
- return chatbot
264
-
265
-
266
- def reset_app(
267
- chatbot: List[Union[gr.ChatMessage, dict]],
268
- ) -> List[Union[gr.ChatMessage, dict]]:
269
- """Reset application state."""
270
- state.vectorstore = None
271
- state.rag_chain = None
272
- return [
273
- gr.ChatMessage(
274
- role="assistant", content="App reset! Upload new documents to start."
275
- )
276
- ]
277
-
278
-
279
- # ========== UI Layout ==========
280
-
281
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
282
- gr.HTML(TITLE)
283
- chatbot = gr.Chatbot(
284
- label="Llama 4 RAG",
285
- type="messages",
286
- bubble_full_width=False,
287
- avatar_images=AVATAR_IMAGES,
288
- scale=2,
289
- height=350,
290
- )
291
-
292
- with gr.Row(equal_height=True):
293
- text_prompt = gr.Textbox(
294
- placeholder="Ask a question...", show_label=False, autofocus=True, scale=28
295
- )
296
- send_button = gr.Button(
297
- value="Send",
298
- variant="primary",
299
- scale=1,
300
- min_width=80,
301
- )
302
- upload_button = gr.UploadButton(
303
- label="Upload",
304
- file_count="multiple",
305
- file_types=TEXT_EXTENSIONS,
306
- scale=1,
307
- min_width=80,
308
- )
309
- reset_button = gr.Button(
310
- value="Reset",
311
- variant="stop",
312
- scale=1,
313
- min_width=80,
314
- )
315
-
316
- send_button.click(
317
- fn=user_message,
318
- inputs=[text_prompt, chatbot],
319
- outputs=[text_prompt, chatbot],
320
- queue=False,
321
- ).then(fn=process_query, inputs=[chatbot], outputs=[chatbot])
322
-
323
- text_prompt.submit(
324
- fn=user_message,
325
- inputs=[text_prompt, chatbot],
326
- outputs=[text_prompt, chatbot],
327
- queue=False,
328
- ).then(fn=process_query, inputs=[chatbot], outputs=[chatbot])
329
-
330
- upload_button.upload(
331
- fn=upload_files, inputs=[upload_button, chatbot], outputs=[chatbot], queue=False
332
- )
333
- reset_button.click(fn=reset_app, inputs=[chatbot], outputs=[chatbot], queue=False)
334
-
335
  demo.queue().launch(share=True)
 
1
+ # ========== Standard Library ==========
2
+ import os
3
+ import tempfile
4
+ import zipfile
5
+ from typing import List, Optional, Tuple, Union
6
+ import collections
7
+
8
+
9
+ # ========== Third-Party Libraries ==========
10
+ import gradio as gr
11
+ from groq import Groq
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
14
+ from langchain_core.output_parsers import StrOutputParser
15
+ from langchain_core.prompts import PromptTemplate
16
+ from langchain_core.runnables import RunnablePassthrough
17
+ from langchain_core.vectorstores import InMemoryVectorStore
18
+ from langchain_groq import ChatGroq
19
+ from langchain_huggingface import HuggingFaceEmbeddings
20
+
21
+ # ========== Configs ==========
22
+ TITLE = """<h1 align="center">🗨️🦙 Llama 4 Docx Chatter</h1>"""
23
+ AVATAR_IMAGES = (
24
+ None,
25
+ "./logo.jpg",
26
+ )
27
+
28
+ # Acceptable file extensions
29
+ TEXT_EXTENSIONS = [".docx", ".zip"]
30
+
31
+ # ========== Models & Clients ==========
32
+ # GROQ_API_KEY = os.getenv("GROQ_API_KEY")
33
+ GROQ_API_KEY = "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"
34
+ client = Groq(api_key=GROQ_API_KEY)
35
+ llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", api_key=GROQ_API_KEY)
36
+ embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
37
+
38
+ # ========== Core Components ==========
39
+ text_splitter = RecursiveCharacterTextSplitter(
40
+ chunk_size=1000,
41
+ chunk_overlap=100,
42
+ separators=["\n\n", "\n"],
43
+ )
44
+
45
+ rag_template = """You are an expert assistant tasked with answering questions based on the provided documents.
46
+ Use only the given context to generate your answer.
47
+ If the answer cannot be found in the context, clearly state that you do not know.
48
+ Be detailed and precise in your response, but avoid mentioning or referencing the context itself.
49
+ Context:
50
+ {context}
51
+ Question:
52
+ {question}
53
+ Answer:"""
54
+ rag_prompt = PromptTemplate.from_template(rag_template)
55
+
56
+
57
+ # ========== App State ==========
58
+ class AppState:
59
+ vectorstore: Optional[InMemoryVectorStore] = None
60
+ rag_chain = None
61
+
62
+
63
+ state = AppState()
64
+
65
+ # ========== Utility Functions ==========
66
+
67
+
68
+ def load_documents_from_files(files: List[str]) -> List:
69
+ """Load documents from uploaded files directly without moving."""
70
+ all_documents = []
71
+
72
+ # Temporary directory if ZIP needs extraction
73
+ with tempfile.TemporaryDirectory() as temp_dir:
74
+ for file_path in files:
75
+ ext = os.path.splitext(file_path)[1].lower()
76
+
77
+ if ext == ".zip":
78
+ # Extract ZIP inside temp_dir
79
+ with zipfile.ZipFile(file_path, "r") as zip_ref:
80
+ zip_ref.extractall(temp_dir)
81
+
82
+ # Load all docx from extracted zip
83
+ loader = DirectoryLoader(
84
+ path=temp_dir,
85
+ glob="**/*.docx",
86
+ use_multithreading=True,
87
+ )
88
+ docs = loader.load()
89
+ all_documents.extend(docs)
90
+
91
+ elif ext == ".docx":
92
+ # Load single docx directly
93
+ loader = UnstructuredFileLoader(file_path)
94
+ docs = loader.load()
95
+ all_documents.extend(docs)
96
+
97
+ return all_documents
98
+
99
+
100
+ def get_last_user_message(chatbot: List[Union[gr.ChatMessage, dict]]) -> Optional[str]:
101
+ """Get last user prompt."""
102
+ for message in reversed(chatbot):
103
+ content = (
104
+ message.get("content") if isinstance(message, dict) else message.content
105
+ )
106
+ if (
107
+ message.get("role") if isinstance(message, dict) else message.role
108
+ ) == "user":
109
+ return content
110
+ return None
111
+
112
+
113
+ # ========== Main Logic ==========
114
+
115
+
116
+
117
+
118
+ def upload_files(
119
+ files: Optional[List[str]], chatbot: List[Union[gr.ChatMessage, dict]]
120
+ ):
121
+ """Handle file upload - .docx or .zip containing docx."""
122
+ if not files:
123
+ return chatbot
124
+
125
+ file_summaries = [] # <-- Collect formatted file/folder info
126
+ documents = []
127
+
128
+ with tempfile.TemporaryDirectory() as temp_dir:
129
+ for file_path in files:
130
+ filename = os.path.basename(file_path)
131
+ ext = os.path.splitext(file_path)[1].lower()
132
+
133
+ if ext == ".zip":
134
+ file_summaries.append(f"📦 **{filename}** (ZIP file) contains:")
135
+ try:
136
+ with zipfile.ZipFile(file_path, "r") as zip_ref:
137
+ zip_ref.extractall(temp_dir)
138
+ zip_contents = zip_ref.namelist()
139
+
140
+ # Group files by folder
141
+ folder_map = collections.defaultdict(list)
142
+ for item in zip_contents:
143
+ if item.endswith("/"):
144
+ continue # skip folder entries themselves
145
+ folder = os.path.dirname(item)
146
+ file_name = os.path.basename(item)
147
+ folder_map[folder].append(file_name)
148
+
149
+ # Format nicely
150
+ for folder, files_in_folder in folder_map.items():
151
+ if folder:
152
+ file_summaries.append(f"📂 {folder}/")
153
+ else:
154
+ file_summaries.append(f"📄 (root)")
155
+ for f in files_in_folder:
156
+ file_summaries.append(f" - {f}")
157
+
158
+ # Load docx files extracted from ZIP
159
+ loader = DirectoryLoader(
160
+ path=temp_dir,
161
+ glob="**/*.docx",
162
+ use_multithreading=True,
163
+ )
164
+ docs = loader.load()
165
+ documents.extend(docs)
166
+
167
+ except zipfile.BadZipFile:
168
+ chatbot.append(
169
+ gr.ChatMessage(
170
+ role="assistant",
171
+ content=f"❌ Failed to open ZIP file: {filename}",
172
+ )
173
+ )
174
+
175
+ elif ext == ".docx":
176
+ file_summaries.append(f"📄 **{filename}**")
177
+ loader = UnstructuredFileLoader(file_path)
178
+ docs = loader.load()
179
+ documents.extend(docs)
180
+
181
+ else:
182
+ file_summaries.append(f"❌ Unsupported file type: {filename}")
183
+
184
+ if not documents:
185
+ chatbot.append(
186
+ gr.ChatMessage(
187
+ role="assistant", content="No valid .docx files found in upload."
188
+ )
189
+ )
190
+ return chatbot
191
+
192
+ # Split documents
193
+ chunks = text_splitter.split_documents(documents)
194
+ if not chunks:
195
+ chatbot.append(
196
+ gr.ChatMessage(
197
+ role="assistant", content="Failed to split documents into chunks."
198
+ )
199
+ )
200
+ return chatbot
201
+
202
+ # Create Vectorstore
203
+ state.vectorstore = InMemoryVectorStore.from_documents(
204
+ documents=chunks,
205
+ embedding=embed_model,
206
+ )
207
+ retriever = state.vectorstore.as_retriever()
208
+
209
+ # Build RAG Chain
210
+ state.rag_chain = (
211
+ {"context": retriever, "question": RunnablePassthrough()}
212
+ | rag_prompt
213
+ | llm
214
+ | StrOutputParser()
215
+ )
216
+
217
+ # Final display
218
+ chatbot.append(
219
+ gr.ChatMessage(
220
+ role="assistant",
221
+ content="**Uploaded Files:**\n"
222
+ + "\n".join(file_summaries)
223
+ + "\n\n✅ Ready to chat!",
224
+ )
225
+ )
226
+ return chatbot
227
+
228
+
229
+ def user_message(
230
+ text_prompt: str, chatbot: List[Union[gr.ChatMessage, dict]]
231
+ ) -> Tuple[str, List[Union[gr.ChatMessage, dict]]]:
232
+ """Add user's text input to conversation."""
233
+ if text_prompt.strip():
234
+ chatbot.append(gr.ChatMessage(role="user", content=text_prompt))
235
+ return "", chatbot
236
+
237
+
238
+ def process_query(
239
+ chatbot: List[Union[gr.ChatMessage, dict]],
240
+ ) -> List[Union[gr.ChatMessage, dict]]:
241
+ """Process user's query through RAG pipeline."""
242
+ prompt = get_last_user_message(chatbot)
243
+ if not prompt:
244
+ chatbot.append(
245
+ gr.ChatMessage(role="assistant", content="Please type a question first.")
246
+ )
247
+ return chatbot
248
+
249
+ if state.rag_chain is None:
250
+ chatbot.append(
251
+ gr.ChatMessage(role="assistant", content="Please upload documents first.")
252
+ )
253
+ return chatbot
254
+
255
+ chatbot.append(gr.ChatMessage(role="assistant", content="Thinking..."))
256
+
257
+ try:
258
+ response = state.rag_chain.invoke(prompt)
259
+ chatbot[-1].content = response
260
+ except Exception as e:
261
+ chatbot[-1].content = f"Error: {str(e)}"
262
+
263
+ return chatbot
264
+
265
+
266
+ def reset_app(
267
+ chatbot: List[Union[gr.ChatMessage, dict]],
268
+ ) -> List[Union[gr.ChatMessage, dict]]:
269
+ """Reset application state."""
270
+ state.vectorstore = None
271
+ state.rag_chain = None
272
+ return [
273
+ gr.ChatMessage(
274
+ role="assistant", content="App reset! Upload new documents to start."
275
+ )
276
+ ]
277
+
278
+
279
+ # ========== UI Layout ==========
280
+
281
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
282
+ gr.HTML(TITLE)
283
+ chatbot = gr.Chatbot(
284
+ label="Llama 4 RAG",
285
+ type="messages",
286
+ bubble_full_width=False,
287
+ avatar_images=AVATAR_IMAGES,
288
+ scale=2,
289
+ height=350,
290
+ )
291
+
292
+ with gr.Row(equal_height=True):
293
+ text_prompt = gr.Textbox(
294
+ placeholder="Ask a question...", show_label=False, autofocus=True, scale=28
295
+ )
296
+ send_button = gr.Button(
297
+ value="Send",
298
+ variant="primary",
299
+ scale=1,
300
+ min_width=80,
301
+ )
302
+ upload_button = gr.UploadButton(
303
+ label="Upload",
304
+ file_count="multiple",
305
+ file_types=TEXT_EXTENSIONS,
306
+ scale=1,
307
+ min_width=80,
308
+ )
309
+ reset_button = gr.Button(
310
+ value="Reset",
311
+ variant="stop",
312
+ scale=1,
313
+ min_width=80,
314
+ )
315
+
316
+ send_button.click(
317
+ fn=user_message,
318
+ inputs=[text_prompt, chatbot],
319
+ outputs=[text_prompt, chatbot],
320
+ queue=False,
321
+ ).then(fn=process_query, inputs=[chatbot], outputs=[chatbot])
322
+
323
+ text_prompt.submit(
324
+ fn=user_message,
325
+ inputs=[text_prompt, chatbot],
326
+ outputs=[text_prompt, chatbot],
327
+ queue=False,
328
+ ).then(fn=process_query, inputs=[chatbot], outputs=[chatbot])
329
+
330
+ upload_button.upload(
331
+ fn=upload_files, inputs=[upload_button, chatbot], outputs=[chatbot], queue=False
332
+ )
333
+ reset_button.click(fn=reset_app, inputs=[chatbot], outputs=[chatbot], queue=False)
334
+
335
  demo.queue().launch(share=True)