Girinath11 commited on
Commit
bfe51e4
·
verified ·
1 Parent(s): 44c1a2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -124
app.py CHANGED
@@ -6,11 +6,11 @@ import docx
6
  from sentence_transformers import SentenceTransformer, util
7
  import faiss
8
  import numpy as np
9
- from transformers import AutoTokenizer, AutoModelForCausalLM, VisionEncoderDecoderModel, ViTImageProcessor
10
  import torch
11
  from datetime import datetime
12
  import fitz # PyMuPDF
13
- import io
14
 
15
  # Load models
16
  print("Loading models...")
@@ -25,12 +25,13 @@ llm_model = AutoModelForCausalLM.from_pretrained(
25
  )
26
 
27
  print("Loading image caption model...")
28
- # Better image captioning model
29
- image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
30
- caption_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
31
- caption_model = caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
 
32
 
33
- print("βœ… Models loaded!")
34
 
35
  # Storage
36
  documents = []
@@ -39,86 +40,91 @@ image_captions = []
39
  embeddings_index = None
40
 
41
  def generate_image_caption(image_path):
42
- """Generate better caption for image"""
43
  try:
44
  img = Image.open(image_path).convert('RGB')
45
 
46
- # Preprocess
47
- pixel_values = image_processor(images=img, return_tensors="pt").pixel_values
48
- pixel_values = pixel_values.to(caption_model.device)
49
-
50
- # Generate caption
51
- output_ids = caption_model.generate(pixel_values, max_length=30, num_beams=4)
52
- caption = image_processor.batch_decode(output_ids, skip_special_tokens=True)[0]
 
 
53
 
54
  return caption.strip()
55
  except Exception as e:
56
  print(f"Caption error: {e}")
57
- return "Image content"
58
 
59
  def extract_images_from_pdf(pdf_path):
60
  """Extract images from PDF"""
61
  extracted = []
62
  try:
63
  doc = fitz.open(pdf_path)
 
64
  for page_num in range(len(doc)):
65
  page = doc[page_num]
66
  images_list = page.get_images(full=True)
67
 
68
  for img_index, img in enumerate(images_list):
69
- xref = img[0]
70
- base_image = doc.extract_image(xref)
71
- image_bytes = base_image["image"]
72
-
73
- # Save
74
- img_path = f"/tmp/pdf_img_p{page_num+1}_{img_index}.png"
75
- with open(img_path, "wb") as f:
76
- f.write(image_bytes)
77
-
78
- # Check if valid image
79
  try:
 
 
 
 
 
 
 
 
 
 
80
  test_img = Image.open(img_path)
81
  width, height = test_img.size
82
- # Skip very small images (likely icons/logos)
83
- if width > 100 and height > 100:
 
84
  extracted.append({
85
  'path': img_path,
86
  'page': page_num + 1,
87
  'source': Path(pdf_path).name
88
  })
89
- except:
90
- pass
91
 
92
  doc.close()
93
  except Exception as e:
94
- print(f"PDF image error: {e}")
95
 
96
  return extracted
97
 
98
  def extract_pdf_text(pdf_path):
99
- """Extract text"""
100
  chunks = []
101
  with open(pdf_path, 'rb') as f:
102
  pdf = PyPDF2.PdfReader(f)
103
  for i, page in enumerate(pdf.pages):
104
  text = page.extract_text()
105
  if text.strip():
106
- chunks.append({'text': text, 'page': i+1, 'source': Path(pdf_path).name})
 
 
 
 
107
  return chunks
108
 
109
  def extract_docx_text(docx_path):
110
- """Extract from DOCX"""
111
  doc = docx.Document(docx_path)
112
  text = '\n'.join([p.text for p in doc.paragraphs if p.text.strip()])
113
  return [{'text': text, 'source': Path(docx_path).name}]
114
 
115
  def extract_txt_text(txt_path):
116
- """Extract from TXT"""
117
  with open(txt_path, 'r', encoding='utf-8') as f:
118
  return [{'text': f.read(), 'source': Path(txt_path).name}]
119
 
120
  def chunk_text(text, size=400):
121
- """Chunk text"""
122
  words = text.split()
123
  chunks = []
124
  for i in range(0, len(words), size):
@@ -128,11 +134,11 @@ def chunk_text(text, size=400):
128
  return chunks
129
 
130
  def process_files(files, progress=gr.Progress()):
131
- """Process files"""
132
  global documents, images, image_captions, embeddings_index
133
 
134
  if not files:
135
- return "⚠️ Upload files first"
136
 
137
  documents = []
138
  images = []
@@ -145,7 +151,7 @@ def process_files(files, progress=gr.Progress()):
145
  ext = Path(file.name).suffix.lower()
146
 
147
  if ext == '.pdf':
148
- # Text
149
  chunks = extract_pdf_text(file.name)
150
  for chunk in chunks:
151
  for small_chunk in chunk_text(chunk['text']):
@@ -155,33 +161,41 @@ def process_files(files, progress=gr.Progress()):
155
  'page': chunk['page']
156
  })
157
 
158
- # Images
159
- pdf_imgs = extract_images_from_pdf(file.name)
160
- for img in pdf_imgs:
161
- images.append(img)
162
  caption = generate_image_caption(img['path'])
163
- image_captions.append(caption)
 
 
164
 
165
  elif ext == '.docx':
166
  chunks = extract_docx_text(file.name)
167
  for chunk in chunks:
168
  for small_chunk in chunk_text(chunk['text']):
169
- documents.append({'text': small_chunk, 'source': chunk['source']})
 
 
 
170
 
171
  elif ext == '.txt':
172
  chunks = extract_txt_text(file.name)
173
  for chunk in chunks:
174
  for small_chunk in chunk_text(chunk['text']):
175
- documents.append({'text': small_chunk, 'source': chunk['source']})
 
 
 
176
 
177
  elif ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
178
- images.append({
179
- 'path': file.name,
180
- 'source': Path(file.name).name,
181
- 'page': ''
182
- })
183
  caption = generate_image_caption(file.name)
184
- image_captions.append(caption)
 
 
 
 
 
 
185
 
186
  # Create embeddings
187
  progress(0.9, desc="Creating embeddings...")
@@ -193,24 +207,24 @@ def process_files(files, progress=gr.Progress()):
193
  index.add(embeddings.astype('float32'))
194
  embeddings_index = index
195
 
196
- progress(1.0, desc="βœ… Done!")
197
 
198
- status = f"βœ… **Processed:**\n"
199
- status += f"πŸ“„ Text chunks: {len(documents)}\n"
200
- status += f"πŸ–ΌοΈ Images found: {len(images)}"
201
 
202
  if images:
203
- status += "\n\n**Images with captions:**\n"
204
- for i, (img, cap) in enumerate(zip(images[:5], image_captions[:5]), 1):
205
  status += f"{i}. {img['source']}"
206
  if img.get('page'):
207
  status += f" (Page {img['page']})"
208
- status += f": {cap}\n"
209
 
210
  return status
211
 
212
  def search_documents(query, k=3):
213
- """Search documents"""
214
  if not documents or embeddings_index is None:
215
  return []
216
 
@@ -223,8 +237,8 @@ def search_documents(query, k=3):
223
  results.append(documents[idx])
224
  return results
225
 
226
- def find_relevant_images(query, top_k=2):
227
- """Find relevant images based on query"""
228
  if not images or not image_captions:
229
  return [], []
230
 
@@ -232,44 +246,56 @@ def find_relevant_images(query, top_k=2):
232
  query_emb = embedding_model.encode(query, convert_to_tensor=True)
233
  caption_embs = embedding_model.encode(image_captions, convert_to_tensor=True)
234
 
235
- # Calculate similarity
236
  similarities = util.cos_sim(query_emb, caption_embs)[0]
237
 
238
- # Get top k
239
- top_indices = torch.topk(similarities, k=min(top_k, len(images))).indices.tolist()
240
-
241
  relevant_imgs = []
242
  explanations = []
243
 
244
- for idx in top_indices:
245
- img_info = images[idx]
246
- caption = image_captions[idx]
247
 
248
- relevant_imgs.append(img_info['path'])
249
-
250
- exp = f"πŸ“„ **{img_info['source']}"
251
- if img_info.get('page'):
252
- exp += f" (Page {img_info['page']})"
253
- exp += f"**\nπŸ’¬ {caption}"
254
-
255
- # Calculate relevance
256
- relevance = float(similarities[idx]) * 100
257
- exp += f"\n🎯 Relevance: {relevance:.1f}%"
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
- explanations.append(exp)
 
260
 
261
  return relevant_imgs, explanations
262
 
263
  def generate_answer(question, context_docs):
264
- """Generate answer"""
265
  context = '\n\n'.join([doc['text'] for doc in context_docs])
266
 
267
- prompt = f"""Answer the question based on this context. Be concise and accurate.
268
 
269
  Context:
270
  {context}
271
 
272
  Question: {question}
 
273
  Answer:"""
274
 
275
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1200)
@@ -284,53 +310,69 @@ Answer:"""
284
  )
285
 
286
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
287
- answer = answer.split("Answer:")[-1].strip()
 
 
 
288
 
289
  return answer
290
 
291
  def answer_query(question, progress=gr.Progress()):
292
- """Answer with images"""
293
- if not question:
294
- return "⚠️ Enter a question", None, ""
295
 
296
  if not documents:
297
- return "⚠️ Upload documents first", None, ""
298
 
299
- progress(0.2, desc="Searching...")
 
300
  relevant_docs = search_documents(question, k=3)
301
 
302
  if not relevant_docs:
303
- return "❌ No relevant info found", None, ""
304
 
305
- progress(0.5, desc="Generating answer...")
 
306
  answer = generate_answer(question, relevant_docs)
307
 
308
- response = f"## πŸ’‘ Answer:\n{answer}\n\n## πŸ“š Sources:\n"
 
 
 
309
  for i, doc in enumerate(relevant_docs, 1):
310
  source = doc['source']
311
  page = doc.get('page', '')
312
  if page:
313
- response += f"{i}. {source} (Page {page})\n"
314
  else:
315
- response += f"{i}. {source}\n"
316
 
317
- progress(0.8, desc="Finding images...")
318
- relevant_imgs, img_exps = find_relevant_images(question, top_k=2)
 
319
 
320
- if img_exps:
321
- response += f"\n## πŸ–ΌοΈ Related Images:\n"
322
- for exp in img_exps:
323
- response += f"\n{exp}\n"
 
 
 
324
 
325
- progress(1.0, desc="βœ… Done!")
326
 
327
- return response, relevant_imgs if relevant_imgs else None, ""
328
 
329
  # UI
330
- with gr.Blocks(title="DocVision AI", theme=gr.themes.Soft(primary_hue="blue")) as app:
 
 
 
 
331
  gr.Markdown("""
332
- # πŸ“š DocVision AI - Smart Document Q&A
333
- ### Upload documents and ask questions with relevant image detection
334
  """)
335
 
336
  with gr.Row():
@@ -340,41 +382,63 @@ with gr.Blocks(title="DocVision AI", theme=gr.themes.Soft(primary_hue="blue")) a
340
  file_count="multiple",
341
  file_types=[".pdf", ".docx", ".txt", ".jpg", ".png"]
342
  )
343
- process_btn = gr.Button("⚑ Process", variant="primary", size="lg")
344
- status = gr.Markdown(label="Status")
 
 
 
 
345
 
346
  with gr.Column():
347
  question = gr.Textbox(
348
- label="❓ Your Question",
349
- placeholder="Ask anything about your documents...",
350
  lines=3
351
  )
352
- ask_btn = gr.Button("πŸ” Get Answer", variant="primary", size="lg")
 
 
 
 
353
 
354
- answer = gr.Markdown(label="πŸ“ Answer, Sources & Related Images")
355
 
356
- with gr.Row():
357
- gallery = gr.Gallery(
358
- label="πŸ–ΌοΈ Relevant Images",
359
- columns=2,
360
- height=400
361
- )
362
 
 
363
  gr.Examples(
364
  examples=[
365
- ["What is this document about?"],
366
- ["Summarize the key points"],
367
- ["Explain the diagrams or charts shown"],
368
- ["What are the main findings?"]
369
  ],
370
  inputs=question
371
  )
372
 
373
- debug = gr.Textbox(visible=False)
 
 
 
 
 
374
 
375
- process_btn.click(process_files, inputs=[file_input], outputs=[status])
376
- ask_btn.click(answer_query, inputs=[question], outputs=[answer, gallery, debug])
377
- question.submit(answer_query, inputs=[question], outputs=[answer, gallery, debug])
 
 
 
 
 
 
 
 
378
 
379
  if __name__ == "__main__":
380
  app.launch()
 
6
  from sentence_transformers import SentenceTransformer, util
7
  import faiss
8
  import numpy as np
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BlipProcessor, BlipForConditionalGeneration
10
  import torch
11
  from datetime import datetime
12
  import fitz # PyMuPDF
13
+ import shutil
14
 
15
  # Load models
16
  print("Loading models...")
 
25
  )
26
 
27
print("Loading image caption model...")
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
# float16 halves GPU memory, but half precision is poorly supported on CPU
# (several ops are unimplemented or very slow) — pick the dtype to match the
# device instead of forcing fp16 and then moving the model to CPU.
_caption_device = "cuda" if torch.cuda.is_available() else "cpu"
caption_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    torch_dtype=torch.float16 if _caption_device == "cuda" else torch.float32,
).to(_caption_device)

print("✅ All models loaded!")
35
 
36
  # Storage
37
  documents = []
 
40
  embeddings_index = None
41
 
42
def generate_image_caption(image_path):
    """Generate a detailed caption for an image with the BLIP model.

    Returns the caption string, or "" when the image cannot be opened or
    captioning fails — callers treat "" as "skip this image".
    """
    try:
        img = Image.open(image_path).convert('RGB')

        inputs = caption_processor(img, return_tensors="pt").to(caption_model.device)
        # Beam search is deterministic: `temperature` only applies when
        # do_sample=True, so passing temperature=0.7 here was a no-op that
        # newer transformers versions warn about. Dropped — output unchanged.
        output = caption_model.generate(
            **inputs,
            max_length=100,
            num_beams=5,
        )
        caption = caption_processor.decode(output[0], skip_special_tokens=True)

        return caption.strip()
    except Exception as e:
        print(f"Caption error: {e}")
        return ""
61
 
62
def extract_images_from_pdf(pdf_path):
    """Extract embedded images from a PDF, keeping only ones large enough
    to be meaningful content (tiny icons/logos are skipped)."""
    extracted = []
    source_name = Path(pdf_path).name
    try:
        doc = fitz.open(pdf_path)

        for page_num in range(len(doc)):
            image_refs = doc[page_num].get_images(full=True)

            for img_index, img_ref in enumerate(image_refs):
                try:
                    # Pull the raw bytes out of the PDF object table via xref.
                    raw = doc.extract_image(img_ref[0])["image"]

                    # Persist to /tmp so the gallery can load it by path.
                    img_path = f"/tmp/pdf_page{page_num+1}_img{img_index}.png"
                    with open(img_path, "wb") as handle:
                        handle.write(raw)

                    # Confirm the bytes decode as an image and check size.
                    width, height = Image.open(img_path).size

                    # Only keep meaningful images (not tiny icons/logos).
                    if width >= 150 and height >= 150:
                        extracted.append({
                            'path': img_path,
                            'page': page_num + 1,
                            'source': source_name
                        })
                except Exception:
                    # Corrupt/undecodable image object — move on to the next.
                    continue

        doc.close()
    except Exception as e:
        print(f"PDF image extraction error: {e}")

    return extracted
102
 
103
  def extract_pdf_text(pdf_path):
104
+ """Extract text from PDF"""
105
  chunks = []
106
  with open(pdf_path, 'rb') as f:
107
  pdf = PyPDF2.PdfReader(f)
108
  for i, page in enumerate(pdf.pages):
109
  text = page.extract_text()
110
  if text.strip():
111
+ chunks.append({
112
+ 'text': text,
113
+ 'page': i + 1,
114
+ 'source': Path(pdf_path).name
115
+ })
116
  return chunks
117
 
118
def extract_docx_text(docx_path):
    """Extract all non-blank paragraph text from a DOCX file as one chunk."""
    paragraphs = docx.Document(docx_path).paragraphs
    joined = '\n'.join(p.text for p in paragraphs if p.text.strip())
    return [{'text': joined, 'source': Path(docx_path).name}]
122
 
123
def extract_txt_text(txt_path):
    """Read a UTF-8 text file and return its contents as a single chunk."""
    content = Path(txt_path).read_text(encoding='utf-8')
    return [{'text': content, 'source': Path(txt_path).name}]
126
 
127
  def chunk_text(text, size=400):
 
128
  words = text.split()
129
  chunks = []
130
  for i in range(0, len(words), size):
 
134
  return chunks
135
 
136
  def process_files(files, progress=gr.Progress()):
137
+ """Process uploaded files"""
138
  global documents, images, image_captions, embeddings_index
139
 
140
  if not files:
141
+ return "⚠️ Please upload files first"
142
 
143
  documents = []
144
  images = []
 
151
  ext = Path(file.name).suffix.lower()
152
 
153
  if ext == '.pdf':
154
+ # Extract text
155
  chunks = extract_pdf_text(file.name)
156
  for chunk in chunks:
157
  for small_chunk in chunk_text(chunk['text']):
 
161
  'page': chunk['page']
162
  })
163
 
164
+ # Extract images
165
+ pdf_images = extract_images_from_pdf(file.name)
166
+ for img in pdf_images:
 
167
  caption = generate_image_caption(img['path'])
168
+ if caption: # Only add if caption generated
169
+ images.append(img)
170
+ image_captions.append(caption)
171
 
172
  elif ext == '.docx':
173
  chunks = extract_docx_text(file.name)
174
  for chunk in chunks:
175
  for small_chunk in chunk_text(chunk['text']):
176
+ documents.append({
177
+ 'text': small_chunk,
178
+ 'source': chunk['source']
179
+ })
180
 
181
  elif ext == '.txt':
182
  chunks = extract_txt_text(file.name)
183
  for chunk in chunks:
184
  for small_chunk in chunk_text(chunk['text']):
185
+ documents.append({
186
+ 'text': small_chunk,
187
+ 'source': chunk['source']
188
+ })
189
 
190
  elif ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
 
 
 
 
 
191
  caption = generate_image_caption(file.name)
192
+ if caption:
193
+ images.append({
194
+ 'path': file.name,
195
+ 'source': Path(file.name).name,
196
+ 'page': ''
197
+ })
198
+ image_captions.append(caption)
199
 
200
  # Create embeddings
201
  progress(0.9, desc="Creating embeddings...")
 
207
  index.add(embeddings.astype('float32'))
208
  embeddings_index = index
209
 
210
+ progress(1.0, desc="Done!")
211
 
212
+ status = f"βœ… **Processing Complete!**\n\n"
213
+ status += f"πŸ“„ **Text chunks:** {len(documents)}\n"
214
+ status += f"πŸ–ΌοΈ **Images extracted:** {len(images)}\n"
215
 
216
  if images:
217
+ status += f"\n**Sample captions:**\n"
218
+ for i, (img, cap) in enumerate(zip(images[:3], image_captions[:3]), 1):
219
  status += f"{i}. {img['source']}"
220
  if img.get('page'):
221
  status += f" (Page {img['page']})"
222
+ status += f":\n _{cap}_\n"
223
 
224
  return status
225
 
226
  def search_documents(query, k=3):
227
+ """Search relevant documents"""
228
  if not documents or embeddings_index is None:
229
  return []
230
 
 
237
  results.append(documents[idx])
238
  return results
239
 
240
def find_relevant_images(query, relevance_threshold=0.25):
    """Find images whose captions are semantically relevant to the query.

    Returns (image_paths, explanations) for at most the 3 best matches whose
    cosine similarity to the query exceeds `relevance_threshold`; empty lists
    when no images are loaded or nothing clears the threshold.
    """
    if not images or not image_captions:
        return [], []

    query_emb = embedding_model.encode(query, convert_to_tensor=True)
    caption_embs = embedding_model.encode(image_captions, convert_to_tensor=True)

    # Cosine similarity of the query against every caption.
    similarities = util.cos_sim(query_emb, caption_embs)[0]

    # BUG FIX: the previous code sorted zip(similarities, relevant_imgs,
    # explanations) — pairing the similarity of image #0 with the first image
    # that PASSED the threshold, a misalignment whenever any earlier image was
    # filtered out. Keep (score, path, explanation) together from the start.
    scored = []
    for idx, sim_score in enumerate(similarities):
        sim_value = float(sim_score)

        # Only show if relevance > threshold
        if sim_value > relevance_threshold:
            img_info = images[idx]
            caption = image_captions[idx]

            exp = f"**📄 Source:** {img_info['source']}"
            if img_info.get('page'):
                exp += f" (Page {img_info['page']})"
            exp += f"\n**💬 Description:** {caption}"
            exp += f"\n**🎯 Relevance:** {sim_value * 100:.1f}%\n"

            scored.append((sim_value, img_info['path'], exp))

    # Sort by relevance (descending) and take the top 3.
    scored.sort(key=lambda item: item[0], reverse=True)
    top = scored[:3]

    return [path for _, path, _ in top], [exp for _, _, exp in top]
287
 
288
  def generate_answer(question, context_docs):
289
+ """Generate answer from context"""
290
  context = '\n\n'.join([doc['text'] for doc in context_docs])
291
 
292
+ prompt = f"""Answer this question based only on the context provided. Be concise and accurate.
293
 
294
  Context:
295
  {context}
296
 
297
  Question: {question}
298
+
299
  Answer:"""
300
 
301
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1200)
 
310
  )
311
 
312
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
313
+
314
+ # Extract answer part
315
+ if "Answer:" in answer:
316
+ answer = answer.split("Answer:")[-1].strip()
317
 
318
  return answer
319
 
320
def answer_query(question, progress=gr.Progress()):
    """Answer a question from the indexed documents, attaching only the
    images whose captions are actually relevant to the query."""
    if not question.strip():
        return "⚠️ Please enter a question", None

    if not documents:
        return "⚠️ Please upload and process documents first", None

    # Retrieve the best-matching text chunks.
    progress(0.3, desc="Searching documents...")
    relevant_docs = search_documents(question, k=3)

    if not relevant_docs:
        return "❌ No relevant information found", None

    # Generate the answer from the retrieved context.
    progress(0.6, desc="Generating answer...")
    answer = generate_answer(question, relevant_docs)

    # Assemble the markdown response.
    parts = [f"## 💡 Answer\n\n{answer}\n\n", "## 📚 Text Sources\n\n"]
    for rank, doc in enumerate(relevant_docs, 1):
        page = doc.get('page', '')
        if page:
            parts.append(f"{rank}. **{doc['source']}** (Page {page})\n")
        else:
            parts.append(f"{rank}. **{doc['source']}**\n")

    # Look up images whose captions match the question.
    progress(0.9, desc="Finding relevant images...")
    relevant_imgs, img_explanations = find_relevant_images(question, relevance_threshold=0.25)

    if relevant_imgs and img_explanations:
        parts.append("\n## 🖼️ Related Images\n\n")
        parts.extend(f"{exp}\n" for exp in img_explanations)
    else:
        parts.append("\n_No relevant images found for this query_\n")

    progress(1.0, desc="Done!")

    response = "".join(parts)
    return response, relevant_imgs if relevant_imgs else None
366
 
367
  # UI
368
+ with gr.Blocks(
369
+ title="DocVision AI",
370
+ theme=gr.themes.Soft(primary_hue="indigo")
371
+ ) as app:
372
+
373
  gr.Markdown("""
374
+ # πŸ“š DocVision AI - Intelligent Document Q&A
375
+ ### Upload documents and get AI-powered answers with relevant images
376
  """)
377
 
378
  with gr.Row():
 
382
  file_count="multiple",
383
  file_types=[".pdf", ".docx", ".txt", ".jpg", ".png"]
384
  )
385
+ process_btn = gr.Button(
386
+ "⚑ Process Documents",
387
+ variant="primary",
388
+ size="lg"
389
+ )
390
+ status = gr.Markdown(label="πŸ“Š Processing Status")
391
 
392
  with gr.Column():
393
  question = gr.Textbox(
394
+ label="❓ Ask Your Question",
395
+ placeholder="What would you like to know about your documents?",
396
  lines=3
397
  )
398
+ ask_btn = gr.Button(
399
+ "πŸ” Get Answer",
400
+ variant="primary",
401
+ size="lg"
402
+ )
403
 
404
+ answer = gr.Markdown(label="πŸ“ Answer with Sources")
405
 
406
+ gallery = gr.Gallery(
407
+ label="πŸ–ΌοΈ Relevant Images (Only shown if related to your question)",
408
+ columns=2,
409
+ height=500,
410
+ show_label=True
411
+ )
412
 
413
+ gr.Markdown("### πŸ’‘ Example Questions")
414
  gr.Examples(
415
  examples=[
416
+ ["What is the main topic of this document?"],
417
+ ["Explain the workflow or architecture shown"],
418
+ ["What are the key findings?"],
419
+ ["Describe any diagrams or charts present"]
420
  ],
421
  inputs=question
422
  )
423
 
424
+ # Event handlers
425
+ process_btn.click(
426
+ process_files,
427
+ inputs=[file_input],
428
+ outputs=[status]
429
+ )
430
 
431
+ ask_btn.click(
432
+ answer_query,
433
+ inputs=[question],
434
+ outputs=[answer, gallery]
435
+ )
436
+
437
+ question.submit(
438
+ answer_query,
439
+ inputs=[question],
440
+ outputs=[answer, gallery]
441
+ )
442
 
443
  if __name__ == "__main__":
444
  app.launch()