prithivMLmods committed on
Commit
3a82189
·
verified ·
1 Parent(s): b560a0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -34
app.py CHANGED
@@ -7,6 +7,7 @@ import asyncio
7
  from threading import Thread
8
  from pathlib import Path
9
  from io import BytesIO
 
10
 
11
  import gradio as gr
12
  import spaces
@@ -91,6 +92,55 @@ def convert_pdf_to_images(file_path: str, dpi: int = 200):
91
  pdf_document.close()
92
  return images
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  @spaces.GPU
95
  def generate_image(text: str, image: Image.Image,
96
  max_new_tokens: int = 1024,
@@ -162,32 +212,24 @@ def generate_video(text: str, video_path: str,
162
  yield buffer, buffer
163
 
164
  @spaces.GPU
165
- def generate_pdf(text: str, pdf_path: str,
166
  max_new_tokens: int = 2048,
167
  temperature: float = 0.6,
168
  top_p: float = 0.9,
169
  top_k: int = 50,
170
  repetition_penalty: float = 1.2):
171
  """
172
- Processes a PDF file page by page and generates a combined textual output.
173
  """
174
- if not pdf_path:
175
- yield "Please upload a PDF file.", "Please upload a PDF file."
176
- return
177
-
178
- try:
179
- page_images = convert_pdf_to_images(pdf_path)
180
- if not page_images:
181
- yield "Could not extract pages from the PDF.", "Could not extract pages from the PDF."
182
- return
183
- except Exception as e:
184
- yield f"Error processing PDF: {e}", f"Error processing PDF: {e}"
185
  return
186
 
 
187
  full_response = ""
188
  for i, image in enumerate(page_images):
189
  page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
190
- yield page_header, page_header
191
 
192
  messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
193
  prompt_full = processor_q3vl.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
@@ -205,24 +247,15 @@ def generate_pdf(text: str, pdf_path: str,
205
  yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
206
  time.sleep(0.01)
207
 
208
- full_response += page_header + page_buffer + "\n"
209
 
210
  # --- Gradio Interface ---
211
  image_examples = [
212
  ["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "images/5.jpg"],
213
  ["Convert this page to doc [markdown] precisely.", "images/3.png"],
214
- ["Explain the creativity in the image.", "images/6.jpg"],
215
- ]
216
-
217
- video_examples = [
218
- ["Explain the video in detail.", "videos/2.mp4"],
219
- ["Explain the ad in detail.", "videos/1.mp4"]
220
  ]
221
-
222
- #pdf_examples = [
223
- # ["Summarize the key findings from this document.", "examples/sample-doc.pdf"],
224
- # ["Extract the main points from each section.", "examples/research-paper.pdf"],
225
- #]
226
 
227
  css = """
228
  .submit-btn { background-color: #2980b9 !important; color: white !important; }
@@ -231,9 +264,10 @@ css = """
231
  """
232
 
233
  with gr.Blocks(css=css) as demo:
 
234
  gr.Markdown("# **Qwen3-VL-Processor**")
235
  with gr.Row():
236
- with gr.Column():
237
  with gr.Tabs():
238
  with gr.TabItem("Image Inference"):
239
  image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
@@ -248,10 +282,18 @@ with gr.Blocks(css=css) as demo:
248
  gr.Examples(examples=video_examples, inputs=[video_query, video_upload])
249
 
250
  with gr.TabItem("PDF Inference"):
251
- pdf_query = gr.Textbox(label="Query Input", placeholder="e.g., 'Summarize this document'")
252
- pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
253
- pdf_submit = gr.Button("Submit", elem_classes="submit-btn")
254
- #gr.Examples(examples=pdf_examples, inputs=[pdf_query, pdf_upload])
 
 
 
 
 
 
 
 
255
 
256
  with gr.Accordion("Advanced options", open=False):
257
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
@@ -260,10 +302,10 @@ with gr.Blocks(css=css) as demo:
260
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
261
  repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
262
 
263
- with gr.Column():
264
  with gr.Column(elem_classes="canvas-output"):
265
  gr.Markdown("## Output")
266
- output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=10, show_copy_button=True)
267
  with gr.Accordion("(Result.md)", open=False):
268
  markdown_output = gr.Markdown(label="(Result.Md)")
269
 
@@ -280,9 +322,24 @@ with gr.Blocks(css=css) as demo:
280
  )
281
  pdf_submit.click(
282
  fn=generate_pdf,
283
- inputs=[pdf_query, pdf_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
284
  outputs=[output, markdown_output]
285
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  if __name__ == "__main__":
288
  demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
 
7
  from threading import Thread
8
  from pathlib import Path
9
  from io import BytesIO
10
+ from typing import Optional, Tuple, Dict, Any
11
 
12
  import gradio as gr
13
  import spaces
 
92
  pdf_document.close()
93
  return images
94
 
95
def get_initial_pdf_state() -> Dict[str, Any]:
    """Build a fresh, empty state for the PDF viewer.

    Returns:
        A newly created dict describing "no document loaded": an empty
        ``pages`` list, a ``total_pages`` count of 0 and a
        ``current_page_index`` of 0. Each call returns a new dict with a
        new list, so callers never share state with one another.
    """
    empty_state: Dict[str, Any] = dict(
        pages=[],
        total_pages=0,
        current_page_index=0,
    )
    return empty_state
98
+
99
def load_and_preview_pdf(file_path: Optional[str]) -> Tuple[Optional[Image.Image], Dict[str, Any], str]:
    """Load a PDF, convert its pages to images, and prepare the preview state.

    Args:
        file_path: Path of the uploaded PDF, or a falsy value (``None`` / ``""``)
            when no file is selected.

    Returns:
        A 3-tuple ``(preview_image, state, page_info_html)``:

        * ``preview_image`` - the first page image, or ``None`` when nothing
          could be loaded.
        * ``state`` - the viewer state dict; on success it holds the page
          images under ``"pages"`` and their count under ``"total_pages"``,
          otherwise it is the empty initial state.
        * ``page_info_html`` - a centered HTML snippet with the page counter
          or a status / error message.
    """
    # Local import keeps this block self-contained; only the error path uses it.
    import html

    state = get_initial_pdf_state()
    if not file_path:
        return None, state, '<div style="text-align:center;">No file loaded</div>'

    # Broad catch is deliberate: this is a UI boundary and any failure should
    # surface as a message in the preview pane rather than crash the handler.
    try:
        pages = convert_pdf_to_images(file_path)
        if not pages:
            return None, state, '<div style="text-align:center;">Could not load file</div>'

        state["pages"] = pages
        state["total_pages"] = len(pages)
        page_info_html = f'<div style="text-align:center;">Page 1 / {state["total_pages"]}</div>'
        return pages[0], state, page_info_html
    except Exception as e:
        # The exception text can contain the user-supplied file name, and this
        # string is rendered as HTML, so escape it before interpolating.
        safe_error = html.escape(str(e))
        return None, state, f'<div style="text-align:center;">Failed to load preview: {safe_error}</div>'
118
+
119
def navigate_pdf_page(direction: str, state: Dict[str, Any]) -> Tuple[Optional[Any], Dict[str, Any], str]:
    """Move the PDF preview one page backward or forward.

    Args:
        direction: ``"prev"`` to step back, ``"next"`` to step forward. Any
            other value keeps the current page.
        state: Viewer state dict with the page images under ``"pages"`` and
            the zero-based position under ``"current_page_index"``.

    Returns:
        A 3-tuple ``(image_preview, state, page_info_html)``. When ``state``
        is empty or holds no pages, the image is ``None``, the state is
        returned untouched and the HTML reads "No file loaded". Otherwise the
        state's ``"current_page_index"`` is updated in place and the image for
        that page is returned together with a "Page X / N" counter.
    """
    # .get() tolerates a partially populated state instead of raising KeyError.
    pages = state.get("pages") if state else None
    if not pages:
        return None, state, '<div style="text-align:center;">No file loaded</div>'

    # Derive the count from the list itself so a stale "total_pages" value can
    # never push the index out of range or show a wrong total.
    total_pages = len(pages)
    current_index = state.get("current_page_index", 0)

    step = {"prev": -1, "next": 1}.get(direction, 0)
    # Clamp after stepping: this also repairs an out-of-range stored index.
    new_index = min(max(current_index + step, 0), total_pages - 1)

    state["current_page_index"] = new_index

    image_preview = pages[new_index]
    page_info_html = f'<div style="text-align:center;">Page {new_index + 1} / {total_pages}</div>'

    return image_preview, state, page_info_html
142
+
143
+
144
  @spaces.GPU
145
  def generate_image(text: str, image: Image.Image,
146
  max_new_tokens: int = 1024,
 
212
  yield buffer, buffer
213
 
214
  @spaces.GPU
215
+ def generate_pdf(text: str, state: Dict[str, Any],
216
  max_new_tokens: int = 2048,
217
  temperature: float = 0.6,
218
  top_p: float = 0.9,
219
  top_k: int = 50,
220
  repetition_penalty: float = 1.2):
221
  """
222
+ Processes a PDF file page by page using the pre-loaded images from the state.
223
  """
224
+ if not state or not state["pages"]:
225
+ yield "Please upload a PDF file first.", "Please upload a PDF file first."
 
 
 
 
 
 
 
 
 
226
  return
227
 
228
+ page_images = state["pages"]
229
  full_response = ""
230
  for i, image in enumerate(page_images):
231
  page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
232
+ yield full_response + page_header, full_response + page_header
233
 
234
  messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
235
  prompt_full = processor_q3vl.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
247
  yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
248
  time.sleep(0.01)
249
 
250
+ full_response += page_header + page_buffer + "\n\n"
251
 
252
  # --- Gradio Interface ---
253
  image_examples = [
254
  ["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "images/5.jpg"],
255
  ["Convert this page to doc [markdown] precisely.", "images/3.png"],
 
 
 
 
 
 
256
  ]
257
+ video_examples = [["Explain the video in detail.", "videos/2.mp4"]]
258
+ pdf_examples = [["examples/sample-doc.pdf"]]
 
 
 
259
 
260
  css = """
261
  .submit-btn { background-color: #2980b9 !important; color: white !important; }
 
264
  """
265
 
266
  with gr.Blocks(css=css) as demo:
267
+ pdf_state = gr.State(value=get_initial_pdf_state())
268
  gr.Markdown("# **Qwen3-VL-Processor**")
269
  with gr.Row():
270
+ with gr.Column(scale=2):
271
  with gr.Tabs():
272
  with gr.TabItem("Image Inference"):
273
  image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
 
282
  gr.Examples(examples=video_examples, inputs=[video_query, video_upload])
283
 
284
  with gr.TabItem("PDF Inference"):
285
+ with gr.Row():
286
+ with gr.Column(scale=1):
287
+ pdf_query = gr.Textbox(label="Query Input", placeholder="e.g., 'Summarize this document'")
288
+ pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
289
+ gr.Examples(examples=pdf_examples, inputs=[pdf_upload])
290
+ pdf_submit = gr.Button("Submit", elem_classes="submit-btn")
291
+ with gr.Column(scale=1):
292
+ pdf_preview_img = gr.Image(label="PDF Preview", height=280)
293
+ with gr.Row():
294
+ prev_page_btn = gr.Button("◀ Previous")
295
+ page_info = gr.HTML('<div style="text-align:center;">No file loaded</div>')
296
+ next_page_btn = gr.Button("Next ▶")
297
 
298
  with gr.Accordion("Advanced options", open=False):
299
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
 
302
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
303
  repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
304
 
305
+ with gr.Column(scale=3):
306
  with gr.Column(elem_classes="canvas-output"):
307
  gr.Markdown("## Output")
308
+ output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=15, show_copy_button=True)
309
  with gr.Accordion("(Result.md)", open=False):
310
  markdown_output = gr.Markdown(label="(Result.Md)")
311
 
 
322
  )
323
  pdf_submit.click(
324
  fn=generate_pdf,
325
+ inputs=[pdf_query, pdf_state, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
326
  outputs=[output, markdown_output]
327
  )
328
+ pdf_upload.change(
329
+ fn=load_and_preview_pdf,
330
+ inputs=[pdf_upload],
331
+ outputs=[pdf_preview_img, pdf_state, page_info]
332
+ )
333
+ prev_page_btn.click(
334
+ fn=lambda s: navigate_pdf_page("prev", s),
335
+ inputs=[pdf_state],
336
+ outputs=[pdf_preview_img, pdf_state, page_info]
337
+ )
338
+ next_page_btn.click(
339
+ fn=lambda s: navigate_pdf_page("next", s),
340
+ inputs=[pdf_state],
341
+ outputs=[pdf_preview_img, pdf_state, page_info]
342
+ )
343
 
344
  if __name__ == "__main__":
345
  demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)