prithivMLmods committed on
Commit
66c74a2
·
verified ·
1 Parent(s): 2acc319

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -30
app.py CHANGED
@@ -188,21 +188,25 @@ def generate_response(
188
  ):
189
  """Unified generation function for both image and video."""
190
  if media_file is None:
191
- yield "Please upload an image or video file first.", "Please upload an image or video file first."
192
  return
193
 
194
  processor, model = get_model_and_processor(model_name)
195
  if not processor or not model:
196
- yield "Invalid model selected.", "Invalid model selected."
197
  return
198
 
199
  media_type = "video" if is_video_file(media_file) else "image"
200
-
201
- if media_type == "video":
202
- frames = downsample_video(media_file)
203
- images = [frame for frame, _ in frames]
204
- else: # image
205
- images = [Image.open(media_file)]
 
 
 
 
206
 
207
  if model_name == "SmolDocling-256M-preview":
208
  if "OTSL" in query or "code" in query:
@@ -273,17 +277,24 @@ body, .gradio-container { font-family: 'Inter', sans-serif; }
273
 
274
  def handle_file_upload(file):
275
  if file is None:
276
- return None, gr.update(visible=False)
277
  if is_video_file(file.name):
278
- return gr.update(value=file.name, visible=False), gr.update(value=file.name, visible=True)
 
 
 
 
 
 
 
279
  else:
280
- return gr.update(value=file.name, visible=True), gr.update(value=file.name, visible=False)
 
281
 
282
  def clear_all():
283
- return None, None, None, ""
284
 
285
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
286
- # Hidden state to store the path to the uploaded file
287
  media_file_path = gr.State(None)
288
 
289
  with gr.Row(elem_classes="main-container"):
@@ -312,25 +323,29 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
312
  with gr.Column(scale=4):
313
  gr.Markdown("# Multimodal OCR")
314
 
315
- # --- Media Display Area ---
316
  with gr.Column(elem_classes="media-display"):
317
  image_display = gr.Image(type="filepath", label="Image Preview", visible=False)
318
  video_display = gr.Video(label="Video Preview", visible=False)
319
  gr.Markdown("Upload an image or video to begin.")
320
 
321
- # --- Examples ---
 
 
 
 
 
 
322
  gr.Examples(
323
  examples=all_examples,
324
- inputs=[media_file_path, "query_input"],
 
 
325
  label="Examples (Click to run)",
326
- fn=handle_file_upload, # Custom function to update media display
327
- outputs=[image_display, video_display]
328
  )
329
 
330
- # --- Chat/Output Window ---
331
  output_display = gr.Markdown(elem_classes="chat-window", value="### Output will be shown here")
332
 
333
- # --- Input Bar ---
334
  with gr.Row(elem_classes="input-bar", vertical=False):
335
  upload_btn = gr.UploadButton("📁 Add Files", file_types=["image", "video"])
336
  model_dropdown = gr.Dropdown(
@@ -338,22 +353,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
338
  label="Select Model",
339
  value="Nanonets-OCR-s"
340
  )
341
- query_input = gr.Textbox(
342
- placeholder="Enter your query here...",
343
- show_label=False,
344
- scale=4,
345
- )
346
  submit_btn = gr.Button("▶", elem_classes="submit-button")
347
 
348
  # --- Event Handlers ---
349
  upload_btn.upload(
350
  fn=handle_file_upload,
351
  inputs=[upload_btn],
352
- outputs=[image_display, video_display]
353
  )
354
-
355
- # When file is uploaded, also store its path in the state
356
- upload_btn.upload(lambda f: f.name if f else None, upload_btn, media_file_path)
357
 
358
  submit_btn.click(
359
  fn=generate_response,
@@ -363,7 +371,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
363
 
364
  add_conv_btn.click(
365
  fn=clear_all,
366
- outputs=[media_file_path, image_display, video_display, output_display]
367
  )
368
 
369
 
 
188
  ):
189
  """Unified generation function for both image and video."""
190
  if media_file is None:
191
+ yield "Please upload an image or video file first."
192
  return
193
 
194
  processor, model = get_model_and_processor(model_name)
195
  if not processor or not model:
196
+ yield "Invalid model selected."
197
  return
198
 
199
  media_type = "video" if is_video_file(media_file) else "image"
200
+
201
+ try:
202
+ if media_type == "video":
203
+ frames = downsample_video(media_file)
204
+ images = [frame for frame, _ in frames]
205
+ else: # image
206
+ images = [Image.open(media_file)]
207
+ except Exception as e:
208
+ yield f"Error processing file: {e}"
209
+ return
210
 
211
  if model_name == "SmolDocling-256M-preview":
212
  if "OTSL" in query or "code" in query:
 
277
 
278
  def handle_file_upload(file):
279
  if file is None:
280
+ return None, gr.update(visible=False), gr.update(visible=False)
281
  if is_video_file(file.name):
282
+ return file.name, gr.update(visible=False), gr.update(value=file.name, visible=True)
283
+ else:
284
+ return file.name, gr.update(value=file.name, visible=True), gr.update(visible=False)
285
+
286
+ def handle_example_click(file_path, query):
287
+ if is_video_file(file_path):
288
+ # Update state, hide image, show video, update query
289
+ return file_path, gr.update(visible=False), gr.update(value=file_path, visible=True), query
290
  else:
291
+ # Update state, show image, hide video, update query
292
+ return file_path, gr.update(value=file_path, visible=True), gr.update(visible=False), query
293
 
294
  def clear_all():
295
+ return None, None, None, "### Output will be shown here", ""
296
 
297
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
 
298
  media_file_path = gr.State(None)
299
 
300
  with gr.Row(elem_classes="main-container"):
 
323
  with gr.Column(scale=4):
324
  gr.Markdown("# Multimodal OCR")
325
 
 
326
  with gr.Column(elem_classes="media-display"):
327
  image_display = gr.Image(type="filepath", label="Image Preview", visible=False)
328
  video_display = gr.Video(label="Video Preview", visible=False)
329
  gr.Markdown("Upload an image or video to begin.")
330
 
331
+ # Define query_input here so gr.Examples can reference it
332
+ query_input = gr.Textbox(
333
+ placeholder="Enter your query here...",
334
+ show_label=False,
335
+ scale=4,
336
+ )
337
+
338
  gr.Examples(
339
  examples=all_examples,
340
+ inputs=[media_file_path, query_input], # Pass component objects
341
+ outputs=[media_file_path, image_display, video_display, query_input],
342
+ fn=handle_example_click,
343
  label="Examples (Click to run)",
344
+ cache_examples=True
 
345
  )
346
 
 
347
  output_display = gr.Markdown(elem_classes="chat-window", value="### Output will be shown here")
348
 
 
349
  with gr.Row(elem_classes="input-bar", vertical=False):
350
  upload_btn = gr.UploadButton("📁 Add Files", file_types=["image", "video"])
351
  model_dropdown = gr.Dropdown(
 
353
  label="Select Model",
354
  value="Nanonets-OCR-s"
355
  )
356
+ # NOTE: query_input is created (and therefore rendered) above, before gr.Examples; it is not placed in this input row.
 
 
 
 
357
  submit_btn = gr.Button("▶", elem_classes="submit-button")
358
 
359
  # --- Event Handlers ---
360
  upload_btn.upload(
361
  fn=handle_file_upload,
362
  inputs=[upload_btn],
363
+ outputs=[media_file_path, image_display, video_display]
364
  )
 
 
 
365
 
366
  submit_btn.click(
367
  fn=generate_response,
 
371
 
372
  add_conv_btn.click(
373
  fn=clear_all,
374
+ outputs=[media_file_path, image_display, video_display, output_display, query_input]
375
  )
376
 
377