prithivMLmods committed on
Commit
d1e8913
·
verified ·
1 Parent(s): d3a73a2

update app

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -349,11 +349,11 @@ css="""
349
  """
350
 
351
  with gr.Blocks() as demo:
352
- gr.Markdown("# **Molmo2 HF Demo🖥️**", elem_id="main-title")
353
- gr.Markdown("Perform multi-image QA, pointing, general video QA, and tracking using the [Molmo2](https://huggingface.co/allenai/Molmo2-8B) multimodal model.")
354
 
355
  with gr.Tabs():
356
- with gr.Tab("Images (QA & Pointing)"):
357
  with gr.Row():
358
  with gr.Column():
359
  img_input = gr.Gallery(label="Input Images", type="filepath", height=400)
@@ -367,9 +367,9 @@ with gr.Blocks() as demo:
367
  gr.Examples(
368
  examples=[
369
  [["example-images/compare1.jpg", "example-images/compare2.jpeg"], "Compare these two images."],
370
- [["example-images/cat1.jpg", "example-images/cat2.jpg", "example-images/dog1.jpg"], "Point to the cats."],
371
- [["example-images/candy.JPG"], "Point to all the candies."],
372
- [["example-images/premium_photo-1691752881339-d78da354ee7e.jpg"], "Point to the girls."],
373
  ],
374
  inputs=[img_input, img_prompt],
375
  label="Image Examples"
@@ -380,7 +380,7 @@ with gr.Blocks() as demo:
380
  outputs=[img_text_out, img_out]
381
  )
382
 
383
- with gr.Tab("Video (QA, Pointing & Tracking)"):
384
  gr.Markdown("**Note:** Video processing takes longer as frames are sampled.")
385
  with gr.Row():
386
  with gr.Column():
@@ -394,7 +394,7 @@ with gr.Blocks() as demo:
394
 
395
  gr.Examples(
396
  examples=[
397
- ["example-videos/sample_video.mp4", "Track the football."],
398
  ["example-videos/drink.mp4", "Explain the video."],
399
  ],
400
  inputs=[vid_input, vid_prompt],
 
349
  """
350
 
351
  with gr.Blocks() as demo:
352
+ gr.Markdown("# **Qwen3-VL-4B-Abliterated🔥**", elem_id="main-title")
353
+ gr.Markdown("Perform multi-image QA, general video QA, and Analysis using the [Qwen3-VL-4B-Instruct-abliterated-v1](https://huggingface.co/prithivMLmods/Qwen3-VL-4B-Instruct-abliterated-v1) multimodal model.")
354
 
355
  with gr.Tabs():
356
+ with gr.Tab("Multi-Image QA"):
357
  with gr.Row():
358
  with gr.Column():
359
  img_input = gr.Gallery(label="Input Images", type="filepath", height=400)
 
367
  gr.Examples(
368
  examples=[
369
  [["example-images/compare1.jpg", "example-images/compare2.jpeg"], "Compare these two images."],
370
+ [["example-images/cat1.jpg", "example-images/cat2.jpg", "example-images/dog1.jpg"], "Odd one out."],
371
+ [["example-images/candy.JPG"], "Mention the coordinates of the candies."],
372
+ [["example-images/premium_photo-1691752881339-d78da354ee7e.jpg"], "Analyze the image in detail."],
373
  ],
374
  inputs=[img_input, img_prompt],
375
  label="Image Examples"
 
380
  outputs=[img_text_out, img_out]
381
  )
382
 
383
+ with gr.Tab("Video QA"):
384
  gr.Markdown("**Note:** Video processing takes longer as frames are sampled.")
385
  with gr.Row():
386
  with gr.Column():
 
394
 
395
  gr.Examples(
396
  examples=[
397
+ ["example-videos/sample_video.mp4", "Analyze the football movements."],
398
  ["example-videos/drink.mp4", "Explain the video."],
399
  ],
400
  inputs=[vid_input, vid_prompt],