Spaces:

prithivMLmods
/

Multimodal-VLM-Thinking

Running on Zero

App Files Community

prithivMLmods committed on Oct 11

Commit

63030e5

verified ·

1 Parent(s): 9a4a949

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -16

app.py CHANGED Viewed

@@ -255,21 +255,23 @@ def generate_detection_and_pointing(image: Image.Image, prompt: str, task_type:
 # --- Gradio UI ---
-image_examples = [
-    ["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg"],
-    ["Convert this page to doc [markdown] precisely.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/3.png"],
-    ["Explain the creativity in the image.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg"],
-]
-video_examples = [
-    ["Explain the video in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/2.mp4"],
-    ["Explain the ad in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/1.mp4"]
-]
-detection_examples = [
-    ["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg", "Object Detection", "the person"],
-    ["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg", "Point Detection", "the fire extinguisher"],
-]
 css = """
@@ -288,7 +290,7 @@ with gr.Blocks(css=css) as demo:
                     image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     image_upload = gr.Image(type="pil", label="Image", height=290)
                     image_submit = gr.Button("Submit", elem_classes="submit-btn")
-                    gr.Examples(examples=image_examples, inputs=[image_query, image_upload])
                 with gr.Column():
                      with gr.Column(elem_classes="canvas-output"):
                         gr.Markdown("## Output")
@@ -300,7 +302,7 @@ with gr.Blocks(css=css) as demo:
                     video_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     video_upload = gr.Video(label="Video", height=290)
                     video_submit = gr.Button("Submit", elem_classes="submit-btn")
-                    gr.Examples(examples=video_examples, inputs=[video_query, video_upload])
                 with gr.Column():
                      with gr.Column(elem_classes="canvas-output"):
                         gr.Markdown("## Output")
@@ -316,7 +318,7 @@ with gr.Blocks(css=css) as demo:
                 with gr.Column(scale=1):
                     detection_output_image = gr.Image(type="pil", label="Result", height=400)
                     detection_output_textbox = gr.Textbox(label="Model Raw Output (Coordinates)", lines=10, show_copy_button=True)
-            gr.Examples(examples=detection_examples, inputs=[detection_image_input, detection_task_type, detection_prompt_input])
     with gr.Accordion("Advanced options", open=False):
         max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)

 # --- Gradio UI ---
+#image_examples = [
+#    ["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg"],
+#    ["Convert this page to doc [markdown] precisely.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/3.png"],
+#    ["Explain the creativity in the image.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg"],
+#]
+#video_examples = [
+#    ["Explain the video in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/2.mp4"],
+#    ["Explain the ad in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/1.mp4"]
+#]
+#detection_examples = [
+#    ["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg", "Object Detection", "the person"],
+#    ["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg", "Point Detection", "the fire extinguisher"],
+#]
 css = """
                     image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     image_upload = gr.Image(type="pil", label="Image", height=290)
                     image_submit = gr.Button("Submit", elem_classes="submit-btn")
+                    #gr.Examples(examples=image_examples, inputs=[image_query, image_upload])
                 with gr.Column():
                      with gr.Column(elem_classes="canvas-output"):
                         gr.Markdown("## Output")
                     video_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     video_upload = gr.Video(label="Video", height=290)
                     video_submit = gr.Button("Submit", elem_classes="submit-btn")
+                    #gr.Examples(examples=video_examples, inputs=[video_query, video_upload])
                 with gr.Column():
                      with gr.Column(elem_classes="canvas-output"):
                         gr.Markdown("## Output")
                 with gr.Column(scale=1):
                     detection_output_image = gr.Image(type="pil", label="Result", height=400)
                     detection_output_textbox = gr.Textbox(label="Model Raw Output (Coordinates)", lines=10, show_copy_button=True)
+            #gr.Examples(examples=detection_examples, inputs=[detection_image_input, detection_task_type, detection_prompt_input])
     with gr.Accordion("Advanced options", open=False):
         max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)