Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -255,21 +255,23 @@ def generate_detection_and_pointing(image: Image.Image, prompt: str, task_type:
|
|
| 255 |
|
| 256 |
# --- Gradio UI ---
|
| 257 |
|
| 258 |
-
image_examples = [
|
| 259 |
-
["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg"],
|
| 260 |
-
["Convert this page to doc [markdown] precisely.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/3.png"],
|
| 261 |
-
["Explain the creativity in the image.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg"],
|
| 262 |
-
]
|
| 263 |
|
| 264 |
-
video_examples = [
|
| 265 |
-
["Explain the video in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/2.mp4"],
|
| 266 |
-
["Explain the ad in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/1.mp4"]
|
| 267 |
-
]
|
| 268 |
|
| 269 |
-
detection_examples = [
|
| 270 |
-
["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg", "Object Detection", "the person"],
|
| 271 |
-
["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg", "Point Detection", "the fire extinguisher"],
|
| 272 |
-
|
|
|
|
|
|
|
| 273 |
|
| 274 |
|
| 275 |
css = """
|
|
@@ -288,7 +290,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 288 |
image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
|
| 289 |
image_upload = gr.Image(type="pil", label="Image", height=290)
|
| 290 |
image_submit = gr.Button("Submit", elem_classes="submit-btn")
|
| 291 |
-
gr.Examples(examples=image_examples, inputs=[image_query, image_upload])
|
| 292 |
with gr.Column():
|
| 293 |
with gr.Column(elem_classes="canvas-output"):
|
| 294 |
gr.Markdown("## Output")
|
|
@@ -300,7 +302,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 300 |
video_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
|
| 301 |
video_upload = gr.Video(label="Video", height=290)
|
| 302 |
video_submit = gr.Button("Submit", elem_classes="submit-btn")
|
| 303 |
-
gr.Examples(examples=video_examples, inputs=[video_query, video_upload])
|
| 304 |
with gr.Column():
|
| 305 |
with gr.Column(elem_classes="canvas-output"):
|
| 306 |
gr.Markdown("## Output")
|
|
@@ -316,7 +318,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 316 |
with gr.Column(scale=1):
|
| 317 |
detection_output_image = gr.Image(type="pil", label="Result", height=400)
|
| 318 |
detection_output_textbox = gr.Textbox(label="Model Raw Output (Coordinates)", lines=10, show_copy_button=True)
|
| 319 |
-
gr.Examples(examples=detection_examples, inputs=[detection_image_input, detection_task_type, detection_prompt_input])
|
| 320 |
|
| 321 |
with gr.Accordion("Advanced options", open=False):
|
| 322 |
max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
|
|
|
|
| 255 |
|
| 256 |
# --- Gradio UI ---
|
| 257 |
|
| 258 |
+
#image_examples = [
|
| 259 |
+
# ["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg"],
|
| 260 |
+
# ["Convert this page to doc [markdown] precisely.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/3.png"],
|
| 261 |
+
# ["Explain the creativity in the image.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg"],
|
| 262 |
+
#]
|
| 263 |
|
| 264 |
+
#video_examples = [
|
| 265 |
+
# ["Explain the video in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/2.mp4"],
|
| 266 |
+
# ["Explain the ad in detail.", "https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/videos/1.mp4"]
|
| 267 |
+
#]
|
| 268 |
|
| 269 |
+
#detection_examples = [
|
| 270 |
+
# ["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/6.jpg", "Object Detection", "the person"],
|
| 271 |
+
# ["https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-Thinking/resolve/main/images/5.jpg", "Point Detection", "the fire extinguisher"],
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
#]
|
| 275 |
|
| 276 |
|
| 277 |
css = """
|
|
|
|
| 290 |
image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
|
| 291 |
image_upload = gr.Image(type="pil", label="Image", height=290)
|
| 292 |
image_submit = gr.Button("Submit", elem_classes="submit-btn")
|
| 293 |
+
#gr.Examples(examples=image_examples, inputs=[image_query, image_upload])
|
| 294 |
with gr.Column():
|
| 295 |
with gr.Column(elem_classes="canvas-output"):
|
| 296 |
gr.Markdown("## Output")
|
|
|
|
| 302 |
video_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
|
| 303 |
video_upload = gr.Video(label="Video", height=290)
|
| 304 |
video_submit = gr.Button("Submit", elem_classes="submit-btn")
|
| 305 |
+
#gr.Examples(examples=video_examples, inputs=[video_query, video_upload])
|
| 306 |
with gr.Column():
|
| 307 |
with gr.Column(elem_classes="canvas-output"):
|
| 308 |
gr.Markdown("## Output")
|
|
|
|
| 318 |
with gr.Column(scale=1):
|
| 319 |
detection_output_image = gr.Image(type="pil", label="Result", height=400)
|
| 320 |
detection_output_textbox = gr.Textbox(label="Model Raw Output (Coordinates)", lines=10, show_copy_button=True)
|
| 321 |
+
#gr.Examples(examples=detection_examples, inputs=[detection_image_input, detection_task_type, detection_prompt_input])
|
| 322 |
|
| 323 |
with gr.Accordion("Advanced options", open=False):
|
| 324 |
max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
|