Spaces:

dmorawiec
/

Qwen-VL-Object-Detection

Running on Zero

App Files Files Community

Darius Morawiec committed about 1 month ago

Commit

7b4b54b

1 Parent(s): c8d2dc0

Add image scaling functionality and target size slider

Browse files

Files changed (1) hide show

app.py +27 -2

app.py CHANGED Viewed

@@ -33,6 +33,21 @@ model_ids = [
 ]
 def image_to_base64(image):
     buffered = BytesIO()
     image.save(buffered, format="PNG")
@@ -43,7 +58,7 @@ def image_to_base64(image):
 with gr.Blocks() as demo:
     gr.Markdown("# Qwen-VL Object-Detection")
     gr.Markdown(
-        "Compare [Qwen3-VL](https://huggingface.co/collections/Qwen/qwen3-vl), [Qwen2.5-VL](https://huggingface.co/collections/Qwen/qwen25-vl) and [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl) models by [Qwen](https://huggingface.co/Qwen) for object detection in images."
     )
     if DEVICE != "cuda":
@@ -86,6 +101,14 @@ with gr.Blocks() as demo:
                 step=32,
                 interactive=True,
             )
         with gr.Column():
             gr.Markdown("## Outputs")
@@ -114,6 +137,7 @@ with gr.Blocks() as demo:
     def run(
         image,
         system_prompt: str,
         user_prompt: str,
         model_id: str,
@@ -164,7 +188,7 @@ with gr.Blocks() as demo:
                 "content": [
                     {
                         "type": "image",
-                        "image": f"data:image;base64,{image_to_base64(image)}",
                     },
                     {"type": "text", "text": system_prompt},
                     {"type": "text", "text": user_prompt},
@@ -230,6 +254,7 @@ with gr.Blocks() as demo:
         fn=run,
         inputs=[
             image_input,
             system_prompt,
             user_prompt,
             input_model_id,

 ]
def scale_image(image, target_size=1000):
    """Downscale *image* so its longest side is at most *target_size* pixels.

    The aspect ratio is preserved. An image whose longest side already fits
    within *target_size* is returned unchanged, so images are never upscaled.

    Args:
        image: Object exposing ``size`` as a ``(width, height)`` pair and a
            ``resize((width, height))`` method. Presumably a PIL image;
            confirm against the caller.
        target_size: Maximum length, in pixels, of the longest side of the
            returned image.

    Returns:
        ``image`` itself when no scaling is needed, otherwise whatever
        ``image.resize`` returns for the computed ``(width, height)``.
    """
    width, height = image.size
    if max(width, height) <= target_size:
        return image

    # The longer side is pinned to exactly target_size; the shorter side is
    # scaled by the same factor and truncated to whole pixels.
    if width >= height:
        new_width = target_size
        # Clamp to 1 px: for extreme aspect ratios the truncation would
        # otherwise produce a zero-sized dimension, which is not a valid
        # image size to resize to.
        new_height = max(1, int((target_size / width) * height))
    else:
        new_height = target_size
        new_width = max(1, int((target_size / height) * width))
    return image.resize((new_width, new_height))
 def image_to_base64(image):
     buffered = BytesIO()
     image.save(buffered, format="PNG")
 with gr.Blocks() as demo:
     gr.Markdown("# Qwen-VL Object-Detection")
     gr.Markdown(
+        "Compare [Qwen3-VL](https://huggingface.co/collections/Qwen/qwen3-vl), [Qwen2.5-VL](https://huggingface.co/collections/Qwen/qwen25-vl) and [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl) models by [Qwen](https://huggingface.co/Qwen) for object detection."
     )
     if DEVICE != "cuda":
                 step=32,
                 interactive=True,
             )
+            image_target_size = gr.Slider(
+                label="Image Target Size (longest side)",
+                minimum=256,
+                maximum=3072,
+                value=1024,
+                step=1,
+                interactive=True,
+            )
         with gr.Column():
             gr.Markdown("## Outputs")
     def run(
         image,
+        image_target_size: int,
         system_prompt: str,
         user_prompt: str,
         model_id: str,
                 "content": [
                     {
                         "type": "image",
+                        "image": f"data:image;base64,{image_to_base64(scale_image(image, image_target_size))}",
                     },
                     {"type": "text", "text": system_prompt},
                     {"type": "text", "text": user_prompt},
         fn=run,
         inputs=[
             image_input,
+            image_target_size,
             system_prompt,
             user_prompt,
             input_model_id,