Spaces:

hamada056
/

MIM

Sleeping

App Files Files Community

hamada056 committed 20 days ago

Commit

3a06621

verified ·

1 Parent(s): a1449bf

Upload app.py

Browse files

Files changed (1) hide show

app.py +120 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import gradio as gr
+import os
+import json
+from io import BytesIO
+from PIL import Image, ImageDraw, ImageFont, ImageColor
+import google.generativeai as genai
+from dotenv import load_dotenv
+# =========================
+# 1. SETUP API KEY
+# =========================
+# load_dotenv() runs first so that a key stored in a .env file (if one is
+# used) is expected to be visible to os.getenv below.
+load_dotenv()
+# NOTE: os.getenv returns None when "Gemini_API_Key" is unset. The value is
+# handed to genai.configure without any check, so a missing key is not
+# reported at this point.
+api_key = os.getenv("Gemini_API_Key")
+genai.configure(api_key=api_key)
+# =========================
+# 2. MODEL CONFIG
+# =========================
+# System instruction attached to the model: asks for a JSON array of labelled
+# bounding boxes, no masks, no code fences, at most 25 objects, and unique
+# labels for repeated objects.
+bounding_box_system_instructions = """
+Return bounding boxes as a JSON array with labels.
+Never return masks or code fencing.
+Limit to 25 objects.
+If an object appears multiple times, use unique labels.
+"""
+# Single module-level model instance; detect_objects calls
+# model.generate_content on it for every request.
+model = genai.GenerativeModel(
+    model_name="gemini-2.5-flash",
+    system_instruction=bounding_box_system_instructions
+)
+# Generation settings passed by detect_objects on each generate_content call.
+generation_config = genai.types.GenerationConfig(
+    temperature=0.5
+)
+# =========================
+# 3. HELPERS
+# =========================
+def parse_json(json_output):
+    """Extract the JSON payload from a reply that may be wrapped in ``` fences.
+
+    Args:
+        json_output: Raw reply text, with or without Markdown code fences.
+
+    Returns:
+        The input unchanged when it contains no fence. Otherwise the text
+        between the first fence and the next one (or the end of the text),
+        with a leading language tag such as ``json`` and the surrounding
+        whitespace removed. If that span is blank (for example, the reply
+        only has a stray closing fence), the text before the fence is
+        returned instead.
+    """
+    fence = "```"
+    before, opened, after = json_output.partition(fence)
+    if not opened:
+        return json_output
+
+    # Slice on fence positions rather than on whole lines so a payload that
+    # shares a line with its fences ("```json [...]```") is not thrown away.
+    inner = after.partition(fence)[0]
+
+    # A language tag directly follows the opening fence; JSON arrays/objects
+    # start with "[" or "{", so a leading alphanumeric word is the tag.
+    if inner[:1].isalpha():
+        tag, *rest = inner.split(maxsplit=1)
+        if tag.isalnum():
+            inner = rest[0] if rest else ""
+
+    payload = inner.strip()
+    if not payload:
+        # Only a stray fence after the data: fall back to what preceded it.
+        payload = before.strip()
+    return payload
+def plot_bounding_boxes(im, bounding_boxes):
+    """Draw labelled bounding boxes on a copy of an image.
+
+    Args:
+        im: Image to annotate; it is copied, the original is not modified.
+        bounding_boxes: JSON text holding an array of objects (or a single
+            object), each with a ``"box_2d"`` entry of four numbers
+            ``[y1, x1, y2, x2]`` on a 0-1000 scale and an optional
+            ``"label"``.
+
+    Returns:
+        The annotated copy of ``im``.
+
+    Raises:
+        json.JSONDecodeError: If ``bounding_boxes`` is not valid JSON.
+    """
+    im = im.copy()
+    width, height = im.size
+    draw = ImageDraw.Draw(im)
+    colors = list(ImageColor.colormap.keys())
+    font = ImageFont.load_default()
+
+    boxes = json.loads(bounding_boxes)
+    if isinstance(boxes, dict):
+        # A single detection returned as a bare object instead of an array.
+        boxes = [boxes]
+    elif not isinstance(boxes, list):
+        boxes = []
+
+    for i, box in enumerate(boxes):
+        # Skip malformed entries instead of aborting the whole render.
+        coords = box.get("box_2d") if isinstance(box, dict) else None
+        if not isinstance(coords, (list, tuple)) or len(coords) != 4:
+            continue
+        try:
+            y1, x1, y2, x2 = coords
+            # Convert from 0–1000 scale to image pixels
+            x1 = int(x1 / 1000 * width)
+            x2 = int(x2 / 1000 * width)
+            y1 = int(y1 / 1000 * height)
+            y2 = int(y2 / 1000 * height)
+        except (TypeError, ValueError, OverflowError):
+            continue
+
+        # Order the corners: the rectangle call expects the second point to
+        # be at or beyond the first on both axes.
+        left, right = sorted((x1, x2))
+        top, bottom = sorted((y1, y2))
+
+        color = colors[i % len(colors)]
+        label = str(box.get("label", ""))
+        draw.rectangle([(left, top), (right, bottom)], outline=color, width=4)
+        draw.text((left + 6, top + 6), label, fill=color, font=font)
+    return im
+# =========================
+# 4. MAIN FUNCTION (GRADIO)
+# =========================
+def detect_objects(user_prompt, image):
+    """Ask the model for bounding boxes and return the annotated image.
+
+    Args:
+        user_prompt: Free-text instruction; ``None`` or blank text falls
+            back to a generic "identify and label" prompt.
+        image: Image to analyse, or ``None`` when nothing was uploaded.
+
+    Returns:
+        ``None`` when ``image`` is ``None``; otherwise the result of
+        ``plot_bounding_boxes`` for the model's reply.
+
+    Raises:
+        gr.Error: If the reply cannot be parsed or drawn, so the UI shows a
+            readable message instead of a raw traceback.
+    """
+    if image is None:
+        return None
+
+    # Guard against a None prompt as well as an empty/whitespace-only one.
+    prompt = (user_prompt or "").strip()
+    if not prompt:
+        prompt = "Identify and label the objects in the image."
+
+    response = model.generate_content(
+        [prompt, image],
+        generation_config=generation_config
+    )
+
+    try:
+        # json.JSONDecodeError is a ValueError subclass; KeyError/TypeError
+        # cover replies whose JSON has an unexpected shape.
+        # NOTE(review): response.text presumably raises ValueError when the
+        # reply carries no text part - confirm against the SDK docs.
+        bounding_boxes = parse_json(response.text)
+        return plot_bounding_boxes(image, bounding_boxes)
+    except (ValueError, KeyError, TypeError) as err:
+        raise gr.Error(
+            "The model reply could not be turned into bounding boxes. "
+            "Please try again or rephrase the prompt."
+        ) from err
+# =========================
+# 5. GRADIO UI
+# =========================
+# Build the page: one row with two columns. The left column holds the inputs
+# (image upload, prompt box, submit button); the right column holds the
+# annotated result.
+with gr.Blocks(title="Gemini Bounding Box Detector") as demo:
+    gr.Markdown("## Gemini Vision – Object Detection (Bounding Boxes Only)")
+    with gr.Row():
+        with gr.Column():
+            # type="pil" so detect_objects receives an image object it can
+            # pass to the model and to plot_bounding_boxes.
+            image_input = gr.Image(type="pil", label="Upload Image")
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                placeholder="e.g. Detect cookies and plates"
+            )
+            submit_btn = gr.Button("Detect Objects ")
+        with gr.Column():
+            image_output = gr.Image(label="Image with Bounding Boxes")
+    # The inputs list order matches detect_objects(user_prompt, image):
+    # prompt first, image second.
+    submit_btn.click(
+        fn=detect_objects,
+        inputs=[prompt_input, image_input],
+        outputs=image_output
+    )
+# NOTE: launch() is called unconditionally (no __main__ guard), so it runs
+# whenever this module is executed or imported.
+demo.launch()