Spaces:

hamada056
/

MIM

Sleeping

File size: 3,332 Bytes

3a06621

import gradio as gr
import os
import json
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont, ImageColor
import google.generativeai as genai
from dotenv import load_dotenv

# =========================
# 1. SETUP API KEY
# =========================
# Pull settings from a .env file (python-dotenv) before reading the environment.
load_dotenv()
# os.getenv returns None when "Gemini_API_Key" is not set; that value is
# handed to genai.configure as-is, with no check here.
api_key = os.getenv("Gemini_API_Key")
genai.configure(api_key=api_key)

# =========================
# 2. MODEL CONFIG
# =========================
# System prompt attached to the model: asks for a JSON array of labelled
# bounding boxes, forbids masks and code fences, and caps the result at
# 25 objects with unique labels for repeated objects.
bounding_box_system_instructions = """

Return bounding boxes as a JSON array with labels.

Never return masks or code fencing.

Limit to 25 objects.

If an object appears multiple times, use unique labels.

"""

# Module-level model handle; detect_objects calls generate_content on it.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    system_instruction=bounding_box_system_instructions
)

# Generation settings passed along with every generate_content call.
generation_config = genai.types.GenerationConfig(
    temperature=0.5
)

# =========================
# 3. HELPERS
# =========================
def parse_json(json_output):
    """Extract the JSON payload from model text that may be Markdown-fenced.

    If no line contains a ``` fence the text is returned unchanged.
    Otherwise everything up to and including the first fence is discarded,
    an optional alphabetic language tag right after it (e.g. ``json``) is
    skipped, and the text up to the next fence is returned. The payload may
    start on the fence line itself or on the following line.

    Args:
        json_output: Raw text returned by the model.

    Returns:
        The text between the first pair of fences (lines re-joined with
        "\\n"), or ``json_output`` itself when it contains no fence.
    """
    fence = "```"
    lines = json_output.splitlines()
    for i, line in enumerate(lines):
        _, found, tail = line.partition(fence)
        if not found:
            continue

        # Skip the language tag after the opening fence. A JSON array or
        # object never starts with a letter, so only letters are consumed.
        tail = tail.lstrip()
        tag_len = 0
        while tag_len < len(tail) and tail[tag_len].isalpha():
            tag_len += 1
        tail = tail[tag_len:].lstrip()

        # Keep any payload that shares the line with the opening fence
        # ("```json [ ..."); previously that text was thrown away.
        payload_lines = lines[i + 1:]
        if tail:
            payload_lines.insert(0, tail)

        return "\n".join(payload_lines).split(fence)[0]
    return json_output


def plot_bounding_boxes(im, bounding_boxes):
    """Draw labelled bounding boxes on a copy of an image.

    Args:
        im: PIL image to annotate; it is copied, never modified in place.
        bounding_boxes: JSON text for a list of objects. Each object needs
            a ``box_2d`` entry of four numbers ``[y1, x1, y2, x2]`` on a
            0-1000 scale and may carry a ``label``.

    Returns:
        The annotated copy of ``im``.

    Raises:
        json.JSONDecodeError: if ``bounding_boxes`` is not valid JSON.
        KeyError: if an entry has no ``box_2d`` key.
    """
    annotated = im.copy()
    width, height = annotated.size
    draw = ImageDraw.Draw(annotated)

    colors = list(ImageColor.colormap)
    font = ImageFont.load_default()

    for i, box in enumerate(json.loads(bounding_boxes)):
        color = colors[i % len(colors)]
        y1, x1, y2, x2 = box["box_2d"]

        # Convert from the 0-1000 scale to image pixels. The corners are
        # sorted because the values come from the model and may arrive
        # swapped; ImageDraw.rectangle requires x1 >= x0 and y1 >= y0.
        left, right = sorted((int(x1 / 1000 * width), int(x2 / 1000 * width)))
        top, bottom = sorted((int(y1 / 1000 * height), int(y2 / 1000 * height)))

        draw.rectangle([(left, top), (right, bottom)], outline=color, width=4)

        # A box without a label is still worth drawing.
        label = box.get("label")
        if label is not None:
            draw.text((left + 6, top + 6), str(label), fill=color, font=font)

    return annotated


# =========================
# 4. MAIN FUNCTION (GRADIO)
# =========================
def detect_objects(user_prompt, image):
    """Ask the model for bounding boxes and draw them on the uploaded image.

    Args:
        user_prompt: Free-text instruction from the UI. When it is None,
            empty or only whitespace, a generic labelling prompt is used.
        image: PIL image from the upload widget, or None if nothing was
            uploaded.

    Returns:
        A copy of ``image`` with the boxes drawn, or None when ``image``
        is None.

    Raises:
        gr.Error: if the model reply cannot be turned into bounding boxes.
    """
    if image is None:
        return None

    # Guard against None so direct or API calls without a prompt still work.
    prompt = (user_prompt or "").strip()
    if not prompt:
        prompt = "Identify and label the objects in the image."

    response = model.generate_content(
        [prompt, image],
        generation_config=generation_config
    )

    # The reply is free-form text: it may not be valid JSON (ValueError from
    # json.loads) or may lack the expected keys/shape (KeyError/TypeError).
    # Surface that as a readable UI error instead of a raw traceback.
    try:
        bounding_boxes = parse_json(response.text)
        return plot_bounding_boxes(image, bounding_boxes)
    except (ValueError, KeyError, TypeError) as err:
        raise gr.Error(
            f"Could not read bounding boxes from the model response: {err}"
        ) from err


# =========================
# 5. GRADIO UI
# =========================
# Two-column layout: inputs (image, prompt, button) on the left, the
# annotated result on the right. Components are placed in creation order.
with gr.Blocks(title="Gemini Bounding Box Detector") as demo:
    gr.Markdown("## Gemini Vision – Object Detection (Bounding Boxes Only)")

    with gr.Row():
        with gr.Column():
            # type="pil" so detect_objects receives a PIL image it can copy and draw on.
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="e.g. Detect cookies and plates"
            )
            submit_btn = gr.Button("Detect Objects ")

        with gr.Column():
            image_output = gr.Image(label="Image with Bounding Boxes")

    # Input order must match detect_objects(user_prompt, image).
    submit_btn.click(
        fn=detect_objects,
        inputs=[prompt_input, image_input],
        outputs=image_output
    )

# Runs at module level (no __main__ guard), so importing this file starts the app.
demo.launch()