import gradio as gr
import os
import json
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont, ImageColor
import google.generativeai as genai
from dotenv import load_dotenv
# =========================
# 1. SETUP API KEY
# =========================
# Load variables from a .env file (if one is present) before the key is read,
# so the lookup below can see values defined there.
load_dotenv()

# The key is read from the "Gemini_API_Key" environment variable; the result
# is None when the variable is not set.
api_key = os.environ.get("Gemini_API_Key")
genai.configure(api_key=api_key)
# =========================
# 2. MODEL CONFIG
# =========================
# Sampling settings that detect_objects passes to model.generate_content.
generation_config = genai.types.GenerationConfig(temperature=0.5)

# System prompt asking the model for a JSON array of labelled boxes.
bounding_box_system_instructions = """
Return bounding boxes as a JSON array with labels.
Never return masks or code fencing.
Limit to 25 objects.
If an object appears multiple times, use unique labels.
"""

# Model handle shared by every request; the system prompt is bound here.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    system_instruction=bounding_box_system_instructions,
)
# =========================
# 3. HELPERS
# =========================
def parse_json(json_output):
    """Extract the JSON payload from a model reply that may be code-fenced.

    If the text contains a Markdown code fence (three backticks), return
    what lies between the first fence and the next one (or the end of the
    text when the fence is never closed). Text without a fence is returned
    unchanged.

    Args:
        json_output: Raw text of the model reply.

    Returns:
        The text expected to hold the JSON document. It is not parsed or
        validated here.
    """
    fence = "```"
    _, opener, after_open = json_output.partition(fence)
    if not opener:
        # No fence at all: the reply is assumed to be bare JSON already.
        return json_output

    # Keep only what precedes the closing fence (if there is one).
    body = after_open.partition(fence)[0]

    first_line, newline, remainder = body.partition("\n")
    tag = first_line.strip()
    if newline and (not tag or tag.isalnum()):
        # Usual layout: the fence line carries nothing but an optional
        # language tag ("json"), and the payload starts on the next line.
        return remainder

    # The payload shares a line with the opening fence, e.g.
    # "```json [ ... ] ```". Dropping the whole fence line would lose it,
    # so strip only the optional "json" tag.
    payload = body.lstrip()
    if payload[:4].lower() == "json":
        payload = payload[4:]
    return payload.strip()
def plot_bounding_boxes(im, bounding_boxes):
    """Draw labelled bounding boxes onto a copy of an image.

    Args:
        im: Image to annotate. It is copied first, so the caller's image
            is left untouched.
        bounding_boxes: JSON text holding an array of objects (a single
            object is also accepted). Each object is read for a "box_2d"
            entry of the form [y1, x1, y2, x2] on a 0-1000 scale and an
            optional "label".

    Returns:
        The annotated copy of ``im``.

    Raises:
        json.JSONDecodeError: If ``bounding_boxes`` is not valid JSON.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)
    colors = list(ImageColor.colormap.keys())
    font = ImageFont.load_default()

    boxes = json.loads(bounding_boxes)
    if isinstance(boxes, dict):
        # A lone object instead of an array: treat it as a one-item list.
        boxes = [boxes]

    for i, box in enumerate(boxes):
        try:
            y1, x1, y2, x2 = box["box_2d"]
            # Convert from 0–1000 scale to image pixels. Sorting puts the
            # top-left corner first; recent Pillow releases are understood
            # to reject rectangles whose corners are given in reverse order.
            left, right = sorted(
                (int(x1 / 1000 * width), int(x2 / 1000 * width))
            )
            top, bottom = sorted(
                (int(y1 / 1000 * height), int(y2 / 1000 * height))
            )
        except (KeyError, TypeError, ValueError):
            # Malformed entry (missing or non-numeric "box_2d", wrong
            # length): skip it so the remaining boxes are still drawn.
            continue

        # Index by position in the array so a skipped entry does not shift
        # the colours of the entries that follow it.
        color = colors[i % len(colors)]
        label = str(box.get("label", ""))

        draw.rectangle([(left, top), (right, bottom)], outline=color, width=4)
        draw.text((left + 6, top + 6), label, fill=color, font=font)

    return im
# =========================
# 4. MAIN FUNCTION (GRADIO)
# =========================
def detect_objects(user_prompt, image):
    """Ask the model for bounding boxes and return the annotated image.

    Args:
        user_prompt: Text describing what to detect. ``None`` or blank text
            falls back to a generic "label the objects" prompt.
        image: Image to analyse, or ``None`` when nothing was uploaded.

    Returns:
        A copy of ``image`` with the detected boxes drawn on it, or ``None``
        when no image was supplied.

    Raises:
        gr.Error: If the model reply cannot be turned into bounding boxes.
    """
    if image is None:
        return None

    # The prompt may arrive as None; treat that the same as blank text.
    prompt = (user_prompt or "").strip()
    if not prompt:
        prompt = "Identify and label the objects in the image."

    response = model.generate_content(
        [prompt, image],
        generation_config=generation_config,
    )

    try:
        bounding_boxes = parse_json(response.text)
        return plot_bounding_boxes(image, bounding_boxes)
    except (ValueError, KeyError, TypeError) as err:
        # ValueError covers invalid JSON (json.JSONDecodeError) and,
        # presumably, a reply with no readable text; KeyError/TypeError
        # cover entries that lack the expected fields. Raise gr.Error so
        # the UI shows a readable message instead of a bare failure.
        raise gr.Error(
            f"Could not read bounding boxes from the model response: {err}"
        ) from err
# =========================
# 5. GRADIO UI
# =========================
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed — confirm the button belongs in the input column.
with gr.Blocks(title="Gemini Bounding Box Detector") as demo:
    gr.Markdown("## Gemini Vision – Object Detection (Bounding Boxes Only)")

    with gr.Row():
        # Input column: image upload, prompt, and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="e.g. Detect cookies and plates",
            )
            submit_btn = gr.Button("Detect Objects ")

        # Output column: the annotated image.
        with gr.Column():
            image_output = gr.Image(label="Image with Bounding Boxes")

    # Argument order matches detect_objects(user_prompt, image).
    submit_btn.click(
        fn=detect_objects,
        inputs=[prompt_input, image_input],
        outputs=image_output,
    )

demo.launch()