| | import gradio as gr
|
| | import os
|
| | import json
|
| | from io import BytesIO
|
| | from PIL import Image, ImageDraw, ImageFont, ImageColor
|
| | import google.generativeai as genai
|
| | from dotenv import load_dotenv
|
| |
|
| |
|
| |
|
| |
|
# --- Gemini client configuration -------------------------------------------
# load_dotenv() runs first so that a key kept in a local .env file is visible
# to the environment lookup below.
load_dotenv()
api_key = os.environ.get("Gemini_API_Key")
genai.configure(api_key=api_key)

# System prompt sent with every request. It asks for plain JSON (no masks, no
# Markdown fences) and caps the number of detections at 25.
bounding_box_system_instructions = """
Return bounding boxes as a JSON array with labels.
Never return masks or code fencing.
Limit to 25 objects.
If an object appears multiple times, use unique labels.
"""

# Shared model handle, created once at import time and reused for every request.
model = genai.GenerativeModel(
    system_instruction=bounding_box_system_instructions,
    model_name="gemini-2.5-flash",
)

# Sampling settings passed alongside each generate_content() call.
generation_config = genai.types.GenerationConfig(temperature=0.5)
|
| |
|
| |
|
| |
|
| |
|
def parse_json(json_output):
    """Strip a Markdown code fence from model output and return the payload.

    The text is scanned line by line for the first line containing a
    triple-backtick fence. Everything after that fence, up to the next fence
    (or the end of the text), is returned. A language tag following the
    opening fence (for example ``json``) is discarded.

    If the payload starts on the same line as the opening fence, the part of
    that line beginning at the first ``[`` or ``{`` is kept, so single-line
    fenced output is not lost.

    Args:
        json_output: Raw text, with or without a code fence.

    Returns:
        The text between the fences, or ``json_output`` unchanged when it
        contains no fence at all.
    """
    lines = json_output.splitlines()
    for index, line in enumerate(lines):
        _, fence, after_fence = line.partition("```")
        if not fence:
            continue

        payload_lines = lines[index + 1:]

        # Anything after the fence on this line is normally just a language
        # tag. If a JSON opener appears there, the payload began inline and
        # must be preserved rather than thrown away with the tag.
        opener_positions = [
            pos
            for pos in (after_fence.find("["), after_fence.find("{"))
            if pos != -1
        ]
        if opener_positions:
            inline_payload = after_fence[min(opener_positions):]
            payload_lines = [inline_payload, *payload_lines]

        # Cut at the closing fence, if there is one.
        return "\n".join(payload_lines).split("```")[0]

    return json_output
|
| |
|
| |
|
def plot_bounding_boxes(im, bounding_boxes):
    """Draw labelled bounding boxes on a copy of an image.

    Args:
        im: PIL image to annotate. It is copied first, so the caller's image
            is not modified.
        bounding_boxes: JSON text describing the detections. Each entry is
            read for a ``"box_2d"`` value of four numbers, unpacked as
            ``[y1, x1, y2, x2]`` on a 0-1000 scale, and an optional
            ``"label"``. A single JSON object is treated as a one-item list.

    Returns:
        The annotated copy of ``im``.

    Raises:
        json.JSONDecodeError: If ``bounding_boxes`` is not valid JSON.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)

    colors = list(ImageColor.colormap.keys())
    font = ImageFont.load_default()

    boxes = json.loads(bounding_boxes)
    if isinstance(boxes, dict):
        boxes = [boxes]

    for i, box in enumerate(boxes):
        coords = box.get("box_2d") if isinstance(box, dict) else None
        # One malformed entry should not prevent the valid ones from being drawn.
        if not isinstance(coords, (list, tuple)) or len(coords) != 4:
            continue

        color = colors[i % len(colors)]
        y1, x1, y2, x2 = coords

        # Scale from the 0-1000 range to pixel coordinates.
        x1 = int(x1 / 1000 * width)
        x2 = int(x2 / 1000 * width)
        y1 = int(y1 / 1000 * height)
        y2 = int(y2 / 1000 * height)

        # ImageDraw.rectangle rejects boxes whose second corner precedes the
        # first, so put the corners in ascending order before drawing.
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1

        draw.rectangle([(x1, y1), (x2, y2)], outline=color, width=4)

        # A detection without a label still gets its rectangle drawn.
        label = box.get("label")
        if label is not None:
            draw.text((x1 + 6, y1 + 6), str(label), fill=color, font=font)

    return im
|
| |
|
| |
|
| |
|
| |
|
| |
|
def detect_objects(user_prompt, image):
    """Run object detection on an image and return it with boxes drawn.

    Args:
        user_prompt: Free-text instruction for the model. ``None`` or a blank
            string falls back to a generic "identify and label" prompt.
        image: The image to analyse, or ``None`` when nothing was uploaded.

    Returns:
        The annotated image produced by ``plot_bounding_boxes``, or ``None``
        when ``image`` is ``None``.

    Raises:
        gr.Error: If the model response carries no text, or its text cannot be
            interpreted as bounding boxes.
    """
    if image is None:
        return None

    # Tolerate a missing prompt value as well as an empty/whitespace one.
    prompt = (user_prompt or "").strip()
    if not prompt:
        prompt = "Identify and label the objects in the image."

    response = model.generate_content(
        [prompt, image],
        generation_config=generation_config,
    )

    # Accessing .text raises ValueError when the response holds no text part.
    try:
        raw_text = response.text
    except ValueError as err:
        raise gr.Error(
            "The model returned no text; the response may have been blocked."
        ) from err

    # Model output is not guaranteed to be well-formed; report a readable
    # message instead of an unhandled traceback.
    try:
        bounding_boxes = parse_json(raw_text)
        image_with_boxes = plot_bounding_boxes(image, bounding_boxes)
    except (KeyError, TypeError, ValueError) as err:
        raise gr.Error(
            "Could not read bounding boxes from the model response."
        ) from err

    return image_with_boxes
|
| |
|
| |
|
| |
|
| |
|
| |
|
# --- Gradio user interface --------------------------------------------------
# Two columns: the left takes the image and prompt, the right shows the result.
with gr.Blocks(title="Gemini Bounding Box Detector") as demo:
    gr.Markdown("## Gemini Vision – Object Detection (Bounding Boxes Only)")

    with gr.Row():
        with gr.Column():
            # type="pil" makes Gradio hand detect_objects a PIL image.
            image_input = gr.Image(type="pil", label="Upload Image")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="e.g. Detect cookies and plates",
            )
            submit_btn = gr.Button("Detect Objects ")

        with gr.Column():
            image_output = gr.Image(label="Image with Bounding Boxes")

    # The inputs order must match detect_objects(user_prompt, image).
    submit_btn.click(
        fn=detect_objects,
        inputs=[prompt_input, image_input],
        outputs=image_output,
    )

# Start the server only when run as a script, so importing this module
# (for tests or reuse of `demo`) does not launch it.
if __name__ == "__main__":
    demo.launch()
|
| |
|