Spaces:

hamada056
/

MIM

Sleeping

App Files Files Community

MIM / app.py

hamada056

Upload app.py

3a06621 verified 20 days ago

raw

history blame contribute delete

3.33 kB

	import gradio as gr
	import os
	import json
	from io import BytesIO
	from PIL import Image, ImageDraw, ImageFont, ImageColor
	import google.generativeai as genai
	from dotenv import load_dotenv

	# =========================
	# 1. SETUP API KEY
	# =========================
	# Load environment variables via python-dotenv, then hand the Gemini key to
	# the client library. `api_key` is None when the variable is not set.
	load_dotenv()
	api_key = os.environ.get("Gemini_API_Key")
	genai.configure(api_key=api_key)

	# =========================
	# 2. MODEL CONFIG
	# =========================
	# System prompt attached to the model: asks for a JSON array of labelled
	# bounding boxes, without masks or code fences, capped at 25 objects.
	bounding_box_system_instructions = """
	Return bounding boxes as a JSON array with labels.
	Never return masks or code fencing.
	Limit to 25 objects.
	If an object appears multiple times, use unique labels.
	"""

	# Module-level model handle used by detect_objects().
	model = genai.GenerativeModel(
	    model_name="gemini-2.5-flash",
	    system_instruction=bounding_box_system_instructions,
	)

	# Generation settings passed along with each generate_content() call.
	generation_config = genai.types.GenerationConfig(temperature=0.5)

	# =========================
	# 3. HELPERS
	# =========================
	def parse_json(json_output):
	    """Extract the JSON payload from a model reply that may be code-fenced.

	    A reply without any ``` fence is returned unchanged. Otherwise the text
	    between the first fence and the next one (or the end of the reply) is
	    returned, without the fence's language tag.

	    Args:
	        json_output: Raw text of the model reply.

	    Returns:
	        The text expected to hold JSON. It is not parsed or validated here.
	    """
	    _, fence, remainder = json_output.partition("```")
	    if not fence:
	        return json_output

	    # Everything up to the closing fence (or the end when it is missing).
	    payload = remainder.split("```")[0]
	    first_line, _, following = payload.partition("\n")

	    # A fence line such as "```json" carries only a language tag: skip it.
	    # When the JSON itself starts on the fence line ("```json [..." or
	    # "```["), keep it from its opening bracket instead of discarding the
	    # whole line, which would return an empty or truncated payload.
	    openers = [
	        pos
	        for pos in (first_line.find("["), first_line.find("{"))
	        if pos != -1
	    ]
	    if not openers:
	        return following
	    return payload[min(openers):]


	def _scale_box(box_2d, width, height):
	    """Convert a ``[y1, x1, y2, x2]`` box on the 0-1000 scale to pixel corners.

	    Returns ``[(left, top), (right, bottom)]`` with ordered corners clamped
	    to the image, or ``None`` when ``box_2d`` is not four finite numbers.
	    """
	    try:
	        y1, x1, y2, x2 = box_2d
	        xs = [int(x1 / 1000 * width), int(x2 / 1000 * width)]
	        ys = [int(y1 / 1000 * height), int(y2 / 1000 * height)]
	    except (TypeError, ValueError, OverflowError):
	        return None

	    # Pillow rejects rectangles whose second corner precedes the first, so
	    # order the corners; clamping keeps the label anchor on the canvas.
	    left, right = sorted(min(max(x, 0), width) for x in xs)
	    top, bottom = sorted(min(max(y, 0), height) for y in ys)
	    return [(left, top), (right, bottom)]


	def plot_bounding_boxes(im, bounding_boxes):
	    """Draw labelled bounding boxes on a copy of an image.

	    Args:
	        im: PIL image to annotate. It is copied, not modified.
	        bounding_boxes: JSON text holding an array of objects, each with a
	            ``box_2d`` entry (``[y1, x1, y2, x2]`` on a 0-1000 scale) and a
	            ``label`` entry. A single object is treated as a one-item array.

	    Returns:
	        The annotated copy of ``im``.

	    Raises:
	        json.JSONDecodeError: If ``bounding_boxes`` is not valid JSON.
	    """
	    im = im.copy()
	    width, height = im.size
	    draw = ImageDraw.Draw(im)

	    colors = list(ImageColor.colormap.keys())
	    font = ImageFont.load_default()

	    boxes = json.loads(bounding_boxes)
	    if isinstance(boxes, dict):
	        boxes = [boxes]
	    elif not isinstance(boxes, list):
	        boxes = []

	    for i, box in enumerate(boxes):
	        # Skip malformed entries so one bad item does not discard the rest.
	        if not isinstance(box, dict):
	            continue
	        corners = _scale_box(box.get("box_2d"), width, height)
	        if corners is None:
	            continue

	        color = colors[i % len(colors)]
	        draw.rectangle(corners, outline=color, width=4)

	        label = box.get("label")
	        if label is not None:
	            left, top = corners[0]
	            draw.text((left + 6, top + 6), str(label), fill=color, font=font)

	    return im


	# =========================
	# 4. MAIN FUNCTION (GRADIO)
	# =========================
	def detect_objects(user_prompt, image):
	    """Gradio handler: ask the model for bounding boxes and draw them.

	    Args:
	        user_prompt: Optional instruction text. A default prompt is used when
	            it is empty, whitespace-only, or None.
	        image: Input image from the Gradio image component, or None when
	            nothing was uploaded.

	    Returns:
	        The image with boxes drawn on it, or None when no image was given.

	    Raises:
	        gr.Error: If the model call fails or its reply cannot be turned into
	            bounding boxes, so the UI shows a readable message.
	    """
	    if image is None:
	        return None

	    # Guard against None before stripping.
	    prompt = (user_prompt or "").strip()
	    if not prompt:
	        prompt = "Identify and label the objects in the image."

	    try:
	        response = model.generate_content(
	            [prompt, image],
	            generation_config=generation_config,
	        )
	        reply_text = response.text
	    except Exception as exc:
	        # UI boundary: keep the cause chained for the logs but show the user
	        # a generic message rather than raw API error details.
	        raise gr.Error(
	            "The Gemini request failed. Please try again later."
	        ) from exc

	    try:
	        return plot_bounding_boxes(image, parse_json(reply_text))
	    except (ValueError, KeyError, TypeError) as exc:
	        # json.JSONDecodeError is a ValueError; the other two cover replies
	        # whose entries lack the expected fields.
	        raise gr.Error(
	            "The model reply could not be read as bounding boxes. "
	            "Please try again."
	        ) from exc


	# =========================
	# 5. GRADIO UI
	# =========================
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation was lost; the button is assumed to sit in the left column and
	# the click wiring inside the Blocks context. Confirm against the original.
	with gr.Blocks(title="Gemini Bounding Box Detector") as demo:
	    gr.Markdown("## Gemini Vision – Object Detection (Bounding Boxes Only)")

	    with gr.Row():
	        # Inputs: image upload, free-text prompt and the trigger button.
	        with gr.Column():
	            image_input = gr.Image(type="pil", label="Upload Image")
	            prompt_input = gr.Textbox(label="Prompt", placeholder="e.g. Detect cookies and plates")
	            submit_btn = gr.Button("Detect Objects ")

	        # Output: the annotated image.
	        with gr.Column():
	            image_output = gr.Image(label="Image with Bounding Boxes")

	    # Clicking the button runs detect_objects(prompt, image).
	    submit_btn.click(fn=detect_objects, inputs=[prompt_input, image_input], outputs=image_output)

	demo.launch()