Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| from io import BytesIO | |
| from PIL import Image, ImageDraw, ImageFont | |
| from PIL import ImageColor | |
| import json | |
| import google.generativeai as genai | |
| from google.generativeai import types | |
| from dotenv import load_dotenv | |
| from IPython.display import display | |
# 1. SETUP API KEY
# ----------------
# Load the .env file first, then look the key up in the environment.
load_dotenv()
api_key = os.environ.get("Gemini_API_Key")

# Hand the key to the Google AI client library.
genai.configure(api_key=api_key)

# 2. DEFINE MODEL AND INSTRUCTIONS
# System instruction attached to the model below.
bounding_box_system_instructions = """
Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
"""

model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    system_instruction=bounding_box_system_instructions,
)
generation_config = genai.types.GenerationConfig(temperature=0.5)

# 3. PREPARE IMAGE AND PROMPT
# Default prompt text kept at module level.
prompt = (
    "Identify and label the objects in the image. "
    "Return only the JSON array of bounding boxes and labels as per the system instructions."
)
| #image = "Images/cookies.jpg" | |
| #img = Image.open(BytesIO(open(image, "rb").read())) | |
| # print(f"Original image size: {img.size}") | |
| # resize the image to a max width of 1024 while maintaining aspect ratio | |
| #im = Image.open(image).resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS) | |
| #print(f"Resized image size: {im.size}") | |
| #im.show() | |
| # Run model to find bounding boxes | |
| #response = model.generate_content([prompt, im], generation_config=generation_config) | |
| #print(response.text) | |
| # def generate_bounding_boxes(prompt, image): | |
| # response = model.generate_content([prompt, image], generation_config=generation_config) | |
| # return response.text | |
def parse_json(json_output):
    """Extract the JSON payload from a reply that may be Markdown-fenced.

    The text is scanned line by line for an opening ```json fence. The fence
    line is compared after trimming surrounding whitespace and ignoring case,
    so a fence with trailing spaces or upper-case "JSON" is still recognised.

    Args:
        json_output: Raw reply text.

    Returns:
        The text between the first ```json fence line and the next ``` marker
        (including the newline that precedes the closing marker), or
        ``json_output`` unchanged when no such fence line exists.
    """
    lines = json_output.splitlines()
    for index, line in enumerate(lines):
        if line.strip().lower() == "```json":
            # Keep only what follows the opening fence, then cut at the
            # closing fence (if there is one).
            fenced = "\n".join(lines[index + 1:])
            return fenced.split("```")[0]
    return json_output
def plot_bounding_boxes(im, bounding_boxes):
    """Draw labelled bounding boxes on a copy of an image.

    Args:
        im: PIL image to annotate. It is copied first, so the caller's image
            is left untouched.
        bounding_boxes: JSON text holding a list of objects (a single object
            is also accepted). Each object needs a "box_2d" entry whose first
            four numbers are read as [y1, x1, y2, x2] on a 0-1000 scale, and
            may carry a "label" that is drawn inside the box.

    Returns:
        The annotated copy. Drawing is best-effort: if the JSON cannot be
        parsed the unannotated copy is returned, and an individual entry that
        is malformed is reported and skipped without affecting the others.
    """
    annotated = im.copy()
    width, height = annotated.size
    draw = ImageDraw.Draw(annotated)
    font = ImageFont.load_default()

    # A few distinct colours first, then every named colour Pillow knows.
    colors = [
        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
        'lime', 'magenta', 'violet', 'gold', 'silver'
    ] + list(ImageColor.colormap)

    try:
        boxes = json.loads(bounding_boxes)
    except (TypeError, ValueError) as e:
        print(f"Error drawing bounding boxes: {e}")
        return annotated

    if isinstance(boxes, dict):
        # Treat a lone object as a one-element list.
        boxes = [boxes]
    if not isinstance(boxes, list):
        print(f"Error drawing bounding boxes: expected a JSON array, got {type(boxes).__name__}")
        return annotated

    for i, bounding_box in enumerate(boxes):
        color = colors[i % len(colors)]
        try:
            y1, x1, y2, x2 = bounding_box["box_2d"][:4]
            # Scale from the 0-1000 range to pixel coordinates.
            abs_y1 = int(y1 / 1000 * height)
            abs_x1 = int(x1 / 1000 * width)
            abs_y2 = int(y2 / 1000 * height)
            abs_x2 = int(x2 / 1000 * width)

            # Normalise so (x1, y1) is the top-left corner.
            if abs_x1 > abs_x2:
                abs_x1, abs_x2 = abs_x2, abs_x1
            if abs_y1 > abs_y2:
                abs_y1, abs_y2 = abs_y2, abs_y1

            draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
            if "label" in bounding_box:
                draw.text(
                    (abs_x1 + 8, abs_y1 + 6),
                    str(bounding_box["label"]),
                    fill=color,
                    font=font,
                )
        except (KeyError, IndexError, TypeError, ValueError, OverflowError) as e:
            # One bad entry must not stop the remaining boxes from being drawn.
            print(f"Skipping bounding box {i}: {e!r}")
    return annotated
| #im_with_boxes = plot_bounding_boxes(im, bounding_boxes) | |
| #display(im_with_boxes) | |
| #im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg") | |
| #im_with_boxes.show() | |
| #print("Bounding boxes plotted on image.") | |
def detect_objects(image, prompt):
    """Run the model on an image and return the annotated image plus raw boxes.

    Args:
        image: PIL image supplied by the Gradio image input, or None when
            nothing was uploaded.
        prompt: Text sent to the model together with the image.

    Returns:
        A tuple ``(output_image, bounding_boxes)``: the resized image with
        boxes drawn on it, and the bounding-box text extracted from the
        model reply.

    Raises:
        gr.Error: If no image was provided, or the model reply has no text.
    """
    if image is None:
        raise gr.Error("Please upload an image before running detection.")

    # Resize to a fixed width of 1024, scaling the height proportionally.
    # Clamp to 1 so an extremely wide image never yields a zero height.
    target_width = 1024
    src_width, src_height = image.size
    target_height = max(1, int(target_width * src_height / src_width))
    image = image.resize((target_width, target_height))

    # Generate bounding boxes
    response = model.generate_content([prompt, image], generation_config=generation_config)
    try:
        reply_text = response.text
    except ValueError as err:
        # Report a reply without text to the user as a readable message.
        raise gr.Error(f"The model returned no usable text: {err}") from err
    bounding_boxes = parse_json(reply_text)

    # Draw boxes
    output_image = plot_bounding_boxes(image, bounding_boxes)
    return output_image, bounding_boxes
# ================== Gradio Interface ==================
# Inputs: an image (delivered as a PIL object) and an editable prompt.
# Outputs: the annotated image and the bounding-box JSON text.
interface = gr.Interface(
    fn=detect_objects,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(
            label="Prompt",
            value="Identify and label the objects in the image. Return only the JSON array of bounding boxes.",
        ),
    ],
    outputs=[
        gr.Image(label="Detected Objects"),
        gr.Textbox(label="Bounding Boxes JSON"),
    ],
    title="Object Detection with Gemini",
)

# Serve on all network interfaces, port 7860.
interface.launch(server_name="0.0.0.0", server_port=7860)