"""Gradio demo: ask Gemini to detect objects in an image and draw the boxes."""

import json
import os

import google.generativeai as genai
import gradio as gr
from PIL import Image, ImageDraw, ImageFont

# --- 1. SETUP ---
# SECURITY: the API key is read from the environment rather than being
# committed to source control. Any key that was previously hard-coded in this
# file must be treated as leaked and revoked.
api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-1.5-flash')

# Outline colors, cycled by detection index.
_BOX_COLORS = ('red', 'green', 'blue', 'yellow', 'cyan')
# The prompt's box coordinates are converted from this scale to pixels.
_COORD_SCALE = 1000


# --- 2. LOGIC ---
def _to_pixel_box(raw_box, width, height):
    """Convert a ``[ymin, xmin, ymax, xmax]`` box on the 0-1000 scale to pixels.

    Args:
        raw_box: Four numeric values in ``ymin, xmin, ymax, xmax`` order.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        ``(left, top, right, bottom)`` with ``left <= right`` and
        ``top <= bottom``, or ``None`` when *raw_box* is not four finite
        numbers.
    """
    try:
        ymin, xmin, ymax, xmax = (float(value) for value in raw_box)
        left = int((xmin / _COORD_SCALE) * width)
        top = int((ymin / _COORD_SCALE) * height)
        right = int((xmax / _COORD_SCALE) * width)
        bottom = int((ymax / _COORD_SCALE) * height)
    except (TypeError, ValueError, OverflowError):
        # Wrong length, non-numeric values, or NaN/Infinity.
        return None

    # ImageDraw.rectangle rejects inverted corners in recent Pillow versions,
    # so normalise the order instead of trusting the model's output.
    left, right = sorted((left, right))
    top, bottom = sorted((top, bottom))
    return left, top, right, bottom


def plot_bounding_boxes(im, boxes):
    """Draw labelled bounding boxes on a copy of *im*.

    Args:
        im: PIL image to annotate; it is copied, not modified.
        boxes: Iterable of dicts shaped like
            ``{"box_2d": [ymin, xmin, ymax, xmax], "label": "name"}`` with
            coordinates on a 0-1000 scale. A missing ``box_2d`` defaults to
            all zeros and a missing ``label`` to ``"Object"``.

    Returns:
        The annotated copy. Entries that are not dicts, or whose ``box_2d``
        is malformed, are skipped so one bad detection does not discard the
        rest.
    """
    # Work on a copy so the caller's image is left untouched.
    im = im.copy()
    draw = ImageDraw.Draw(im)
    width, height = im.size

    for i, box in enumerate(boxes):
        if not isinstance(box, dict):
            continue

        pixel_box = _to_pixel_box(box.get("box_2d", [0, 0, 0, 0]), width, height)
        if pixel_box is None:
            continue

        color = _BOX_COLORS[i % len(_BOX_COLORS)]
        left, top, right, bottom = pixel_box
        draw.rectangle([left, top, right, bottom], outline=color, width=4)

        # The default font is used to avoid font-loading errors; str() guards
        # against a non-string label (e.g. a JSON null).
        draw.text((left, top), str(box.get("label", "Object")), fill=color)

    return im


def _parse_boxes(text):
    """Extract the JSON array of detections from the model's reply text.

    Markdown code fences are stripped first. If the remainder is still not
    valid JSON, the outermost ``[...]`` span is parsed instead.

    Raises:
        json.JSONDecodeError: If no JSON could be parsed from *text*.
        ValueError: If the parsed JSON is not an array.
    """
    cleaned = text.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(cleaned[start:end + 1])

    if not isinstance(parsed, list):
        raise ValueError("Expected a JSON array of detections.")
    return parsed


def detect(image):
    """Run Gemini object detection on *image* and draw the results.

    Args:
        image: PIL image from the Gradio input, or ``None`` if nothing was
            uploaded.

    Returns:
        ``(result_image, debug_text)``. With no image this is ``None`` and a
        prompt to upload one. On success it is the annotated image and the
        string form of the parsed detections. On failure it is the original
        image and an ``"Error: ..."`` message.
    """
    if image is None:
        return None, "Please upload an image."

    # The prompt we send to Gemini
    prompt = """
    Detect objects in this image.
    Return a JSON Array.
    Format: [{"box_2d": [ymin, xmin, ymax, xmax], "label": "name"}]
    """

    try:
        # 1. Ask Gemini
        response = model.generate_content([prompt, image])

        # 2. Clean the reply and convert it to a list
        boxes = _parse_boxes(response.text)

        # 3. Draw boxes
        result_image = plot_bounding_boxes(image, boxes)
        return result_image, str(boxes)
    except Exception as e:
        # UI boundary: report any API, parsing, or drawing failure in the
        # debug textbox instead of crashing the Gradio callback.
        return image, f"Error: {str(e)}"


# --- 3. UI ---
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Gemini Object Detector")

    with gr.Row():
        inp = gr.Image(type="pil", label="Input Image")
        out = gr.Image(type="pil", label="Result")

    debug = gr.Textbox(label="Debug Information")
    btn = gr.Button("Detect Objects")
    btn.click(detect, inp, [out, debug])

if __name__ == "__main__":
    demo.launch()