File size: 2,576 Bytes
c7e5c40
 
 
a7cd855
c7e5c40
 
a7cd855
 
2019de1
 
c7e5c40
a7cd855
c7e5c40
a7cd855
c7e5c40
a7cd855
 
c7e5c40
2019de1
a7cd855
c7e5c40
a7cd855
 
c7e5c40
a7cd855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e5c40
a7cd855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import os
import json
from PIL import Image, ImageDraw, ImageFont
import google.generativeai as genai

# --- 1. SETUP ---
# SECURITY: an API key must never be committed to source control. The key is
# read from the environment instead (GOOGLE_API_KEY, falling back to
# GEMINI_API_KEY). Any key that was previously hard-coded in this file should
# be treated as leaked: revoke it and issue a new one.
api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not api_key:
    # Fail fast with an actionable message instead of surfacing an opaque
    # authentication error on the first detection request.
    raise RuntimeError(
        "No Gemini API key found. Set the GOOGLE_API_KEY (or GEMINI_API_KEY) "
        "environment variable before starting the app."
    )
genai.configure(api_key=api_key)

# Single shared model handle used by detect().
model = genai.GenerativeModel('gemini-1.5-flash')

# --- 2. LOGIC ---
def plot_bounding_boxes(im, boxes):
    """Return a copy of ``im`` with the given bounding boxes drawn on it.

    Args:
        im: A PIL image. It is not modified; drawing happens on a copy.
        boxes: Iterable of dicts, each with a ``"box_2d"`` entry holding
            ``[ymin, xmin, ymax, xmax]`` on a 0-1000 scale and an optional
            ``"label"``. ``None`` is treated as an empty list. Entries that
            are not dicts, or whose ``"box_2d"`` is not four finite numbers,
            are skipped so one bad detection cannot abort the whole drawing.

    Returns:
        The annotated copy of ``im``.
    """
    # Work on a copy so the caller's image is left untouched.
    im = im.copy()
    draw = ImageDraw.Draw(im)
    width, height = im.size

    # Colors are cycled by detection index.
    colors = ['red', 'green', 'blue', 'yellow', 'cyan']

    def to_pixel(value, extent):
        # Clamp to the 0-1000 scale first so a box never leaves the image.
        clamped = min(max(value, 0.0), 1000.0)
        return int((clamped / 1000) * extent)

    for i, box in enumerate(boxes or []):
        if not isinstance(box, dict):
            continue

        try:
            ymin, xmin, ymax, xmax = (float(v) for v in box.get("box_2d"))
            # Sort each axis: ImageDraw.rectangle rejects x1 < x0 or y1 < y0.
            left, right = sorted((to_pixel(xmin, width), to_pixel(xmax, width)))
            top, bottom = sorted((to_pixel(ymin, height), to_pixel(ymax, height)))
        except (TypeError, ValueError):
            # Missing key, wrong length, non-numeric or NaN coordinates.
            continue

        color = colors[i % len(colors)]
        # Labels may come back as null or a non-string; draw.text needs text.
        label = str(box.get("label") or "Object")

        draw.rectangle([left, top, right, bottom], outline=color, width=4)

        # Default font is used on purpose to avoid font-loading errors.
        draw.text((left, top), label, fill=color)

    return im

def _parse_detections(text):
    """Parse a model reply into a list of detection dicts.

    Strips markdown code fences, tolerates prose around the JSON array, wraps
    a lone JSON object in a list, and drops entries that are not objects.

    Raises:
        json.JSONDecodeError: if no valid JSON can be recovered from ``text``.
        ValueError: if the JSON is neither an array nor an object.
    """
    cleaned = text.replace("```json", "").replace("```", "").strip()

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # The model sometimes wraps the array in a sentence; retry on the
        # outermost [...] span before giving up.
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(cleaned[start:end + 1])

    if isinstance(parsed, dict):
        # A single detection returned as a bare object.
        parsed = [parsed]
    if not isinstance(parsed, list):
        raise ValueError("Expected a JSON array of detections.")

    return [entry for entry in parsed if isinstance(entry, dict)]


def detect(image):
    """Run Gemini object detection on ``image`` and draw the results.

    Args:
        image: The uploaded PIL image, or ``None`` if nothing was uploaded.

    Returns:
        A ``(result_image, debug_text)`` tuple. On success ``result_image`` is
        the annotated image and ``debug_text`` is the parsed detection list.
        If ``image`` is ``None`` the result is ``(None, prompt-to-upload)``.
        On any failure the original image is returned together with an
        ``"Error: ..."`` message, so the UI never sees an exception.
    """
    if image is None:
        return None, "Please upload an image."

    # The prompt we send to Gemini. The 0-1000 scale is stated explicitly
    # because the drawing code converts from that scale to pixels.
    prompt = """
    Detect objects in this image.
    Return a JSON Array.
    Format: [{"box_2d": [ymin, xmin, ymax, xmax], "label": "name"}]
    The box_2d values must be integers normalized to the range 0-1000.
    """

    try:
        # 1. Ask Gemini
        response = model.generate_content([prompt, image])

        # 2-3. Clean the reply and convert it to a list of detections
        boxes = _parse_detections(response.text)

        # 4. Draw boxes
        result_image = plot_bounding_boxes(image, boxes)

        return result_image, str(boxes)

    except Exception as e:
        # Deliberately broad: this is the UI boundary, and API, parsing and
        # drawing errors should all be reported in the debug box, not raised.
        return image, f"Error: {e}"

# --- 3. UI ---
# Page layout, top to bottom: title, input/result images side by side,
# a debug textbox, and the button that triggers detection.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Gemini Object Detector")

    with gr.Row():
        inp = gr.Image(label="Input Image", type="pil")
        out = gr.Image(label="Result", type="pil")

    debug = gr.Textbox(label="Debug Information")

    btn = gr.Button("Detect Objects")
    # detect() returns (image, text), which maps onto the two outputs in order.
    btn.click(fn=detect, inputs=inp, outputs=[out, debug])

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()