roqaia123 committed on
Commit
c898b20
·
verified ·
1 Parent(s): c8b0495

Upload 8 files

Browse files
Files changed (9) hide show
  1. .gitattributes +2 -0
  2. api key.env +1 -0
  3. code.py +161 -0
  4. cookies.jpg +0 -0
  5. foreign_menu.jpg +3 -0
  6. messed_room.jpg +0 -0
  7. test.py +117 -0
  8. yoga.jpg +0 -0
  9. zoom_face.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ foreign_menu.jpg filter=lfs diff=lfs merge=lfs -text
37
+ zoom_face.png filter=lfs diff=lfs merge=lfs -text
api key.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GOOGLE_API_KEY=AIzaSyABaidsygD73gbaSlMHlkrhhiDT8NDzrjE
code.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import json
4
+ from PIL import Image, ImageDraw, ImageFont, ImageColor
5
+ import google.generativeai as genai
6
+ from dotenv import load_dotenv
7
+
8
# --- 1. SETUP ---
# Pull the Gemini credential out of the local env file and register it with
# the SDK. A missing key only produces a warning; configuration still runs.
load_dotenv("api key.env")
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    print("⚠️ Warning: 'GOOGLE_API_KEY' not found in environment variables.")

genai.configure(api_key=api_key)

# --- 2. CONFIGURATION ---
# Requesting the 'application/json' MIME type asks the model for JSON output.
generation_config = dict(
    temperature=0.5,
    response_mime_type="application/json",
)

# System prompt describing the expected box format ([ymin, xmin, ymax, xmax]
# on a 0-1000 scale) and the cap on the number of returned objects.
bounding_box_system_instructions = """
Return bounding boxes as a JSON array with labels.
Format: [{"box_2d": [ymin, xmin, ymax, xmax], "label": "string"}]
Coordinate scale: 0-1000.
Limit to 25 objects.
Do not use markdown code blocks or fencing. Just return the raw JSON array.
"""

model = genai.GenerativeModel(
    system_instruction=bounding_box_system_instructions,
    model_name='gemini-2.5-flash',
)
35
+
36
+ # --- 3. HELPER FUNCTIONS ---
37
+
38
def parse_json(json_output):
    """Extract the list of bounding-box records from a model response.

    The text is first parsed as-is. Only if that does not yield usable data
    is the span from the first '[' to the last ']' tried, which tolerates a
    text preamble or postscript around the array. Parsing the whole text
    first matters: slicing on brackets up front would reduce a single-object
    reply such as ``{"box_2d": [..], "label": ".."}`` to its inner coordinate
    list.

    Args:
        json_output: Raw response text from the model.

    Returns:
        A list of parsed records. A lone object carrying a ``"box_2d"`` key
        is wrapped in a one-element list. An empty list is returned (after
        printing a diagnostic) when nothing usable can be extracted.
    """
    if not isinstance(json_output, str):
        print(f"JSON Parsing Error: expected str, got {type(json_output).__name__}")
        return []

    text = json_output.strip()
    candidates = [text]

    # Fallback candidate: the outermost bracketed span, if it is well ordered.
    start = text.find('[')
    end = text.rfind(']')
    if start != -1 and end > start:
        candidates.append(text[start:end + 1])

    last_error = "no JSON array found"
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as e:
            last_error = e
            continue

        if isinstance(parsed, list):
            return parsed
        if isinstance(parsed, dict) and "box_2d" in parsed:
            # A single detection returned as a bare object.
            return [parsed]
        last_error = f"unexpected top-level {type(parsed).__name__}"

    print(f"JSON Parsing Error: {last_error}")
    return []
58
+
59
def plot_bounding_boxes(im, boxes):
    """Draw labelled bounding boxes on a copy of an image.

    Each entry of ``boxes`` is read with ``.get``: ``"box_2d"`` holds
    ``[ymin, xmin, ymax, xmax]`` on a 0-1000 scale (defaulting to all zeros)
    and ``"label"`` holds the caption (defaulting to ``"Object"``).

    Entries are handled independently: a malformed entry, or a label the
    active font cannot render, is reported and skipped instead of aborting
    the boxes that follow it.

    Args:
        im: PIL image to annotate; it is not modified.
        boxes: Iterable of box records; ``None`` is treated as empty.

    Returns:
        An annotated copy of ``im``.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)

    # A few high-contrast colours first, then every named colour PIL knows.
    colors = ['red', 'green', 'blue', 'yellow', 'orange', 'cyan', 'magenta'] + list(ImageColor.colormap)

    # Prefer a scalable system font; fall back to PIL's built-in one.
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except OSError:
        font = ImageFont.load_default()

    for i, box in enumerate(boxes or []):
        color = colors[i % len(colors)]
        try:
            ymin, xmin, ymax, xmax = box.get("box_2d", [0, 0, 0, 0])
            label = str(box.get("label", "Object"))

            # Swap if coordinates are inverted.
            if xmin > xmax:
                xmin, xmax = xmax, xmin
            if ymin > ymax:
                ymin, ymax = ymax, ymin

            # Scale from the 0-1000 range to pixel coordinates.
            left = int((xmin / 1000) * width)
            top = int((ymin / 1000) * height)
            right = int((xmax / 1000) * width)
            bottom = int((ymax / 1000) * height)

            draw.rectangle([left, top, right, bottom], outline=color, width=4)

            # Fill the text's own bounding box first so the label stays readable.
            text_bbox = draw.textbbox((left, top), label, font=font)
            draw.rectangle(text_bbox, fill=color)
            draw.text((left, top), label, fill="white", font=font)
        except (AttributeError, TypeError, ValueError, OverflowError, OSError) as e:
            print(f"Plotting Error: {e}")
            continue

    return im
109
+
110
+ # --- 4. MAIN GRADIO FUNCTION ---
111
+
112
def detect_objects(image):
    """Gradio handler: detect objects in an image and draw their boxes.

    Images wider than 1024 px are scaled down (aspect ratio kept) before
    being sent to the model together with a fixed prompt. The response text
    is parsed with ``parse_json`` and rendered with ``plot_bounding_boxes``.

    Args:
        image: PIL image from the upload widget, or ``None`` if nothing was
            uploaded.

    Returns:
        A ``(image, data)`` pair: the annotated image and the parsed box
        list on success; ``(None, [])`` when no image was supplied; the
        (possibly resized) input image and ``[{"error": message}]`` when the
        request or its processing fails.
    """
    if image is None:
        return None, []

    # Resize for consistency (optional, but good for speed).
    target_width = 1024
    w, h = image.size
    if w > target_width:
        # Clamp to 1 px: a very wide, short image would otherwise compute a
        # height of 0, which PIL rejects.
        target_height = max(1, int(target_width * h / w))
        image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    prompt = "Identify and label the objects in the image."

    try:
        response = model.generate_content(
            [prompt, image],
            generation_config=generation_config
        )
        print("Raw Response:", response.text)  # For debugging

        boxes_data = parse_json(response.text)
        result_image = plot_bounding_boxes(image, boxes_data)

        return result_image, boxes_data

    except Exception as e:
        # UI boundary: surface any failure in the JSON panel rather than
        # letting the handler crash.
        print(f"API Error: {e}")
        return image, [{"error": str(e)}]
143
+
144
+ # --- 5. UI SETUP ---
145
+
146
with gr.Blocks() as demo:
    gr.Markdown("# 👁️ Object Detection with Gemini 2.5")

    # Upload pane and annotated-result pane share one row.
    with gr.Row():
        img_input = gr.Image(label="Upload Image", type="pil")
        img_output = gr.Image(label="Detected Objects", type="pil")

    # Parsed detections, shown as returned by detect_objects.
    json_output = gr.JSON(label="Bounding Box Data")

    btn = gr.Button("Detect Objects", variant="primary")

    # Clicking the button runs detection and fills both output components.
    btn.click(
        fn=detect_objects,
        inputs=img_input,
        outputs=[img_output, json_output],
    )

if __name__ == "__main__":
    demo.launch()
cookies.jpg ADDED
foreign_menu.jpg ADDED

Git LFS Details

  • SHA256: 4ee1c12308dc143c7794d740b5186ca8005b7e9a1ec6f20b045cf14cdbde4484
  • Pointer size: 131 Bytes
  • Size of remote file: 104 kB
messed_room.jpg ADDED
test.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ from io import BytesIO
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ from PIL import ImageColor
7
+ import json
8
+ import google.generativeai as genai
9
+ from google.generativeai import types
10
+ from dotenv import load_dotenv
11
+ from IPython.display import display
12
+
13
# 1. SETUP API KEY
# ----------------
# Load the default ".env" first, then the "api key.env" file that code.py
# uses. load_dotenv does not override variables that are already set and
# is a no-op when the file is absent, so the extra call is harmless.
load_dotenv()
load_dotenv("api key.env")

# Accept either variable name: this script historically read
# "Gemini_API_Key", while code.py reads "GOOGLE_API_KEY".
api_key = os.getenv("Gemini_API_Key") or os.getenv("GOOGLE_API_KEY")
if not api_key:
    print("Warning: neither 'Gemini_API_Key' nor 'GOOGLE_API_KEY' is set.")

# Configure the Google AI library
genai.configure(api_key=api_key)


# 2. DEFINE MODEL AND INSTRUCTIONS

bounding_box_system_instructions = """
Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
"""
model = genai.GenerativeModel(
    model_name='gemini-2.5-flash',
    system_instruction=bounding_box_system_instructions,
)
generation_config = genai.types.GenerationConfig(
    temperature=0.5,
)
32
+
33
+
34
+
35
+ # 3. PREPARE IMAGE AND PROMPT
36
+
37
+
38
+
39
+
40
def parse_json(json_output):
    """Strip Markdown code fencing from a model response.

    The first line that is a fence opener is located; everything up to and
    including it is dropped, and so is everything from the next "```"
    onward. The opener is matched after trimming whitespace and ignoring
    case, and a bare "```" fence is accepted as well, so responses such as
    "```json " or "```JSON" are handled too.

    Args:
        json_output: Raw response text.

    Returns:
        The text between the fences (still a string, not parsed), or the
        input unchanged when no fence opener is present.
    """
    lines = json_output.splitlines()
    for i, line in enumerate(lines):
        if line.strip().lower() in ("```json", "```"):
            json_output = "\n".join(lines[i + 1:])  # Remove everything before the opener
            json_output = json_output.split("```")[0]  # Remove everything after the closing "```"
            break
    return json_output
48
+ print("After parsing JSON from model response...")
49
+
50
+
51
def plot_bounding_boxes(im, bounding_boxes):
    """Plot bounding boxes with labels on a copy of an image.

    Each entry must carry ``"box_2d"`` as ``[y1, x1, y2, x2]`` on a 0-1000
    scale; an optional ``"label"`` is drawn just inside the box's top-left
    corner in the box colour.

    Entries are processed independently, so one malformed entry is reported
    and skipped without preventing later boxes from being drawn.

    Args:
        im: PIL image to annotate; it is not modified.
        bounding_boxes: JSON text encoding the list of entries, or the
            already-decoded list.

    Returns:
        An annotated copy of ``im``. If the JSON cannot be decoded, an
        unannotated copy is returned after printing the error.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)

    colors = [
        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
        'lime', 'magenta', 'violet', 'gold', 'silver'
    ] + list(ImageColor.colormap)

    # The original try/except loaded the same default font in both branches,
    # so a single call is equivalent.
    font = ImageFont.load_default()

    if isinstance(bounding_boxes, (str, bytes, bytearray)):
        try:
            bounding_boxes_json = json.loads(bounding_boxes)
        except json.JSONDecodeError as e:
            print(f"Error drawing bounding boxes: {e}")
            return im
    else:
        bounding_boxes_json = bounding_boxes or []

    for i, bounding_box in enumerate(bounding_boxes_json):
        color = colors[i % len(colors)]
        try:
            # Scale from the 0-1000 range to pixel coordinates.
            abs_y1 = int(bounding_box["box_2d"][0] / 1000 * height)
            abs_x1 = int(bounding_box["box_2d"][1] / 1000 * width)
            abs_y2 = int(bounding_box["box_2d"][2] / 1000 * height)
            abs_x2 = int(bounding_box["box_2d"][3] / 1000 * width)

            # Normalise inverted corners.
            if abs_x1 > abs_x2:
                abs_x1, abs_x2 = abs_x2, abs_x1
            if abs_y1 > abs_y2:
                abs_y1, abs_y2 = abs_y2, abs_y1

            # Draw bounding box and label
            draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
            if "label" in bounding_box:
                draw.text((abs_x1 + 8, abs_y1 + 6), bounding_box["label"], fill=color, font=font)
        except (KeyError, IndexError, TypeError, ValueError, OverflowError, OSError) as e:
            print(f"Error drawing bounding boxes: {e}")
            continue

    return im
95
+
96
prompt = "Identify and label the objects in the image. Return only the JSON array of bounding boxes and labels as per the system instructions."
image = "Images/cookies.jpg"

# Open the file once, inside a context manager so the handle is released.
with Image.open(image) as img:
    print(f"Original image size: {img.size}")

    # Resize to a width of exactly 1024 while maintaining the aspect ratio
    # (smaller images are scaled up as well).
    im = img.resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS)

print(f"Resized image size: {im.size}")
im.show()

# Run model to find bounding boxes
response = model.generate_content([prompt, im], generation_config=generation_config)
print("Raw model response:")
print(response.text)

bounding_boxes = parse_json(response.text)

im_with_boxes = plot_bounding_boxes(im, bounding_boxes)
display(im_with_boxes)

# Create the output directory up front so the save cannot fail on a missing
# folder after the API call has already been made.
os.makedirs("output_imags", exist_ok=True)
im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg")
print("Bounding boxes plotted on image.")
yoga.jpg ADDED
zoom_face.png ADDED

Git LFS Details

  • SHA256: 50881ef7ec7f02d5e3ff2c3bd2a05acf51d57e3347277a22240cd07bcf62ccd0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.36 MB