Manar11 committed on
Commit
1000528
·
verified ·
1 Parent(s): bcd18cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -160
app.py CHANGED
@@ -1,161 +1,161 @@
1
-
2
- import gradio as gr
3
- import os
4
- from io import BytesIO
5
- from PIL import Image, ImageDraw, ImageFont
6
- from PIL import ImageColor
7
- import json
8
- import google.generativeai as genai
9
- from google.generativeai import types
10
- from dotenv import load_dotenv
11
- from IPython.display import display
12
-
13
- # 1. SETUP API KEY
14
- # ----------------
15
- load_dotenv()
16
- api_key = os.getenv("Gemini_API_Key")
17
- # Configure the Google AI library
18
- genai.configure(api_key=api_key)
19
-
20
-
21
- # 2. DEFINE MODEL AND INSTRUCTIONS
22
-
23
- bounding_box_system_instructions = """
24
- Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
25
- If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
26
- """
27
- model = genai.GenerativeModel( model_name='gemini-2.5-flash', system_instruction=bounding_box_system_instructions)
28
- generation_config = genai.types.GenerationConfig(
29
- temperature=0.5,
30
-
31
- )
32
-
33
-
34
-
35
- # 3. PREPARE IMAGE AND PROMPT
36
- prompt = "Identify and label the objects in the image. Return only the JSON array of bounding boxes and labels as per the system instructions."
37
- #image = "Images/cookies.jpg"
38
- #img = Image.open(BytesIO(open(image, "rb").read()))
39
- # print(f"Original image size: {img.size}")
40
-
41
- # resize the image to a max width of 1024 while maintaining aspect ratio
42
- #im = Image.open(image).resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS)
43
- #print(f"Resized image size: {im.size}")
44
- #im.show()
45
-
46
-
47
- # Run model to find bounding boxes
48
- #response = model.generate_content([prompt, im], generation_config=generation_config)
49
-
50
- #print(response.text)
51
- # def generate_bounding_boxes(prompt, image):
52
- # response = model.generate_content([prompt, image], generation_config=generation_config)
53
- # return response.text
54
-
55
-
56
- def parse_json(json_output):
57
- lines = json_output.splitlines()
58
- for i, line in enumerate(lines):
59
- if line == "```json":
60
- json_output = "\n".join(lines[i+1:]) # Remove everything before "```json"
61
- json_output = json_output.split("```")[0] # Remove everything after the closing "```"
62
- break
63
- return json_output
64
-
65
- #bounding_boxes=parse_json(response.text)
66
-
67
- #def plot_bounding_boxes(im, bounding_boxes):
68
- """
69
- Plots bounding boxes on an image with labels.
70
- """
71
- image = im.copy()
72
- draw = ImageDraw.Draw(image)
73
- font = ImageFont.load_default()
74
-
75
- bounding_boxes_json = json.loads(bounding_boxes)
76
- for i, bounding_box in enumerate(bounding_boxes_json):
77
- print(f"Processing bounding box {i}: {bounding_box}")
78
- label = bounding_box["label"]
79
- x1, y1, x2, y2 = bounding_box["box_2d"]
80
- # Draw rectangle
81
- draw.rectangle(
82
- [(x1, y1), (x2, y2)],
83
- outline="red",
84
- width=10
85
- )
86
- # Draw label
87
- draw.text((x1 + 5, y1 + 5), label, fill="red", font=font)
88
- return im
89
- def plot_bounding_boxes(im, bounding_boxes):
90
- """
91
- Plots bounding boxes on an image with labels.
92
- """
93
- additional_colors = [colorname for (colorname, colorcode) in ImageColor.colormap.items()]
94
-
95
- im = im.copy()
96
- width, height = im.size
97
- draw = ImageDraw.Draw(im)
98
- colors = [
99
- 'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
100
- 'lime', 'magenta', 'violet', 'gold', 'silver'
101
- ] + additional_colors
102
-
103
- try:
104
- # Use a default font if NotoSansCJK is not available
105
- try:
106
- font = ImageFont.load_default()
107
- except OSError:
108
- print("NotoSansCJK-Regular.ttc not found. Using default font.")
109
- font = ImageFont.load_default()
110
-
111
- bounding_boxes_json = json.loads(bounding_boxes)
112
- for i, bounding_box in enumerate(bounding_boxes_json):
113
- color = colors[i % len(colors)]
114
- abs_y1 = int(bounding_box["box_2d"][0] / 1000 * height)
115
- abs_x1 = int(bounding_box["box_2d"][1] / 1000 * width)
116
- abs_y2 = int(bounding_box["box_2d"][2] / 1000 * height)
117
- abs_x2 = int(bounding_box["box_2d"][3] / 1000 * width)
118
-
119
- if abs_x1 > abs_x2:
120
- abs_x1, abs_x2 = abs_x2, abs_x1
121
-
122
- if abs_y1 > abs_y2:
123
- abs_y1, abs_y2 = abs_y2, abs_y1
124
-
125
- # Draw bounding box and label
126
- draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
127
- if "label" in bounding_box:
128
- draw.text((abs_x1 + 8, abs_y1 + 6), bounding_box["label"], fill=color, font=font)
129
- except Exception as e:
130
- print(f"Error drawing bounding boxes: {e}")
131
-
132
- return im
133
- #im_with_boxes = plot_bounding_boxes(im, bounding_boxes)
134
- #display(im_with_boxes)
135
- #im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg")
136
- #im_with_boxes.show()
137
- #print("Bounding boxes plotted on image.")
138
-
139
-
140
- def detect_objects(image , prompt):
141
- # Resize image
142
- image = image.resize((1024, int(1024 * image.size[1] / image.size[0])))
143
-
144
- # Generate bounding boxes
145
- response = model.generate_content([prompt, image], generation_config=generation_config)
146
- bounding_boxes = parse_json(response.text)
147
-
148
- # Draw boxes
149
- output_image = plot_bounding_boxes(image, bounding_boxes)
150
-
151
- return output_image, bounding_boxes
152
-
153
- # ================== Gradio Interface ==================
154
- interface = gr.Interface(
155
- fn=detect_objects,
156
- inputs=[gr.Image(type="pil"), gr.Textbox( label="Prompt", value="Identify and label the objects in the image. Return only the JSON array of bounding boxes.")],
157
- outputs=[gr.Image(label="Detected Objects"), gr.Textbox(label="Bounding Boxes JSON")],
158
- title="Object Detection with Gemini"
159
- )
160
-
161
  interface.launch()
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ from io import BytesIO
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ from PIL import ImageColor
7
+ import json
8
+ import google.generativeai as genai
9
+ from google.generativeai import types
10
+ from dotenv import load_dotenv
11
+
12
+
13
# 1. SETUP API KEY
# ----------------
# Pull variables from a local .env file (when one exists) into the environment.
load_dotenv()
api_key = os.getenv("Gemini_API_Key")
if not api_key:
    # Report the misconfiguration at startup; otherwise the first symptom is
    # a failed request much later. Only a warning, so startup still proceeds
    # exactly as before.
    print(
        "Warning: environment variable 'Gemini_API_Key' is not set; "
        "Gemini requests will fail unless credentials are supplied another way."
    )
# Configure the Google AI library
genai.configure(api_key=api_key)
19
+
20
+
21
# 2. DEFINE MODEL AND INSTRUCTIONS

# System prompt sent with every request: asks for a plain JSON array of
# labelled boxes and distinct names for repeated objects.
bounding_box_system_instructions = """
Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
"""

model = genai.GenerativeModel(
    model_name='gemini-2.5-flash',
    system_instruction=bounding_box_system_instructions,
)

# Sampling settings passed along with each generate_content call.
generation_config = genai.types.GenerationConfig(temperature=0.5)


# 3. PREPARE IMAGE AND PROMPT
prompt = "Identify and label the objects in the image. Return only the JSON array of bounding boxes and labels as per the system instructions."
37
+ #image = "Images/cookies.jpg"
38
+ #img = Image.open(BytesIO(open(image, "rb").read()))
39
+ # print(f"Original image size: {img.size}")
40
+
41
+ # resize the image to a max width of 1024 while maintaining aspect ratio
42
+ #im = Image.open(image).resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS)
43
+ #print(f"Resized image size: {im.size}")
44
+ #im.show()
45
+
46
+
47
+ # Run model to find bounding boxes
48
+ #response = model.generate_content([prompt, im], generation_config=generation_config)
49
+
50
+ #print(response.text)
51
+ # def generate_bounding_boxes(prompt, image):
52
+ # response = model.generate_content([prompt, image], generation_config=generation_config)
53
+ # return response.text
54
+
55
+
56
def parse_json(json_output):
    """
    Extract the JSON payload from a model reply that may be wrapped in a
    Markdown code fence.

    Args:
        json_output: Raw reply text.

    Returns:
        The text between the first opening fence line (```json, in any
        letter case, or a bare ```), and the next ```. Surrounding
        whitespace on the fence line is ignored. When no fence line is
        present the input is returned unchanged.
    """
    lines = json_output.splitlines()
    for index, line in enumerate(lines):
        # Normalise the candidate fence so indentation, trailing spaces and
        # letter case do not defeat the match.
        marker = line.strip().lower()
        if marker in ("```json", "```"):
            # Keep only what follows the opening fence ...
            remainder = "\n".join(lines[index + 1:])
            # ... and cut everything from the closing fence onwards.
            return remainder.split("```")[0]
    return json_output
89
def plot_bounding_boxes(im, bounding_boxes):
    """
    Plots bounding boxes on a copy of an image with labels.

    Args:
        im: PIL image to annotate. It is copied first, so the caller's image
            is not modified.
        bounding_boxes: JSON text holding an array of objects. Each object's
            "box_2d" entry is read as [y1, x1, y2, x2] and scaled from a
            0-1000 range to pixel coordinates; "label" is optional.

    Returns:
        The annotated copy. Drawing is best-effort: unparseable JSON leaves
        the copy without boxes, and a malformed entry is reported and
        skipped without affecting the remaining boxes.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)

    # Preferred colours first, then every colour name Pillow knows; boxes
    # cycle through this list by index.
    colors = [
        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
        'lime', 'magenta', 'violet', 'gold', 'silver'
    ] + list(ImageColor.colormap)

    try:
        font = ImageFont.load_default()
    except OSError as e:
        # Without a font only the labels are lost; rectangles are still drawn.
        print(f"Error drawing bounding boxes: {e}")
        font = None

    try:
        bounding_boxes_json = json.loads(bounding_boxes)
    except (TypeError, ValueError) as e:
        print(f"Error drawing bounding boxes: {e}")
        return im
    if not isinstance(bounding_boxes_json, list):
        print(
            "Error drawing bounding boxes: expected a JSON array, got "
            f"{type(bounding_boxes_json).__name__}"
        )
        return im

    for i, bounding_box in enumerate(bounding_boxes_json):
        color = colors[i % len(colors)]
        try:
            y1, x1, y2, x2 = bounding_box["box_2d"][:4]
            abs_y1 = int(y1 / 1000 * height)
            abs_x1 = int(x1 / 1000 * width)
            abs_y2 = int(y2 / 1000 * height)
            abs_x2 = int(x2 / 1000 * width)

            # Pillow needs the first corner to be top-left, so order each axis.
            abs_x1, abs_x2 = sorted((abs_x1, abs_x2))
            abs_y1, abs_y2 = sorted((abs_y1, abs_y2))

            # Draw bounding box and label
            draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
            if font is not None and "label" in bounding_box:
                draw.text((abs_x1 + 8, abs_y1 + 6), str(bounding_box["label"]), fill=color, font=font)
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # Skip only the offending entry; later boxes are still drawn.
            print(f"Error drawing bounding boxes: {e}")

    return im
133
+ #im_with_boxes = plot_bounding_boxes(im, bounding_boxes)
134
+ #display(im_with_boxes)
135
+ #im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg")
136
+ #im_with_boxes.show()
137
+ #print("Bounding boxes plotted on image.")
138
+
139
+
140
def detect_objects(image, prompt):
    """
    Run Gemini object detection on an image and draw the reported boxes.

    Args:
        image: PIL image supplied by the Gradio image input, or None when
            nothing was uploaded.
        prompt: Instruction text sent to the model together with the image.

    Returns:
        A tuple (annotated_image, bounding_boxes), where bounding_boxes is
        the JSON text extracted from the model reply.

    Raises:
        gr.Error: If no image was provided, or the model reply has no text.
    """
    if image is None:
        raise gr.Error("Please upload an image before running detection.")

    # Resize image to a width of 1024 px, keeping the aspect ratio. The height
    # is clamped to at least 1 because the integer truncation can yield 0 for
    # extremely wide images, which resize() rejects.
    src_width, src_height = image.size
    target_height = max(1, int(1024 * src_height / src_width))
    image = image.resize((1024, target_height))

    # Generate bounding boxes
    response = model.generate_content([prompt, image], generation_config=generation_config)
    try:
        raw_text = response.text
    except ValueError as err:
        # The .text accessor raises when the reply holds no text part
        # (for example a blocked or empty response).
        raise gr.Error("The model returned no text for this image.") from err
    bounding_boxes = parse_json(raw_text)

    # Draw boxes
    output_image = plot_bounding_boxes(image, bounding_boxes)

    return output_image, bounding_boxes
152
+
153
# ================== Gradio Interface ==================
# Two inputs (image + editable prompt) feed detect_objects; its two return
# values fill the annotated image and the raw JSON text box.
interface = gr.Interface(
    fn=detect_objects,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(
            label="Prompt",
            value="Identify and label the objects in the image. Return only the JSON array of bounding boxes.",
        ),
    ],
    outputs=[
        gr.Image(label="Detected Objects"),
        gr.Textbox(label="Bounding Boxes JSON"),
    ],
    title="Object Detection with Gemini",
)

# Start the server only when the file is executed as a script, so importing
# the module (tests, tooling) does not launch a web server.
if __name__ == "__main__":
    interface.launch()