Spaces:

Manar11
/

object-detecation

Sleeping

App Files Files Community

Manar11 committed on Feb 10

Commit

1000528

verified ·

1 Parent(s): bcd18cb

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -160

app.py CHANGED Viewed

@@ -1,161 +1,161 @@
-import gradio as gr
-import os
-from io import BytesIO
-from PIL import Image, ImageDraw, ImageFont
-from PIL import ImageColor
-import json
-import google.generativeai as genai
-from google.generativeai import types
-from dotenv import load_dotenv
-from IPython.display import display
-# 1. SETUP API KEY
-# ----------------
-load_dotenv()
-api_key = os.getenv("Gemini_API_Key")
-# Configure the Google AI library
-genai.configure(api_key=api_key)
-# 2. DEFINE MODEL AND INSTRUCTIONS
-bounding_box_system_instructions = """
-    Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
-    If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
-      """
-model = genai.GenerativeModel( model_name='gemini-2.5-flash', system_instruction=bounding_box_system_instructions)
-generation_config = genai.types.GenerationConfig(
-        temperature=0.5,
-    )
-# 3. PREPARE IMAGE AND PROMPT
-prompt = "Identify and label the objects in the image. Return only the JSON array of bounding boxes and labels as per the system instructions."
-#image = "Images/cookies.jpg"
-#img = Image.open(BytesIO(open(image, "rb").read()))
-# print(f"Original image size: {img.size}")
-# resize the image to a max width of 1024 while maintaining aspect ratio
-#im = Image.open(image).resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS)
-#print(f"Resized image size: {im.size}")
-#im.show()
-# Run model to find bounding boxes
-#response =  model.generate_content([prompt, im], generation_config=generation_config)
-#print(response.text)
-# def generate_bounding_boxes(prompt, image):
-    # response = model.generate_content([prompt, image], generation_config=generation_config)
-    # return response.text
-def parse_json(json_output):
-    lines = json_output.splitlines()
-    for i, line in enumerate(lines):
-        if line == "```json":
-            json_output = "\n".join(lines[i+1:])  # Remove everything before "```json"
-            json_output = json_output.split("```")[0]  # Remove everything after the closing "```"
-            break
-    return json_output
-#bounding_boxes=parse_json(response.text)
-#def plot_bounding_boxes(im, bounding_boxes):
-    """
-    Plots bounding boxes on an image with labels.
-    """
-    image = im.copy()
-    draw = ImageDraw.Draw(image)
-    font = ImageFont.load_default()
-    bounding_boxes_json = json.loads(bounding_boxes)
-    for i, bounding_box in enumerate(bounding_boxes_json):
-        print(f"Processing bounding box {i}: {bounding_box}")
-        label = bounding_box["label"]
-        x1, y1, x2, y2 = bounding_box["box_2d"]
-        # Draw rectangle
-        draw.rectangle(
-            [(x1, y1), (x2, y2)],
-            outline="red",
-            width=10
-        )
-        # Draw label
-        draw.text((x1 + 5, y1 + 5), label, fill="red", font=font)
-    return im
-def plot_bounding_boxes(im, bounding_boxes):
-    """
-    Plots bounding boxes on an image with labels.
-    """
-    additional_colors = [colorname for (colorname, colorcode) in ImageColor.colormap.items()]
-    im = im.copy()
-    width, height = im.size
-    draw = ImageDraw.Draw(im)
-    colors = [
-        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
-        'lime', 'magenta', 'violet', 'gold', 'silver'
-    ] + additional_colors
-    try:
-        # Use a default font if NotoSansCJK is not available
-        try:
-            font = ImageFont.load_default()
-        except OSError:
-            print("NotoSansCJK-Regular.ttc not found. Using default font.")
-            font = ImageFont.load_default()
-        bounding_boxes_json = json.loads(bounding_boxes)
-        for i, bounding_box in enumerate(bounding_boxes_json):
-            color = colors[i % len(colors)]
-            abs_y1 = int(bounding_box["box_2d"][0] / 1000 * height)
-            abs_x1 = int(bounding_box["box_2d"][1] / 1000 * width)
-            abs_y2 = int(bounding_box["box_2d"][2] / 1000 * height)
-            abs_x2 = int(bounding_box["box_2d"][3] / 1000 * width)
-            if abs_x1 > abs_x2:
-                abs_x1, abs_x2 = abs_x2, abs_x1
-            if abs_y1 > abs_y2:
-                abs_y1, abs_y2 = abs_y2, abs_y1
-            # Draw bounding box and label
-            draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
-            if "label" in bounding_box:
-                draw.text((abs_x1 + 8, abs_y1 + 6), bounding_box["label"], fill=color, font=font)
-    except Exception as e:
-        print(f"Error drawing bounding boxes: {e}")
-    return im
-#im_with_boxes = plot_bounding_boxes(im, bounding_boxes)
-#display(im_with_boxes)
-#im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg")
-#im_with_boxes.show()
-#print("Bounding boxes plotted on image.")
-def detect_objects(image , prompt):
-    # Resize image
-    image = image.resize((1024, int(1024 * image.size[1] / image.size[0])))
-    # Generate bounding boxes
-    response = model.generate_content([prompt, image], generation_config=generation_config)
-    bounding_boxes = parse_json(response.text)
-    # Draw boxes
-    output_image = plot_bounding_boxes(image, bounding_boxes)
-    return output_image, bounding_boxes
-# ================== Gradio Interface ==================
-interface = gr.Interface(
-    fn=detect_objects,
-    inputs=[gr.Image(type="pil"), gr.Textbox( label="Prompt", value="Identify and label the objects in the image. Return only the JSON array of bounding boxes.")],
-    outputs=[gr.Image(label="Detected Objects"), gr.Textbox(label="Bounding Boxes JSON")],
-    title="Object Detection with Gemini"
-)
 interface.launch()

+import gradio as gr
+import os
+from io import BytesIO
+from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageColor
+import json
+import google.generativeai as genai
+from google.generativeai import types
+from dotenv import load_dotenv
# ---------------------------------------------------------------------------
# 1. API key
# ---------------------------------------------------------------------------
# load_dotenv() runs first so the lookup below can see a key supplied through
# a .env file; the value (possibly None) is handed to the Google AI client.
load_dotenv()
api_key = os.environ.get("Gemini_API_Key")
genai.configure(api_key=api_key)

# ---------------------------------------------------------------------------
# 2. Model and system instructions
# ---------------------------------------------------------------------------
# The system instruction asks for a plain JSON array of labelled boxes.
bounding_box_system_instructions = """
    Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
    If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
      """
model = genai.GenerativeModel(
    model_name='gemini-2.5-flash',
    system_instruction=bounding_box_system_instructions,
)
generation_config = genai.types.GenerationConfig(temperature=0.5)

# ---------------------------------------------------------------------------
# 3. Default prompt
# ---------------------------------------------------------------------------
prompt = "Identify and label the objects in the image. Return only the JSON array of bounding boxes and labels as per the system instructions."
+#image = "Images/cookies.jpg"
+#img = Image.open(BytesIO(open(image, "rb").read()))
+# print(f"Original image size: {img.size}")
+# resize the image to a max width of 1024 while maintaining aspect ratio
+#im = Image.open(image).resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS)
+#print(f"Resized image size: {im.size}")
+#im.show()
+# Run model to find bounding boxes
+#response =  model.generate_content([prompt, im], generation_config=generation_config)
+#print(response.text)
+# def generate_bounding_boxes(prompt, image):
+    # response = model.generate_content([prompt, image], generation_config=generation_config)
+    # return response.text
def parse_json(json_output):
    """Strip a Markdown code fence from model output, if one is present.

    The model is told not to use code fencing, but may still wrap its answer
    in a fenced block.  When a fence is found, the text between the opening
    fence line and the next run of three backticks is returned; otherwise the
    input is returned unchanged.

    A line counts as an opening fence when, ignoring surrounding whitespace
    and letter case, it is "```json".  If no such line exists, a bare "```"
    line is accepted instead.

    Args:
        json_output: Raw text returned by the model.

    Returns:
        The fenced payload (it keeps the newline that preceded the closing
        fence), or ``json_output`` itself when no fence line is present.
    """
    lines = json_output.splitlines()
    normalized = [line.strip().lower() for line in lines]

    # Prefer an explicit ```json fence; only fall back to a bare ``` fence
    # when no ```json line exists, so previously-working inputs are unchanged.
    for marker in ("```json", "```"):
        if marker in normalized:
            start = normalized.index(marker) + 1
            payload = "\n".join(lines[start:])
            # Drop the closing fence and anything after it.
            return payload.split("```")[0]

    return json_output
def plot_bounding_boxes(im, bounding_boxes):
    """Draw labelled bounding boxes on a copy of ``im``.

    Drawing is best-effort: problems are printed and skipped rather than
    raised, so the caller always gets an image back.

    Args:
        im: PIL image to annotate.  It is copied; the argument itself is not
            drawn on.
        bounding_boxes: JSON text holding an array of objects.  Each object's
            ``"box_2d"`` entry is read as ``[y1, x1, y2, x2]`` on a 0-1000
            scale relative to the image height/width.  An optional
            ``"label"`` entry is drawn inside the box.

    Returns:
        The annotated copy of ``im``.  If the JSON cannot be parsed or is not
        an array, the copy is returned without any boxes.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)

    # Preferred colours first, then every named colour Pillow knows about, so
    # the palette only repeats after a very large number of boxes.
    colors = [
        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
        'lime', 'magenta', 'violet', 'gold', 'silver'
    ] + list(ImageColor.colormap)

    try:
        font = ImageFont.load_default()
        boxes = json.loads(bounding_boxes)
    except (OSError, TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers non-text input.
        print(f"Error drawing bounding boxes: {e}")
        return im

    if not isinstance(boxes, list):
        print(f"Error drawing bounding boxes: expected a JSON array, got {type(boxes).__name__}")
        return im

    for i, bounding_box in enumerate(boxes):
        # Each box is handled on its own so that one malformed entry does not
        # prevent the remaining boxes from being drawn.
        try:
            color = colors[i % len(colors)]
            y1, x1, y2, x2 = bounding_box["box_2d"][:4]

            # Convert from the 0-1000 scale to absolute pixel coordinates.
            abs_y1 = int(y1 / 1000 * height)
            abs_x1 = int(x1 / 1000 * width)
            abs_y2 = int(y2 / 1000 * height)
            abs_x2 = int(x2 / 1000 * width)

            # Normalise so (x1, y1) is the top-left corner; rectangle drawing
            # requires ordered coordinates.
            if abs_x1 > abs_x2:
                abs_x1, abs_x2 = abs_x2, abs_x1
            if abs_y1 > abs_y2:
                abs_y1, abs_y2 = abs_y2, abs_y1

            draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
            if "label" in bounding_box:
                # str() guards against a non-string label in the model output.
                draw.text((abs_x1 + 8, abs_y1 + 6), str(bounding_box["label"]), fill=color, font=font)
        except (LookupError, TypeError, ValueError, ArithmeticError) as e:
            print(f"Error drawing bounding box {i}: {e}")

    return im
+#im_with_boxes = plot_bounding_boxes(im, bounding_boxes)
+#display(im_with_boxes)
+#im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg")
+#im_with_boxes.show()
+#print("Bounding boxes plotted on image.")
def detect_objects(image, prompt):
    """Run Gemini object detection on ``image`` and draw the results.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None`` when
            the user has not provided one.
        prompt: Text prompt sent to the model together with the image.

    Returns:
        A ``(annotated_image, bounding_boxes)`` tuple: the resized image with
        boxes drawn on it, and the bounding-box text extracted from the model
        response.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard the resize below fails with an AttributeError.
        raise gr.Error("Please upload an image before running detection.")

    # Resize to a fixed width of 1024 while keeping the aspect ratio.  The
    # height is clamped to at least 1 pixel because the integer truncation
    # can otherwise yield 0 for extremely wide images, which resize rejects.
    src_width, src_height = image.size
    target_width = 1024
    target_height = max(1, int(target_width * src_height / src_width))
    image = image.resize((target_width, target_height))

    # Generate bounding boxes
    response = model.generate_content([prompt, image], generation_config=generation_config)
    bounding_boxes = parse_json(response.text)

    # Draw boxes
    output_image = plot_bounding_boxes(image, bounding_boxes)
    return output_image, bounding_boxes
# ================== Gradio Interface ==================
# Inputs: the image to analyse plus an editable prompt with a default value.
_inputs = [
    gr.Image(type="pil"),
    gr.Textbox(
        label="Prompt",
        value="Identify and label the objects in the image. Return only the JSON array of bounding boxes.",
    ),
]
# Outputs: the annotated image and the bounding-box text from the model.
_outputs = [
    gr.Image(label="Detected Objects"),
    gr.Textbox(label="Bounding Boxes JSON"),
]
interface = gr.Interface(
    fn=detect_objects,
    inputs=_inputs,
    outputs=_outputs,
    title="Object Detection with Gemini",
)
interface.launch()