Spaces:

roqaia123
/

Object_Detection

Sleeping

App Files Files Community

roqaia123 committed on Feb 10

Commit

c898b20

verified ·

1 Parent(s): c8b0495

Upload 8 files

Browse files

Files changed (9) hide show

.gitattributes +2 -0
api key.env +1 -0
code.py +161 -0
cookies.jpg +0 -0
foreign_menu.jpg +3 -0
messed_room.jpg +0 -0
test.py +117 -0
yoga.jpg +0 -0
zoom_face.png +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+foreign_menu.jpg filter=lfs diff=lfs merge=lfs -text
+zoom_face.png filter=lfs diff=lfs merge=lfs -text

api key.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ GOOGLE_API_KEY=<REDACTED: API keys must not be committed; store the key as a Space secret / environment variable and rotate the exposed one>

code.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import gradio as gr
+import os
+import json
+from PIL import Image, ImageDraw, ImageFont, ImageColor
+import google.generativeai as genai
+from dotenv import load_dotenv
+# --- 1. SETUP ---
+# Load variables from the file named "api key.env", then read the key from the environment.
+load_dotenv("api key.env")
+api_key = os.getenv("GOOGLE_API_KEY")
+if not api_key:
+    # Only a warning is printed; genai.configure() below is still called with the missing key.
+    print("⚠️ Warning: 'GOOGLE_API_KEY' not found in environment variables.")
+genai.configure(api_key=api_key)
+# --- 2. CONFIGURATION ---
+# response_mime_type 'application/json' asks the model for JSON output;
+# parse_json() still tolerates extra text around the array.
+generation_config = {
+    "temperature": 0.5,
+    "response_mime_type": "application/json"
+}
+# System prompt describing the expected output: a JSON array of objects with
+# "box_2d" ([ymin, xmin, ymax, xmax] on a 0-1000 scale) and "label".
+bounding_box_system_instructions = """
+    Return bounding boxes as a JSON array with labels.
+    Format: [{"box_2d": [ymin, xmin, ymax, xmax], "label": "string"}]
+    Coordinate scale: 0-1000.
+    Limit to 25 objects.
+    Do not use markdown code blocks or fencing. Just return the raw JSON array.
+"""
+# Single shared model instance, created once at import time.
+model = genai.GenerativeModel(
+    model_name='gemini-2.5-flash', # model used for every detection request
+    system_instruction=bounding_box_system_instructions
+)
+# --- 3. HELPER FUNCTIONS ---
+def parse_json(json_output):
+    """
+    Robust parsing: Finds the first '[' and last ']' to extract valid JSON.
+    This handles cases where the model might still add text preamble.
+    """
+    try:
+        # If the model follows the MIME type strictly, this might be clean already.
+        # But we double-check for safety.
+        start = json_output.find('[')
+        end = json_output.rfind(']')
+        if start != -1 and end != -1:
+            json_str = json_output[start : end+1]
+            return json.loads(json_str)
+        # Fallback: try loading directly if no brackets found (e.g. empty response)
+        return json.loads(json_output)
+    except Exception as e:
+        print(f"JSON Parsing Error: {e}")
+        return []
+def plot_bounding_boxes(im, boxes):
+    """
+    Plots bounding boxes on the image.
+    """
+    im = im.copy()
+    width, height = im.size
+    draw = ImageDraw.Draw(im)
+    # Colors
+    additional_colors = [colorname for (colorname, colorcode) in ImageColor.colormap.items()]
+    colors = ['red', 'green', 'blue', 'yellow', 'orange', 'cyan', 'magenta'] + additional_colors
+    try:
+        # Load Font
+        try:
+            # Try loading a system font, fallback to default
+            font = ImageFont.truetype("arial.ttf", 20)
+        except OSError:
+            font = ImageFont.load_default()
+        for i, box in enumerate(boxes):
+            color = colors[i % len(colors)]
+            # Normalize coordinates (0-1000 scale) to pixels
+            ymin, xmin, ymax, xmax = box.get("box_2d", [0,0,0,0])
+            label = box.get("label", "Object")
+            # Swap if coordinates are inverted
+            if xmin > xmax: xmin, xmax = xmax, xmin
+            if ymin > ymax: ymin, ymax = ymax, ymin
+            left = int((xmin / 1000) * width)
+            top = int((ymin / 1000) * height)
+            right = int((xmax / 1000) * width)
+            bottom = int((ymax / 1000) * height)
+            # Draw Box
+            draw.rectangle([left, top, right, bottom], outline=color, width=4)
+            # Draw Label
+            text_bbox = draw.textbbox((left, top), label, font=font)
+            # Add a small background for the text so it's readable
+            draw.rectangle(text_bbox, fill=color)
+            draw.text((left, top), label, fill="white", font=font)
+    except Exception as e:
+        print(f"Plotting Error: {e}")
+        return im
+    return im
+# --- 4. MAIN GRADIO FUNCTION ---
+def detect_objects(image):
+    if image is None:
+        return None, []
+    # Resize for consistency (optional, but good for speed)
+    target_width = 1024
+    w, h = image.size
+    if w > target_width:
+        image = image.resize((target_width, int(target_width * h / w)), Image.Resampling.LANCZOS)
+    prompt = "Identify and label the objects in the image."
+    try:
+        # Generate Content
+        response = model.generate_content(
+            [prompt, image],
+            generation_config=generation_config
+        )
+        print("Raw Response:", response.text) # For debugging
+        # Parse
+        boxes_data = parse_json(response.text)
+        # Plot
+        result_image = plot_bounding_boxes(image, boxes_data)
+        return result_image, boxes_data
+    except Exception as e:
+        print(f"API Error: {e}")
+        return image, [{"error": str(e)}]
+# --- 5. UI SETUP ---
+# Layout: title, a row with the upload and result images side by side,
+# then the raw box data and the trigger button.
+with gr.Blocks() as demo:
+    gr.Markdown("# 👁️ Object Detection with Gemini 2.5")
+    with gr.Row():
+        # type="pil" makes Gradio hand detect_objects() a PIL image.
+        img_input = gr.Image(type="pil", label="Upload Image")
+        img_output = gr.Image(type="pil", label="Detected Objects")
+    json_output = gr.JSON(label="Bounding Box Data")
+    btn = gr.Button("Detect Objects", variant="primary")
+    # Clicking the button runs detect_objects; its two return values fill
+    # the result image and the JSON panel, in that order.
+    btn.click(detect_objects, inputs=img_input, outputs=[img_output, json_output])
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

cookies.jpg ADDED Viewed

foreign_menu.jpg ADDED Viewed

Git LFS Details

SHA256: 4ee1c12308dc143c7794d740b5186ca8005b7e9a1ec6f20b045cf14cdbde4484
Pointer size: 131 Bytes
Size of remote file: 104 kB

messed_room.jpg ADDED Viewed

test.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import gradio as gr
+import os
+from io import BytesIO
+from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageColor
+import json
+import google.generativeai as genai
+from google.generativeai import types
+from dotenv import load_dotenv
+from IPython.display import display
+# 1. SETUP API KEY
+# ----------------
+load_dotenv()
+api_key = os.getenv("Gemini_API_Key")
+# Configure the Google AI library
+genai.configure(api_key=api_key)
+# 2. DEFINE MODEL AND INSTRUCTIONS
+bounding_box_system_instructions = """
+    Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
+    If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
+      """
+model = genai.GenerativeModel( model_name='gemini-2.5-flash', system_instruction=bounding_box_system_instructions)
+generation_config = genai.types.GenerationConfig(
+        temperature=0.5,
+    )
+# 3. PREPARE IMAGE AND PROMPT
+def parse_json(json_output):
+    lines = json_output.splitlines()
+    for i, line in enumerate(lines):
+        if line == "```json":
+            json_output = "\n".join(lines[i+1:])  # Remove everything before "```json"
+            json_output = json_output.split("```")[0]  # Remove everything after the closing "```"
+            break
+    return json_output
+print("After parsing JSON from model response...")
+def plot_bounding_boxes(im, bounding_boxes):
+    """
+    Plots bounding boxes on an image with labels.
+    """
+    additional_colors = [colorname for (colorname, colorcode) in ImageColor.colormap.items()]
+    im = im.copy()
+    width, height = im.size
+    draw = ImageDraw.Draw(im)
+    colors = [
+        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
+        'lime', 'magenta', 'violet', 'gold', 'silver'
+    ] + additional_colors
+    try:
+        # Use a default font if NotoSansCJK is not available
+        try:
+            font = ImageFont.load_default()
+        except OSError:
+            print("NotoSansCJK-Regular.ttc not found. Using default font.")
+            font = ImageFont.load_default()
+        bounding_boxes_json = json.loads(bounding_boxes)
+        for i, bounding_box in enumerate(bounding_boxes_json):
+            color = colors[i % len(colors)]
+            abs_y1 = int(bounding_box["box_2d"][0] / 1000 * height)
+            abs_x1 = int(bounding_box["box_2d"][1] / 1000 * width)
+            abs_y2 = int(bounding_box["box_2d"][2] / 1000 * height)
+            abs_x2 = int(bounding_box["box_2d"][3] / 1000 * width)
+            if abs_x1 > abs_x2:
+                abs_x1, abs_x2 = abs_x2, abs_x1
+            if abs_y1 > abs_y2:
+                abs_y1, abs_y2 = abs_y2, abs_y1
+            # Draw bounding box and label
+            draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
+            if "label" in bounding_box:
+                draw.text((abs_x1 + 8, abs_y1 + 6), bounding_box["label"], fill=color, font=font)
+    except Exception as e:
+        print(f"Error drawing bounding boxes: {e}")
+    return im
+prompt = "Identify and label the objects in the image. Return only the JSON array of bounding boxes and labels as per the system instructions."
+image = "Images/cookies.jpg"
+img = Image.open(BytesIO(open(image, "rb").read()))
+print(f"Original image size: {img.size}")
+# resize the image to a max width of 1024 while maintaining aspect ratio
+im = Image.open(image).resize((1024, int(1024 * img.size[1] / img.size[0])), Image.Resampling.LANCZOS)
+print(f"Resized image size: {im.size}")
+im.show()
+# Run model to find bounding boxes
+response =  model.generate_content([prompt, im], generation_config=generation_config)
+print("Raw model response:")
+print(response.text )
+bounding_boxes=parse_json(response.text)
+im_with_boxes = plot_bounding_boxes(im, bounding_boxes)
+display(im_with_boxes)
+im_with_boxes.save("output_imags/cookies_bounding_boxes.jpg")
+print("Bounding boxes plotted on image.")

yoga.jpg ADDED Viewed

zoom_face.png ADDED Viewed

Git LFS Details

SHA256: 50881ef7ec7f02d5e3ff2c3bd2a05acf51d57e3347277a22240cd07bcf62ccd0
Pointer size: 132 Bytes
Size of remote file: 1.36 MB