Spaces:

ha7naa
/

gemini-bounding-boxes

Runtime error

App Files Files Community

ha7naa committed on Feb 10

Commit

c46b909

verified ·

1 Parent(s): 9be97dd

Upload 2 files

Browse files

Files changed (2) hide show

app.py +125 -0
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import gradio as gr
+import os
+from io import BytesIO
+from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageColor
+import json
+import google.generativeai as genai
+from google.generativeai import types
+from dotenv import load_dotenv
+from IPython.display import display
# --- 1. API key --------------------------------------------------------------
# Load variables from a local .env file (if one exists) into the environment,
# then read the key from the "Gemini_API_Key" variable.
load_dotenv()
api_key = os.environ.get("Gemini_API_Key")

# Hand the key to the Google Generative AI client library.
genai.configure(api_key=api_key)

# --- 2. Model and system instructions -----------------------------------------
# System prompt sent with every request: asks for a JSON array of labelled
# bounding boxes, without masks or Markdown code fences.
bounding_box_system_instructions = """
    Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to 25 objects.
    If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
      """

model = genai.GenerativeModel(
    model_name='gemini-2.5-flash',
    system_instruction=bounding_box_system_instructions,
)

# Sampling settings passed along with each generate_content() call.
generation_config = genai.types.GenerationConfig(temperature=0.5)
# 3. RESPONSE PARSING AND DRAWING HELPERS
def parse_json(json_output):
    """Strip a Markdown code fence from a model response, if one is present.

    The text is scanned line by line for an opening fence. A line counts as
    an opening fence when, ignoring surrounding whitespace and letter case,
    it is "```json" or a bare "```". Everything after that line, up to (but
    not including) the next "```", is returned.

    Args:
        json_output: Raw text returned by the model. ``None`` or an empty
            string is treated as "no output".

    Returns:
        The text between the fences when an opening fence is found; otherwise
        the input unchanged (or ``""`` for ``None``/empty input). The result
        is NOT validated as JSON -- callers still need ``json.loads``.
    """
    if not json_output:
        return ""

    lines = json_output.splitlines()
    for i, line in enumerate(lines):
        # Models often indent the fence, add trailing spaces, or vary the
        # case of the language tag, so compare a normalised copy of the line.
        marker = line.strip().lower()
        if marker in ("```json", "```"):
            fenced = "\n".join(lines[i + 1:])  # drop everything up to the opening fence
            return fenced.split("```")[0]  # drop the closing fence and anything after it
    return json_output


# NOTE: this is a module-level statement, so it prints once when the module is
# imported -- not each time parse_json() is called.
print("After parsing JSON from model response...")
def plot_bounding_boxes(im, bounding_boxes):
    """
    Draw labelled bounding boxes on a copy of an image.

    Args:
        im: A PIL image. It is copied first, so the caller's image is not
            modified.
        bounding_boxes: JSON text describing the boxes: an array of objects
            (a single object is also accepted). Each object needs a "box_2d"
            entry whose first four values are read as [y1, x1, y2, x2] on a
            0-1000 scale and rescaled to the image height/width. An optional
            "label" entry is drawn inside the box's top-left corner.

    Returns:
        The annotated copy. Problems are reported with ``print`` rather than
        raised: if the JSON cannot be parsed the copy is returned without
        boxes, and a malformed entry is skipped while the rest are drawn.
    """
    im = im.copy()
    width, height = im.size
    draw = ImageDraw.Draw(im)

    # A few preferred colours first, then every named colour Pillow knows,
    # so that many boxes can be drawn before colours start repeating.
    colors = [
        'red', 'green', 'blue', 'yellow', 'orange', 'pink', 'purple', 'cyan',
        'lime', 'magenta', 'violet', 'gold', 'silver'
    ] + list(ImageColor.colormap)

    # Pillow's built-in bitmap font; it needs no font file on disk.
    font = ImageFont.load_default()

    try:
        boxes = json.loads(bounding_boxes)
    except (TypeError, ValueError) as e:
        print(f"Error drawing bounding boxes: {e}")
        return im

    if isinstance(boxes, dict):
        # Tolerate a single box object that was not wrapped in an array.
        boxes = [boxes]
    if not isinstance(boxes, list):
        print(f"Error drawing bounding boxes: expected a JSON array, got {type(boxes).__name__}")
        return im

    for i, bounding_box in enumerate(boxes):
        color = colors[i % len(colors)]
        try:
            y1, x1, y2, x2 = bounding_box["box_2d"][:4]
            # Rescale from the 0-1000 range to pixel coordinates.
            abs_y1 = int(y1 / 1000 * height)
            abs_x1 = int(x1 / 1000 * width)
            abs_y2 = int(y2 / 1000 * height)
            abs_x2 = int(x2 / 1000 * width)
        except (KeyError, TypeError, ValueError) as e:
            # One bad entry should not stop the remaining boxes from drawing.
            print(f"Error drawing bounding boxes: {e}")
            continue

        # draw.rectangle() needs the first corner to be the top-left one.
        abs_x1, abs_x2 = sorted((abs_x1, abs_x2))
        abs_y1, abs_y2 = sorted((abs_y1, abs_y2))

        # Draw bounding box and label
        draw.rectangle(((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4)
        label = bounding_box.get("label")
        if label is not None:
            # str() guards against a numeric label coming back from the model.
            draw.text((abs_x1 + 8, abs_y1 + 6), str(label), fill=color, font=font)
    return im
def detect_and_draw_gradio(user_prompt: str, image: Image.Image, max_width: int = 1024):
    """Send a prompt and an image to the model and draw the returned boxes.

    Args:
        user_prompt: Text instruction for the model. When empty or
            whitespace-only, the module-level ``PROMPT`` is used if it
            exists, otherwise a built-in default prompt.
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.
        max_width: Images wider than this are downscaled to this width
            (aspect ratio preserved) before being sent to the model.

    Returns:
        A ``(image, text)`` pair:
        * ``(None, message)`` when no image was supplied;
        * ``(resized_image, message_or_raw_text)`` when the model returned no
          text or text that is not valid JSON (the raw text is returned to
          help debugging);
        * ``(annotated_image, json_text)`` on success.

    Raises:
        Whatever ``model.generate_content`` raises; request errors are not
        caught here.
    """
    if image is None:
        return None, "Please upload an image."

    if not user_prompt or not user_prompt.strip():
        try:
            user_prompt = PROMPT
        except NameError:
            # No PROMPT constant is defined in this module; use a sensible
            # default instead of crashing on an empty prompt box.
            user_prompt = "Detect the prominent objects in the image and return their 2D bounding boxes."

    image = image.convert("RGB")
    W, H = image.size

    # Downscale wide images, keeping the aspect ratio.
    if W > max_width:
        newW = max_width
        newH = max(1, int(newW * H / W))  # never let the height collapse to 0
        im_resized = image.resize((newW, newH), Image.Resampling.LANCZOS)
    else:
        im_resized = image

    # send prompt + image
    response = model.generate_content([user_prompt, im_resized], generation_config=generation_config)

    # response.text raises ValueError (not AttributeError) when the response
    # has no usable text part, e.g. because it was blocked.
    try:
        raw_text = response.text or ""
    except (AttributeError, ValueError):
        raw_text = ""
    if not raw_text:
        return im_resized, "The model returned no text (the response may have been blocked)."

    bounding_boxes = parse_json(raw_text)
    try:
        json.loads(bounding_boxes)
    except ValueError:
        return im_resized, raw_text  # not valid JSON: show the raw reply for debugging

    out_img = plot_bounding_boxes(im_resized, bounding_boxes)
    return out_img, bounding_boxes

requirements.txt ADDED Viewed

File without changes