Kalhar.Pandya committed
Commit 05bac69 · 1 Parent(s): 81ce8e4
Files changed (2)
  1. .env +1 -1
  2. app.py +187 -288
.env CHANGED
@@ -1,9 +1,9 @@
 
- OPENAI_API_KEY=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
  OPENAI_MODEL=gpt-4o-mini
  ROW_COUNT=7
  COL_COUNT=7
  ZOOM_LEVELS=1
  OVERLAP_FRAC=0.5
  PAD_FRAC=0
+ MAX_CANDIDATES=3
 
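A note on how app.py reads these values: its `_env` helper strips inline `# comment` text and whitespace before parsing. A minimal sketch of that behavior (the `str(d)` coercion is an added safeguard, not in the committed helper, since app.py passes integer defaults that would break `.split()` whenever a variable is missing from the environment):

```python
import os

def _env(k, d=""):
    # Mirror of app.py's helper: read an env var, drop any inline "# comment", trim whitespace.
    # str(d) is an extra safeguard (app.py passes ints like 7, which would fail on .split
    # if the variable were absent from the environment).
    return os.getenv(k, str(d)).split("#", 1)[0].strip()

# With the .env above loaded, these match app.py's DEFAULTS:
rows = int(_env("ROW_COUNT", 7))                  # -> 7
max_candidates = int(_env("MAX_CANDIDATES", 3))   # -> 3
```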
app.py CHANGED
@@ -1,129 +1,97 @@
- import asyncio, base64, json, math, os, tempfile
  from pathlib import Path

  import cv2
  import gradio as gr
  import numpy as np
  from dotenv import load_dotenv
  from openai import OpenAI

  # ─────────── ENV + DEFAULTS ───────────
  load_dotenv()
- def _env(k, d=""): return os.getenv(k, d).split("#", 1)[0].strip()

- # API key and model will be provided through the UI
  DEFAULT_MODEL = _env("OPENAI_MODEL", "gpt-4o")

- # Available models for dropdown selection - all support vision capabilities
  AVAILABLE_MODELS = [
-     "gpt-4o",                # Current flagship model (most recommended)
-     "gpt-4o-mini",           # More economical version of gpt-4o
      "o1",                    # Advanced reasoning model with vision support
-     "o1-mini",               # Smaller, faster version of o1
      "o3-mini",               # Newest reasoning model (Jan 2025)
      "gpt-4-vision-preview",  # Original vision model (being deprecated)
      "gpt-4-turbo"            # Older model with vision support
  ]

  DEFAULTS = dict(
-     row     = int(_env("ROW_COUNT", 7)),
-     col     = int(_env("COL_COUNT", 7)),
-     zoom    = int(_env("ZOOM_LEVELS", 1)),
-     overlap = float(_env("OVERLAP_FRAC", 0.5)),
-     pad     = float(_env("PAD_FRAC", 0.0)),
  )

  DEFAULT_PROMPT = (
-     "You are a highly detailed vision inspector specialized in human detection from aerial imagery. "
-     "You are provided with an image that may be divided into grid cells, each labeled with a unique number. "
-     "Your task is to examine the entire image (or each grid cell) and determine whether there is any sign of a human presence. "
-     "Partial visibility is acceptable—look for any visible human features such as limbs, faces, clothing, or distinct shadows and silhouettes that contrast with natural surroundings. "
-     "Consider unusual color patterns, shapes, or textures that might indicate a person, even if partially obscured by vegetation or terrain. "
-     "Take your time to analyze all clues carefully, and if there is any doubt, mention your top candidate grid cell(s). "
      "Respond strictly with valid JSON in the following format:\n"
-     "  {\"detected\":\"YES/NO/MAYBE\", \"confidence\":<float between 0 and 1>, \"reason\":\"<15 words max>\"}\n"
-     "For example, if a grid cell shows a clear human silhouette with contrasting clothing, your response might be:\n"
-     "  {\"detected\":\"YES\", \"confidence\":0.87, \"reason\":\"Clear human figure in grid cell 23 with distinct clothing and shadow.\"}\n"
-     "- YES: A human or clear human-like feature is observed.\n"
-     "- MAYBE: Ambiguous or partial human evidence is present.\n"
-     "- NO: No evidence of human presence is detected."
  )

-
-
  # ─────────── HELPERS ───────────
  def encode(img):
-     """Encode image to base64 string"""
-     # Set JPEG quality to higher value for better image quality
      encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), 95]
      _, buf = cv2.imencode(".jpg", img, encode_params)
      return base64.b64encode(buf).decode()

- async def ask_api(img, api_key, model="gpt-4o", custom_prompt=None):
-     """Ask OpenAI API about an image"""
-     client = OpenAI(api_key=api_key)
-     prompt = custom_prompt or DEFAULT_PROMPT
-     msg = [{"role": "user", "content": [
-         {"type": "text", "text": prompt},
-         {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode(img)}"}}
-     ]}]
-     delay = 1
-     for attempt in range(5):  # Limit retries to 5
-         try:
-             r = await asyncio.to_thread(
-                 client.chat.completions.create,
-                 model=model, messages=msg, max_tokens=60,
-                 response_format={"type": "json_object"}
-             )
-             return json.loads(r.choices[0].message.content)
-         except Exception as e:
-             if "rate limit" in str(e).lower():
-                 await asyncio.sleep(delay)
-                 delay = min(delay * 2, 32)
-             else:
-                 return {"detected": "NO", "confidence": 0, "reason": f"Error: {str(e)[:50]}..."}
-     # If we get here, we've exhausted all retries
-     return {"detected": "NO", "confidence": 0, "reason": "Too many API retries, please try again later"}

- def crop(img, r):
-     """Crop image to region r=(x,y,w,h) in relative coordinates"""
-     H, W = img.shape[:2]; x, y, w, h = r
-     return img[int(y*H):int((y+h)*H), int(x*W):int((x+w)*W)]
-
- def split(r, rows, cols, ov, pad):
-     """Split region r into a grid of subregions with overlap and padding"""
-     x0, y0, w, h = r; tw, th = w/cols, h/rows
-     sx, sy = tw*(1-ov), th*(1-ov)
-     nx = max(1, int((w-tw)//sx)+1); ny = max(1, int((h-th)//sy)+1)
-     tiles = []
      for ry in range(ny):
          for cx in range(nx):
-             sx0 = min(x0+cx*sx, x0+w-tw)
-             sy0 = min(y0+ry*sy, y0+h-th)
-             px, py = tw*pad, th*pad
-             tiles.append((sx0+px, sy0+py, tw-2*px, th-2*py))
      return tiles

- def rank(det): return {"YES": 0, "MAYBE": 1}.get(det, 2)
-
- # ─────────── RECURSIVE SEARCH ───────────
- # More distinct colors with better contrast
- STAGE_COLOURS = [(0, 165, 255),  # Orange
-                  (0, 255, 0),    # Green
-                  (255, 0, 0),    # Blue (in RGB)
-                  (255, 255, 0),  # Cyan (in RGB)
-                  (128, 0, 128)]  # Purple

  def draw_path(img, path, results=None):
-     """Draw search path on image with optional detection results"""
      out = img.copy()
      for i, r in enumerate(path):
-         x, y, w, h = r; H, W = img.shape[:2]
-         x1, y1, x2, y2 = int(x*W), int(y*H), int((x+w)*W), int((y+h)*H)
          color = STAGE_COLOURS[i % len(STAGE_COLOURS)]
-         cv2.rectangle(out, (x1, y1), (x2, y2), color, 2)
-
-         # Add stage label
          label = f"S{i+1}"
          if results and i < len(results):
              res = results[i]
@@ -131,265 +99,196 @@ def draw_path(img, path, results=None):
              det = res["detected"]
              conf = res.get("confidence", 0)
              label += f": {det} ({conf:.2f})"
-
-         # Text with background for better visibility
          font = cv2.FONT_HERSHEY_SIMPLEX
          font_scale = 0.5
          thickness = 1
          text_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
-
-         # Draw background rectangle for text
-         cv2.rectangle(out, (x1, y1-text_size[1]-5), (x1+text_size[0]+5, y1), color, -1)
-         cv2.putText(out, label, (x1+2, y1-5), font, font_scale, (255, 255, 255), thickness)
-
      return out

- async def recurse(img, region, depth, rows, cols, ov, pad, prog, api_key, model, all_results=None):
      """
-     Returns (final_region, path, results) where:
-     - path is a list of chosen regions, one per stage
-     - results is a list of API results for each stage
      """
      if all_results is None:
          all_results = []

      if depth == 0:
-         return region, [], all_results
-
-     subs = split(region, rows, cols, ov, pad)
-     prog(0, desc=f"Stage {depth}: scanning {len(subs)} tiles...")
-
-     async def task(i, r):
-         crop_img = crop(img, r)
-         result = await ask_api(crop_img, api_key, model)
-         return i, result, r
-
-     results = [None] * len(subs)
-     regions = [None] * len(subs)
-
-     for c in asyncio.as_completed([task(i, r) for i, r in enumerate(subs)]):
-         i, res, r = await c
-         results[i] = res
-         regions[i] = r
-         prog((i+1)/len(subs), desc=f"Stage {depth}: {i+1}/{len(subs)} tiles processed")
-
-     best_idx, score = None, (3, -1)
-     for i, d in enumerate(results):
-         s = (rank(d["detected"]), -d["confidence"])
-         if s < score:
-             best_idx, score = i, s

-     if best_idx is None:
-         best_idx = 0

-     best_region = regions[best_idx]
-     stage_results = {"region": best_region, "results": results, "best_idx": best_idx}
-     all_results.append(stage_results)

-     final_reg, sub_path, all_res = await recurse(
-         img, best_region, depth - 1, rows, cols, ov, pad, prog, api_key, model, all_results
-     )
-

-     return final_reg, [best_region] + sub_path, all_res

- # ─────────── GRADIO PIPELINE ───────────
- def run_pipeline(pil_img, api_key, model, rows, cols, zoom, ov, pad, progress=gr.Progress()):
-     """Main pipeline to process an image and find humans"""
-     # Input validation and error checking
      error_message = None
-
-     # Check if image was provided
      if pil_img is None:
          error_message = "Error: Please upload an image to analyze."
-     # Check for API key
      elif not api_key or api_key.strip() == "":
-         error_message = "Error: OpenAI API key is required to run the search."
-     # Check if model is selected
      elif not model or model.strip() == "":
          error_message = "Error: Please select an OpenAI model."
-
      if error_message:
-         return (None, None, None, error_message)
-
-     # Input validation
      try:
-         rows = max(1, min(int(rows), 10))
-         cols = max(1, min(int(cols), 10))
-         zoom = max(1, min(int(zoom), 3))
-         ov = max(0, min(float(ov), 0.9))
-         pad = max(0, min(float(pad), 0.3))
-     except (ValueError, TypeError):
-         return (None, None, None, "Error: Invalid parameter values. Using defaults instead.")

-     with tempfile.TemporaryDirectory() as td:
-         try:
-             img_path = str(Path(td) / "in.jpg")
-             pil_img.save(img_path)
-
-             # Convert to RGB after reading (OpenCV reads as BGR)
-             img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
-
-             progress(0, desc=f"Starting recursive search using {model}...")
-             final_reg, path, all_results = asyncio.run(
-                 recurse(img, (0, 0, 1, 1), zoom, rows, cols, ov, pad, progress, api_key, model)
-             )
-
-             # Get final crop (already in RGB)
-             crop_img = crop(img, final_reg)
-
-             # Collect results for each stage
-             stage_results = []
-             best_results = []
-             for stage in all_results:
-                 best_idx = stage["best_idx"]
-                 results = stage["results"]
-                 if best_idx is not None and best_idx < len(results):
-                     best_results.append(results[best_idx])
-
-             # Create the path visualization (already in RGB)
-             path_img = draw_path(img, path, best_results)
-
-             # Create visualization of all tiles in the first stage
-             if all_results and len(all_results) > 0:
-                 first_stage = all_results[0]
-                 stage_img = img.copy()
-
-                 for i, r in enumerate(first_stage["results"]):
-                     region = first_stage["region"] if i == first_stage["best_idx"] else None
-                     x, y, w, h = split(region or (0, 0, 1, 1), rows, cols, ov, pad)[i]
-                     H, W = img.shape[:2]
-                     x1, y1, x2, y2 = int(x*W), int(y*H), int((x+w)*W), int((y+h)*H)
-
-                     # Color based on detection
-                     if r["detected"] == "YES":
-                         color = (0, 255, 0)    # Green
-                     elif r["detected"] == "MAYBE":
-                         color = (0, 165, 255)  # Orange
-                     else:
-                         color = (255, 0, 0)    # Red (in RGB)
-
-                     # Draw rectangle with confidence
-                     cv2.rectangle(stage_img, (x1, y1), (x2, y2), color, 1)
-                     conf = r.get("confidence", 0)
-
-                     # Add text with background
-                     label = f"{r['detected']} ({conf:.2f})"
-                     font = cv2.FONT_HERSHEY_SIMPLEX
-                     font_scale = 0.4
-                     thickness = 1
-                     text_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
-
-                     # Draw background for text
-                     cv2.rectangle(stage_img, (x1, y1-text_size[1]-5), (x1+text_size[0]+5, y1), (0, 0, 0), -1)
-                     cv2.putText(stage_img, label, (x1+2, y1-5), font, font_scale, (255, 255, 255), thickness)
-
-                 # Mark best tile with thicker border
-                 best_idx = first_stage["best_idx"]
-                 if best_idx is not None and best_idx < len(split((0, 0, 1, 1), rows, cols, ov, pad)):
-                     r = split((0, 0, 1, 1), rows, cols, ov, pad)[best_idx]
-                     x, y, w, h = r
-                     H, W = img.shape[:2]
-                     x1, y1, x2, y2 = int(x*W), int(y*H), int((x+w)*W), int((y+h)*H)
-                     cv2.rectangle(stage_img, (x1, y1), (x2, y2), (0, 255, 255), 3)  # Yellow thick border
-             else:
-                 stage_img = img.copy()
-
-             # Create a summary of the results
-             summary = []
-             for i, res in enumerate(best_results):
-                 summary.append(f"Stage {i+1}: {res['detected']} ({res['confidence']:.2f}) - {res['reason']}")
-
-             summary_text = "\n".join(summary)
-
-             # Return results
-             return crop_img, path_img, stage_img, summary_text
-         except Exception as e:
-             # Handle any other exceptions
-             return (None, None, None, f"Error: {str(e)}")

- # ─────────── UI ───────────
- with gr.Blocks(title="Eagle‑Eyes Search", css="footer {visibility: hidden}") as demo:
      gr.Markdown("""
-     # 🦅 Eagle-Eyes Search

-     Upload an image to find humans using recursive zoom technology. The system divides the image
-     into a grid and recursively zooms into the most promising regions.
-
-     How it works:
-     1. The image is divided into a grid based on your settings
-     2. Each grid cell is analyzed for human presence
-     3. The most promising cell is selected for the next zoom level
-     4. This process repeats for the specified number of zoom levels
      """)

      with gr.Row():
          with gr.Column(scale=1):
              img_in = gr.Image(type="pil", label="Input Image")
-
-             # API Key input (password field)
              api_key = gr.Textbox(
                  label="OpenAI API Key",
                  placeholder="Enter your OpenAI API key here...",
                  type="password",
                  info="Your API key will be used only for this session and not stored"
              )
-
-             # Model selection dropdown
              model = gr.Dropdown(
                  choices=AVAILABLE_MODELS,
                  value=DEFAULT_MODEL,
                  label="Model Selection",
                  info="Select the OpenAI model to use for analysis"
              )
-
              with gr.Group():
                  with gr.Row():
-                     row = gr.Number(value=DEFAULTS["row"], label="Grid Rows", precision=0, minimum=1, maximum=10)
-                     col = gr.Number(value=DEFAULTS["col"], label="Grid Columns", precision=0, minimum=1, maximum=10)
-                     zoom = gr.Number(value=DEFAULTS["zoom"], label="Zoom Levels", precision=0, minimum=1, maximum=3)
-
                  with gr.Row():
-                     ov = gr.Slider(0, 0.9, step=0.05, value=DEFAULTS["overlap"], label="Tile Overlap")
-                     pad = gr.Slider(0, 0.3, step=0.01, value=DEFAULTS["pad"], label="Tile Padding")
-
              btn = gr.Button("🔍 Run Search", variant="primary")
-
              summary_out = gr.Textbox(label="Results Summary", lines=5, interactive=False)
-
-         with gr.Column(scale=2):
-             with gr.Tab("Final Crop"):
-                 crop_out = gr.Image(label="Final Crop (Most Likely Human Location)")
-
-             with gr.Tab("Search Path"):
-                 path_out = gr.Image(label="Search Path (Colored by Zoom Level)")
-
-             with gr.Tab("First Stage Analysis"):
-                 stage_out = gr.Image(label="First Stage Grid Analysis")
-
      gr.Markdown("""
      ### Tips for Best Results

-     - **OpenAI API Key**: Required to use this tool. Your key remains private and is not stored
-     - **Model Selection**: Choose the appropriate OpenAI model:
-       - `gpt-4o`: Best overall performance for general vision tasks (recommended)
-       - `gpt-4o-mini`: More economical version of gpt-4o with good performance
-       - `o1`: Advanced reasoning model, excellent for complex analysis
-       - `o1-mini`: Smaller, faster version of o1
-       - `o3-mini`: Newest reasoning model (Jan 2025), optimized for STEM tasks
-       - `gpt-4-vision-preview`: Original vision model (being deprecated)
-       - `gpt-4-turbo`: Older model with vision capabilities
-     - **Grid Size**: More rows/columns give better precision but require more API calls
-     - **Zoom Levels**: More levels allow deeper searching in complex images
-     - **Overlap**: Higher overlap prevents missing objects at tile boundaries
-     - **Padding**: Reduces edge artifacts in grid cells
-
-     This tool uses OpenAI's vision API to analyze image regions and detect human presence.
      """)
-
-     btn.click(run_pipeline,
-               inputs=[img_in, api_key, model, row, col, zoom, ov, pad],
-               outputs=[crop_out, path_out, stage_out, summary_out])

  if __name__ == "__main__":
-     demo.launch()
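The rewritten app.py below fixes overlap and padding at 0, and the old `split()` stride arithmetic shows why that matters for cost: with the old default `OVERLAP_FRAC=0.5`, a nominal 7×7 grid produces 13×13 = 169 tiles, and each tile is one API call per stage. A minimal standalone sketch of that arithmetic, assuming the same formulas as the old `split()` (the `tile_counts` helper is illustrative, not part of the commit):

```python
# Standalone re-check of the old split() stride math over a unit region.
def tile_counts(rows, cols, ov, w=1.0, h=1.0):
    tw, th = w / cols, h / rows              # tile size in relative coordinates
    sx, sy = tw * (1 - ov), th * (1 - ov)    # stride shrinks as overlap grows
    nx = max(1, int((w - tw) // sx) + 1)
    ny = max(1, int((h - th) // sy) + 1)
    return nx, ny

print(tile_counts(7, 7, 0.5))  # (13, 13): 169 tiles, so 169 API calls per stage
print(tile_counts(7, 7, 0.0))  # (7, 7): 49 tiles, matching the new fixed ov=0.0
```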
+ import asyncio, base64, json, os, tempfile
  from pathlib import Path

  import cv2
  import gradio as gr
  import numpy as np
+ from PIL import Image
  from dotenv import load_dotenv
  from openai import OpenAI

  # ─────────── ENV + DEFAULTS ───────────
  load_dotenv()
+ def _env(k, d=""):
+     return os.getenv(k, d).split("#", 1)[0].strip()

  DEFAULT_MODEL = _env("OPENAI_MODEL", "gpt-4o")

  AVAILABLE_MODELS = [
+     "gpt-4o",                # Current flagship model with vision support
+     "gpt-4o-mini",           # More economical version of gpt-4o with vision support
      "o1",                    # Advanced reasoning model with vision support
+     "o1-mini",               # Smaller, faster version (if needed)
      "o3-mini",               # Newest reasoning model (Jan 2025)
      "gpt-4-vision-preview",  # Original vision model (being deprecated)
      "gpt-4-turbo"            # Older model with vision support
  ]

  DEFAULTS = dict(
+     row = int(_env("ROW_COUNT", 7)),
+     col = int(_env("COL_COUNT", 7)),
+     zoom = int(_env("ZOOM_LEVELS", 2)),               # Recursion depth (zoom levels)
+     overlap = 0.0,                                    # Fixed at 0 as requested
+     pad = 0.0,                                        # Fixed at 0 as requested
+     max_candidates = int(_env("MAX_CANDIDATES", 3))   # Maximum number of candidates per search
  )

+ # ─────────── PROMPT FOR GRID CELL ANALYSIS ───────────
  DEFAULT_PROMPT = (
+     "You are a vision inspector. Look at the image and determine if a human is present. "
+     "Partial visibility is acceptable—consider clues like limbs, clothing, silhouettes, shadows, or partial faces. "
      "Respond strictly with valid JSON in the following format:\n"
+     '{"detected":"YES/NO/MAYBE", "confidence":<float between 0 and 1>, "reason":"<15 words max>"}\n'
+     "- YES: Clearly visible human feature(s) are observed.\n"
+     "- MAYBE: Ambiguous or partial evidence is present.\n"
+     "- NO: No evidence of a human is detected."
  )

  # ─────────── HELPERS ───────────
  def encode(img):
+     """Encode image to a Base64 string."""
      encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), 95]
      _, buf = cv2.imencode(".jpg", img, encode_params)
      return base64.b64encode(buf).decode()

+ def crop(img, r):
+     """Crop image to region r=(x,y,w,h) in relative coordinates."""
+     H, W = img.shape[:2]
+     x, y, w, h = r
+     return img[int(y * H):int((y + h) * H), int(x * W):int((x + w) * W)]

+ def split(r, rows, cols, ov=0.0, pad=0.0):
+     """
+     Split region r=(x,y,w,h) into a grid of subregions with the specified rows and columns.
+     Overlap and padding are fixed at 0 as configured.
+     """
+     x0, y0, w, h = r
+     tw, th = w / cols, h / rows
+     sx, sy = tw, th  # no overlap since ov=0.0
+     tiles = []
+     # Calculate number of grid cells
+     nx = max(1, int((w - tw) // sx) + 1)
+     ny = max(1, int((h - th) // sy) + 1)
      for ry in range(ny):
          for cx in range(nx):
+             sx0 = min(x0 + cx * sx, x0 + w - tw)
+             sy0 = min(y0 + ry * sy, y0 + h - th)
+             tiles.append((sx0, sy0, tw, th))
      return tiles

+ def rank(det):
+     """Rank the detection result."""
+     return {"YES": 0, "MAYBE": 1}.get(det, 2)

  def draw_path(img, path, results=None):
+     """Draw the search path on the image with rectangles for each stage."""
      out = img.copy()
+     STAGE_COLOURS = [(0, 165, 255), (0, 255, 0), (255, 0, 0), (255, 255, 0), (128, 0, 128)]
      for i, r in enumerate(path):
+         x, y, w, h = r
+         H, W = img.shape[:2]
+         x1, y1 = int(x * W), int(y * H)
+         x2, y2 = int((x + w) * W), int((y + h) * H)
          color = STAGE_COLOURS[i % len(STAGE_COLOURS)]
+         cv2.rectangle(out, (x1, y1), (x2, y2), color, 2)
          label = f"S{i+1}"
          if results and i < len(results):
              res = results[i]
              det = res["detected"]
              conf = res.get("confidence", 0)
              label += f": {det} ({conf:.2f})"
          font = cv2.FONT_HERSHEY_SIMPLEX
          font_scale = 0.5
          thickness = 1
          text_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
+         cv2.rectangle(out, (x1, y1 - text_size[1] - 5), (x1 + text_size[0] + 5, y1), color, -1)
+         cv2.putText(out, label, (x1 + 2, y1 - 5), font, font_scale, (255, 255, 255), thickness)
      return out

+ # ─────────── API CALL FOR A SINGLE GRID CELL ───────────
+ async def ask_api(img, api_key, model="gpt-4o", custom_prompt=None):
+     """Send one grid cell image to the OpenAI API and return the result."""
+     client = OpenAI(api_key=api_key)
+     prompt = custom_prompt or DEFAULT_PROMPT
+     msg = [{
+         "role": "user",
+         "content": [
+             {"type": "text", "text": prompt},
+             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode(img)}"}}
+         ]
+     }]
+     delay = 1
+     for attempt in range(5):
+         try:
+             response = await asyncio.to_thread(
+                 client.chat.completions.create,
+                 model=model,
+                 messages=msg,
+                 max_tokens=60,
+                 response_format={"type": "json_object"}
+             )
+             return json.loads(response.choices[0].message.content)
+         except Exception as e:
+             if "rate limit" in str(e).lower():
+                 await asyncio.sleep(delay)
+                 delay = min(delay * 2, 32)
+             else:
+                 return {"detected": "NO", "confidence": 0, "reason": f"Error: {str(e)[:50]}..."}
+     return {"detected": "NO", "confidence": 0, "reason": "Too many API retries, please try again later"}
+
+ # ─────────── RECURSIVE SEARCH FUNCTION (MULTI-CANDIDATE) ───────────
+ async def recurse_multi(img, region, depth, rows, cols, prog, api_key, model, max_candidates, all_results=None):
      """
+     Recursively analyze grid cells, allowing up to max_candidates per stage.
+     Returns a list of branch dictionaries with keys:
+     - "final_region": final region in the branch,
+     - "path": list of regions (from higher to lower levels),
+     - "stage_results": list of API results per stage.
      """
      if all_results is None:
          all_results = []

      if depth == 0:
+         return [{"final_region": region, "path": [], "stage_results": []}]

+     subs = split(region, rows, cols, ov=0.0, pad=0.0)
+     prog(0, desc=f"Stage {depth}: scanning {len(subs)} grid cells...")
+     tasks = []
+     for sub in subs:
+         crop_img = crop(img, sub)
+         tasks.append(ask_api(crop_img, api_key, model))
+     results = await asyncio.gather(*tasks)

+     # Pair each subregion with its result
+     sub_results = list(zip(subs, results))
+     # Sort by (rank, -confidence)
+     sub_results.sort(key=lambda tup: (rank(tup[1]["detected"]), -tup[1].get("confidence", 0)))

+     # Select candidates with a positive detection ("YES" or "MAYBE"); if none, take the best candidate
+     candidates = [tup for tup in sub_results if tup[1]["detected"] in ("YES", "MAYBE")]
+     if not candidates:
+         candidates = [sub_results[0]]
+     candidates = candidates[:max_candidates]

+     branches = []
+     for candidate_region, candidate_result in candidates:
+         # For the current candidate, record its stage result
+         current_stage = {"region": candidate_region, "result": candidate_result}
+         # Recursively search within the candidate region
+         child_branches = await recurse_multi(img, candidate_region, depth - 1, rows, cols, prog, api_key, model, max_candidates)
+         for branch in child_branches:
+             branch["path"].insert(0, candidate_region)
+             branch["stage_results"].insert(0, candidate_result)
+             branches.append(branch)
+     return branches

+ # ─────────── PIPELINE FUNCTION ───────────
+ def run_pipeline(pil_img, api_key, model, rows, cols, zoom, max_candidates, progress=gr.Progress()):
+     """
+     Process a single uploaded image:
+     1. Divide the image into grid cells.
+     2. Recursively zoom in by exploring up to max_candidates per stage.
+     3. Draw the search path on the original image.
+     4. Return the final cropped region (from the best branch), its search path, and a summary.
+     """
      error_message = None
      if pil_img is None:
          error_message = "Error: Please upload an image to analyze."
      elif not api_key or api_key.strip() == "":
+         error_message = "Error: OpenAI API key is required."
      elif not model or model.strip() == "":
          error_message = "Error: Please select an OpenAI model."
      if error_message:
+         return None, None, error_message
+
      try:
+         img_np = np.array(pil_img)
+     except Exception as e:
+         return None, None, f"Error converting image: {str(e)}"

+     full_region = (0, 0, 1, 1)
+     progress(0, desc=f"Starting recursive grid search using {model}...")
+     branches = asyncio.run(
+         recurse_multi(img_np, full_region, zoom, rows, cols, progress, api_key, model, max_candidates)
+     )
+
+     if not branches:
+         return None, None, "No branch found."
+
+     # Select the branch with the highest confidence at its first (top-level) stage
+     best_branch = max(branches, key=lambda b: b["stage_results"][0].get("confidence", 0))
+     final_reg = best_branch["final_region"] if "final_region" in best_branch else best_branch["path"][0]
+     final_crop = crop(img_np, final_reg)
+     final_crop_pil = Image.fromarray(final_crop)
+
+     # Draw the search path using the branch's path and stage_results (reversed, so the deepest region comes first)
+     path_order = list(reversed(best_branch["path"]))
+     stage_results_order = list(reversed(best_branch["stage_results"]))
+     path_img = draw_path(img_np, path_order, stage_results_order)
+     path_img_pil = Image.fromarray(path_img)
+
+     # Build summary text for the branch
+     summary_lines = []
+     for i, res in enumerate(stage_results_order):
+         summary_lines.append(f"Stage {i+1}: {res['detected']} ({res['confidence']:.2f}) - {res['reason']}")
+     summary_text = "\n".join(summary_lines)
+
+     return final_crop_pil, path_img_pil, summary_text

+ # ─────────── GRADIO UI ───────────
+ with gr.Blocks(title="Eagle‑Eyes Recursive Grid Search", css="footer {visibility: hidden}") as demo:
      gr.Markdown("""
+     # 🦅 Eagle‑Eyes Recursive Grid Search

+     Upload a single image. The tool will divide the image into grid cells and recursively zoom in on the most promising region.
+     At each stage, up to a configurable number of positive candidates are explored. The search path is drawn on the original image.
      """)

      with gr.Row():
          with gr.Column(scale=1):
              img_in = gr.Image(type="pil", label="Input Image")
              api_key = gr.Textbox(
                  label="OpenAI API Key",
                  placeholder="Enter your OpenAI API key here...",
                  type="password",
                  info="Your API key will be used only for this session and not stored"
              )
              model = gr.Dropdown(
                  choices=AVAILABLE_MODELS,
                  value=DEFAULT_MODEL,
                  label="Model Selection",
                  info="Select the OpenAI model to use for analysis"
              )
              with gr.Group():
                  with gr.Row():
+                     row = gr.Number(value=DEFAULTS["row"], label="Grid Rows", precision=0, minimum=1, maximum=10)
+                     col = gr.Number(value=DEFAULTS["col"], label="Grid Columns", precision=0, minimum=1, maximum=10)
+                     zoom = gr.Number(value=DEFAULTS["zoom"], label="Zoom Levels", precision=0, minimum=1, maximum=5)
                  with gr.Row():
+                     max_candidates = gr.Number(value=DEFAULTS["max_candidates"], label="Max Candidates per Stage", precision=0, minimum=1, maximum=10)
              btn = gr.Button("🔍 Run Search", variant="primary")
              summary_out = gr.Textbox(label="Results Summary", lines=5, interactive=False)
+         with gr.Column(scale=1):
+             crop_out = gr.Image(label="Final Crop (Zoomed Region)")
+             path_out = gr.Image(label="Search Path Visualization")
+
      gr.Markdown("""
      ### Tips for Best Results

+     - **OpenAI API Key**: Required for this tool. Your key remains private.
+     - **Model Selection**: Choose a model with vision support (e.g., `gpt-4o`, `gpt-4o-mini`, `o1`, etc.).
+     - **Grid Settings**: Adjust rows and columns to fine-tune segmentation.
+     - **Zoom Levels**: More zoom levels allow a deeper recursive search.
+     - **Max Candidates per Stage**: Controls how many positive grid cells to explore at each stage.
      """)
+
+     btn.click(
+         run_pipeline,
+         inputs=[img_in, api_key, model, row, col, zoom, max_candidates],
+         outputs=[crop_out, path_out, summary_out]
+     )

  if __name__ == "__main__":
+     demo.launch()
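For tracing the new control flow end to end, here is a minimal, self-contained sketch of the multi-candidate recursion with the API call stubbed out; `fake_detect`, the 2×2 grid, and the flagged top-left quadrant are illustrative assumptions, not anything in the commit:

```python
import asyncio

def split(r, rows, cols):
    # Simplified splitter matching the committed split() with ov=0, pad=0.
    x0, y0, w, h = r
    tw, th = w / cols, h / rows
    return [(x0 + cx * tw, y0 + ry * th, tw, th)
            for ry in range(rows) for cx in range(cols)]

def rank(det):
    # Same ordering as the commit: YES before MAYBE before NO.
    return {"YES": 0, "MAYBE": 1}.get(det, 2)

async def fake_detect(region):
    # Hypothetical stand-in for ask_api(): flags anything in the top-left quadrant.
    x, y, w, h = region
    hit = x < 0.5 and y < 0.5
    return {"detected": "YES" if hit else "NO", "confidence": 0.9 if hit else 0.1}

async def recurse_multi(region, depth, rows, cols, max_candidates):
    if depth == 0:
        return [{"final_region": region, "path": [], "stage_results": []}]
    subs = split(region, rows, cols)
    results = await asyncio.gather(*(fake_detect(s) for s in subs))
    pairs = sorted(zip(subs, results),
                   key=lambda t: (rank(t[1]["detected"]), -t[1]["confidence"]))
    cands = [p for p in pairs if p[1]["detected"] in ("YES", "MAYBE")] or [pairs[0]]
    branches = []
    for reg, res in cands[:max_candidates]:
        for b in await recurse_multi(reg, depth - 1, rows, cols, max_candidates):
            b["path"].insert(0, reg)            # outermost region ends up at index 0
            b["stage_results"].insert(0, res)
            branches.append(b)
    return branches

branches = asyncio.run(recurse_multi((0, 0, 1, 1), depth=2, rows=2, cols=2, max_candidates=2))
print(len(branches))                 # 2 branches survive the per-stage cap
print(branches[0]["final_region"])  # a quarter-of-a-quarter tile in the top-left
```

Because every stage keeps up to max_candidates promising cells instead of exactly one, the number of explored branches (and API calls) can grow toward max_candidates ** depth, which is worth keeping in mind when raising both values in the UI.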