obj_localizer / app.py
3v324v23's picture
fix: resolve all ruff lint errors
cf388f7
Raw
History Blame Contribute Delete
6.54 kB
"""SpaceDebris Localizer - Gradio application.
Uses nvidia/LocateAnything-3B to locate space debris, satellite fragments,
and spacecraft components in space imagery.
"""
from __future__ import annotations
import json
import logging
import os
import gradio as gr
from PIL import Image
from src.config import APP_SUBTITLE, APP_TITLE
from src.inference import LocateAnythingWorker, run_localization
from src.prompts import get_example_prompts
from src.utils import ensure_rgb, format_json_output, format_metadata, validate_image
# Set up logging once at import time with a timestamped, level-tagged format.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Process-wide model worker; remains ``None`` until ``get_worker`` creates it.
worker: LocateAnythingWorker | None = None
def get_worker() -> LocateAnythingWorker:
    """Return the shared model worker, loading it on first use.

    The worker is published to the module-level ``worker`` global only after
    ``load()`` has returned. If construction or loading raises, the global is
    left as ``None`` so the next call retries instead of handing back a
    worker whose ``load()`` never completed.

    Returns:
        The process-wide ``LocateAnythingWorker`` instance.

    Raises:
        Exception: Whatever ``LocateAnythingWorker()`` or ``load()`` raises is
            propagated unchanged.
    """
    global worker
    if worker is None:
        logger.info("Loading LocateAnything-3B model...")
        candidate = LocateAnythingWorker()
        candidate.load()
        # Publish only after a successful load: a failure above leaves
        # ``worker`` unset, so a later request can try again.
        worker = candidate
        logger.info("Model loaded successfully.")
    return worker
def run_inference(
    image: Image.Image | None,
    prompt: str,
) -> tuple[Image.Image | None, str, str, str, str]:
    """Handle one localization request coming from the Gradio interface.

    Args:
        image: Image supplied by the UI; may be ``None``.
        prompt: Free-text description of what to locate.

    Returns:
        ``(annotated_image, metadata, raw_output, json_output, status_message)``.
        When validation or inference fails, the first four slots are
        ``None`` / empty strings and the status slot carries the error text.
    """
    # Placeholder values for the four result widgets when there is nothing to show.
    blank = (None, "", "", "")

    ok, reason = validate_image(image)
    if not ok:
        return (*blank, f"Error: {reason}")

    query = prompt.strip() if prompt else ""
    if not query:
        return (*blank, "Error: Please enter a detection prompt.")

    try:
        rgb = ensure_rgb(image)
        model = get_worker()
        annotated, raw_output, parsed = run_localization(rgb, query, worker=model)

        metadata = format_metadata(parsed)
        json_str = json.dumps(format_json_output(parsed), indent=2, ensure_ascii=False)

        summary = f"Done. Found {parsed.num_detections} object(s)."
        warnings = parsed.parse_errors
        if warnings:
            summary = f"{summary} ({len(warnings)} warning(s))"
        return annotated, metadata, raw_output, json_str, summary
    except Exception as exc:
        # UI boundary: log the traceback and report the failure in the status box.
        logger.exception("Inference failed")
        return (*blank, f"Inference error: {exc}")
def build_app() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire its events.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    # Static page text is collected up front so the layout below reads as a
    # plain widget tree. The string bodies are kept flush-left so their
    # contents stay exactly as they were.
    page_css = """
.main-title { text-align: center; margin-bottom: 0; }
.subtitle { text-align: center; color: #666; margin-top: 0; }
.footer { text-align: center; color: #999; font-size: 0.85em; margin-top: 20px; }
"""
    header_html = f"""
<h1 class="main-title">{APP_TITLE}</h1>
<p class="subtitle">{APP_SUBTITLE}</p>
"""
    intro_md = """
> **How it works:** Upload a space or satellite image and enter a natural-language
> prompt describing what to locate. The model grounds your query in the image and
> returns bounding box coordinates. Detection quality depends on image resolution,
> object visibility, and model grounding capability.
"""
    about_md = """
**SpaceDebris Localizer** is a hackathon prototype demonstrating how NVIDIA's
**LocateAnything-3B** vision-language model can be applied to orbital debris
localization and satellite component identification.
### Capabilities
- Open-set object detection from natural-language prompts
- Bounding-box grounding for arbitrary visual concepts
- Structured output with pixel-coordinate parsing
### Limitations
- The model was trained on general grounding data, not specifically orbital imagery
- Detection quality depends heavily on image resolution and object clarity
- Small debris fragments may not be reliably detected
- This is a proof-of-concept, not a production debris tracking system
### Model
- [nvidia/LocateAnything-3B](https://huggingface.co/nvidia/LocateAnything-3B) on Hugging Face
- 3B parameter vision-language model with Parallel Box Decoding
- Coordinates are normalized to [0, 1000] and converted to pixel space
"""
    footer_html = '<p class="footer">Powered by nvidia/LocateAnything-3B | SpaceDebris Localizer</p>'

    with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft(), css=page_css) as app:
        gr.HTML(header_html)
        gr.Markdown(intro_md)

        with gr.Row():
            # First column: image upload, prompt, run button and status line.
            with gr.Column(scale=1):
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(
                    label="Detection Prompt",
                    placeholder="e.g. Locate all the instances that match the following description: space debris.",
                    lines=2,
                )
                run_btn = gr.Button("Run Localization", variant="primary", size="lg")
                status_text = gr.Textbox(label="Status", interactive=False, lines=1)

            # Second column: annotated image plus tabbed textual results.
            with gr.Column(scale=1):
                output_image = gr.Image(type="pil", label="Annotated Image")
                with gr.Tabs():
                    with gr.TabItem("Metadata"):
                        metadata_box = gr.Textbox(
                            label="Detection Metadata",
                            lines=6,
                            interactive=False,
                        )
                    with gr.TabItem("Raw Output"):
                        raw_box = gr.Textbox(
                            label="Raw Model Output",
                            lines=8,
                            interactive=False,
                            show_copy_button=True,
                        )
                    with gr.TabItem("JSON Output"):
                        json_box = gr.Code(label="Parsed JSON", language="json", lines=8)

        gr.Markdown("### Example Prompts")
        gr.Markdown("Click an example to load it into the prompt field.")
        gr.Examples(
            examples=get_example_prompts(),
            inputs=[prompt_input],
            label="Space Debris Prompts",
        )

        with gr.Accordion("About This Project", open=False):
            gr.Markdown(about_md)

        gr.HTML(footer_html)

        # The button click and pressing Enter in the prompt box trigger the
        # same handler with the same inputs and outputs.
        handler_inputs = (input_image, prompt_input)
        handler_outputs = (output_image, metadata_box, raw_box, json_box, status_text)
        for register in (run_btn.click, prompt_input.submit):
            register(
                fn=run_inference,
                inputs=list(handler_inputs),
                outputs=list(handler_outputs),
            )

    return app
if __name__ == "__main__":
    app = build_app()
    # Bind on every interface; the port comes from $PORT, falling back to 7860.
    port = int(os.environ.get("PORT", "7860"))
    app.launch(server_name="0.0.0.0", server_port=port)