"""SpaceDebris Localizer - Gradio application. Uses nvidia/LocateAnything-3B to locate space debris, satellite fragments, and spacecraft components in space imagery. """ from __future__ import annotations import json import logging import os import gradio as gr from PIL import Image from src.config import APP_SUBTITLE, APP_TITLE from src.inference import LocateAnythingWorker, run_localization from src.prompts import get_example_prompts from src.utils import ensure_rgb, format_json_output, format_metadata, validate_image logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) worker: LocateAnythingWorker | None = None def get_worker() -> LocateAnythingWorker: """Lazy-load the model worker on first use.""" global worker if worker is None: logger.info("Loading LocateAnything-3B model...") worker = LocateAnythingWorker() worker.load() logger.info("Model loaded successfully.") return worker def run_inference( image: Image.Image | None, prompt: str, ) -> tuple[Image.Image | None, str, str, str, str]: """Main inference function for Gradio interface. Returns: (annotated_image, metadata, raw_output, json_output, status_message) """ is_valid, error_msg = validate_image(image) if not is_valid: return None, "", "", "", f"Error: {error_msg}" if not prompt or not prompt.strip(): return None, "", "", "", "Error: Please enter a detection prompt." try: image_rgb = ensure_rgb(image) w = get_worker() annotated, raw_output, parsed = run_localization(image_rgb, prompt.strip(), worker=w) metadata = format_metadata(parsed) json_out = format_json_output(parsed) json_str = json.dumps(json_out, indent=2, ensure_ascii=False) status = f"Done. Found {parsed.num_detections} object(s)." if parsed.parse_errors: status += f" ({len(parsed.parse_errors)} warning(s))" return annotated, metadata, raw_output, json_str, status except Exception as exc: logger.exception("Inference failed") return None, "", "", "", f"Inference error: {exc}" def build_app() -> gr.Blocks: """Build the Gradio Blocks interface.""" with gr.Blocks( title=APP_TITLE, theme=gr.themes.Soft(), css=""" .main-title { text-align: center; margin-bottom: 0; } .subtitle { text-align: center; color: #666; margin-top: 0; } .footer { text-align: center; color: #999; font-size: 0.85em; margin-top: 20px; } """, ) as app: gr.HTML(f"""
{APP_SUBTITLE}
""") gr.Markdown(""" > **How it works:** Upload a space or satellite image and enter a natural-language > prompt describing what to locate. The model grounds your query in the image and > returns bounding box coordinates. Detection quality depends on image resolution, > object visibility, and model grounding capability. """) with gr.Row(): with gr.Column(scale=1): input_image = gr.Image(type="pil", label="Upload Image") prompt_input = gr.Textbox( label="Detection Prompt", placeholder="e.g. Locate all the instances that match the following description: space debris.", lines=2, ) run_btn = gr.Button("Run Localization", variant="primary", size="lg") status_text = gr.Textbox(label="Status", interactive=False, lines=1) with gr.Column(scale=1): output_image = gr.Image(type="pil", label="Annotated Image") with gr.Tabs(): with gr.TabItem("Metadata"): metadata_output = gr.Textbox(label="Detection Metadata", lines=6, interactive=False) with gr.TabItem("Raw Output"): raw_output = gr.Textbox(label="Raw Model Output", lines=8, interactive=False, show_copy_button=True) with gr.TabItem("JSON Output"): json_output = gr.Code(label="Parsed JSON", language="json", lines=8) gr.Markdown("### Example Prompts") gr.Markdown("Click an example to load it into the prompt field.") examples_list = get_example_prompts() gr.Examples( examples=examples_list, inputs=[prompt_input], label="Space Debris Prompts", ) with gr.Accordion("About This Project", open=False): gr.Markdown(""" **SpaceDebris Localizer** is a hackathon prototype demonstrating how NVIDIA's **LocateAnything-3B** vision-language model can be applied to orbital debris localization and satellite component identification. ### Capabilities - Open-set object detection from natural-language prompts - Bounding-box grounding for arbitrary visual concepts - Structured output with pixel-coordinate parsing ### Limitations - The model was trained on general grounding data, not specifically orbital imagery - Detection quality depends heavily on image resolution and object clarity - Small debris fragments may not be reliably detected - This is a proof-of-concept, not a production debris tracking system ### Model - [nvidia/LocateAnything-3B](https://huggingface.co/nvidia/LocateAnything-3B) on Hugging Face - 3B parameter vision-language model with Parallel Box Decoding - Coordinates are normalized to [0, 1000] and converted to pixel space """) gr.HTML('') run_btn.click( fn=run_inference, inputs=[input_image, prompt_input], outputs=[output_image, metadata_output, raw_output, json_output, status_text], ) prompt_input.submit( fn=run_inference, inputs=[input_image, prompt_input], outputs=[output_image, metadata_output, raw_output, json_output, status_text], ) return app if __name__ == "__main__": app = build_app() app.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))