Spaces:
Running
Running
| """SpaceDebris Localizer - Gradio application. | |
| Uses nvidia/LocateAnything-3B to locate space debris, satellite fragments, | |
| and spacecraft components in space imagery. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import os | |
| import gradio as gr | |
| from PIL import Image | |
| from src.config import APP_SUBTITLE, APP_TITLE | |
| from src.inference import LocateAnythingWorker, run_localization | |
| from src.prompts import get_example_prompts | |
| from src.utils import ensure_rgb, format_json_output, format_metadata, validate_image | |
# Configure root logging once at import time: timestamp, level, message.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Process-wide model worker; stays None until get_worker() creates it.
worker: LocateAnythingWorker | None = None
def get_worker() -> LocateAnythingWorker:
    """Return the shared model worker, creating and loading it on first use.

    The new instance is published to the module-level ``worker`` only after
    ``load()`` has returned. If construction or loading raises, ``worker``
    stays ``None`` so the next call retries instead of handing out an
    instance whose ``load()`` never completed.

    Returns:
        The module-level ``LocateAnythingWorker`` instance.

    Raises:
        Exception: Anything raised by ``LocateAnythingWorker()`` or its
            ``load()`` method propagates unchanged to the caller.
    """
    global worker
    if worker is None:
        logger.info("Loading LocateAnything-3B model...")
        candidate = LocateAnythingWorker()
        candidate.load()
        # Publish only after a successful load so failures are retryable.
        worker = candidate
        logger.info("Model loaded successfully.")
    return worker
def run_inference(
    image: Image.Image | None,
    prompt: str,
) -> tuple[Image.Image | None, str, str, str, str]:
    """Gradio callback: validate the inputs, run localization, format results.

    Args:
        image: Image supplied by the upload component, or ``None``.
        prompt: Free-text detection prompt; surrounding whitespace is ignored.

    Returns:
        ``(annotated_image, metadata, raw_output, json_output, status_message)``.
        On a validation failure or an exception during inference the first
        element is ``None``, the three middle strings are empty, and the
        status message describes the problem.
    """
    ok, reason = validate_image(image)
    if not ok:
        return None, "", "", "", f"Error: {reason}"

    # Normalise the prompt once; an empty or whitespace-only prompt is rejected.
    query = prompt.strip() if prompt else ""
    if not query:
        return None, "", "", "", "Error: Please enter a detection prompt."

    try:
        rgb_image = ensure_rgb(image)
        annotated, raw_text, result = run_localization(
            rgb_image,
            query,
            worker=get_worker(),
        )

        metadata_text = format_metadata(result)
        json_text = json.dumps(
            format_json_output(result),
            indent=2,
            ensure_ascii=False,
        )

        status_parts = [f"Done. Found {result.num_detections} object(s)."]
        if result.parse_errors:
            status_parts.append(f" ({len(result.parse_errors)} warning(s))")

        return annotated, metadata_text, raw_text, json_text, "".join(status_parts)
    except Exception as exc:
        # UI boundary: log the traceback and surface a readable message
        # instead of letting the exception escape into Gradio.
        logger.exception("Inference failed")
        return None, "", "", "", f"Inference error: {exc}"
def build_app() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire its events.

    The page has an input column (image upload, prompt box, run button,
    status line) beside an output column (annotated image plus Metadata /
    Raw Output / JSON Output tabs), followed by example prompts, an
    "About This Project" accordion and a footer. Clicking the run button
    and submitting the prompt box both invoke ``run_inference``.

    Returns:
        The constructed ``gr.Blocks`` application; launching is left to the caller.
    """
    # Static text is assembled up front so the layout code below stays readable.
    stylesheet = (
        "\n"
        ".main-title { text-align: center; margin-bottom: 0; }\n"
        ".subtitle { text-align: center; color: #666; margin-top: 0; }\n"
        ".footer { text-align: center; color: #999; font-size: 0.85em; margin-top: 20px; }\n"
    )
    header_html = (
        "\n"
        f'<h1 class="main-title">{APP_TITLE}</h1>\n'
        f'<p class="subtitle">{APP_SUBTITLE}</p>\n'
    )
    how_it_works_md = (
        "\n"
        "> **How it works:** Upload a space or satellite image and enter a natural-language\n"
        "> prompt describing what to locate. The model grounds your query in the image and\n"
        "> returns bounding box coordinates. Detection quality depends on image resolution,\n"
        "> object visibility, and model grounding capability.\n"
    )
    about_md = (
        "\n"
        "**SpaceDebris Localizer** is a hackathon prototype demonstrating how NVIDIA's\n"
        "**LocateAnything-3B** vision-language model can be applied to orbital debris\n"
        "localization and satellite component identification.\n"
        "### Capabilities\n"
        "- Open-set object detection from natural-language prompts\n"
        "- Bounding-box grounding for arbitrary visual concepts\n"
        "- Structured output with pixel-coordinate parsing\n"
        "### Limitations\n"
        "- The model was trained on general grounding data, not specifically orbital imagery\n"
        "- Detection quality depends heavily on image resolution and object clarity\n"
        "- Small debris fragments may not be reliably detected\n"
        "- This is a proof-of-concept, not a production debris tracking system\n"
        "### Model\n"
        "- [nvidia/LocateAnything-3B](https://huggingface.co/nvidia/LocateAnything-3B) on Hugging Face\n"
        "- 3B parameter vision-language model with Parallel Box Decoding\n"
        "- Coordinates are normalized to [0, 1000] and converted to pixel space\n"
    )
    footer_html = '<p class="footer">Powered by nvidia/LocateAnything-3B | SpaceDebris Localizer</p>'

    with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft(), css=stylesheet) as app:
        gr.HTML(header_html)
        gr.Markdown(how_it_works_md)

        with gr.Row():
            # Left column: user inputs and run status.
            with gr.Column(scale=1):
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(
                    label="Detection Prompt",
                    placeholder="e.g. Locate all the instances that match the following description: space debris.",
                    lines=2,
                )
                run_btn = gr.Button("Run Localization", variant="primary", size="lg")
                status_text = gr.Textbox(label="Status", interactive=False, lines=1)

            # Right column: annotated image and the three textual result views.
            with gr.Column(scale=1):
                output_image = gr.Image(type="pil", label="Annotated Image")
                with gr.Tabs():
                    with gr.TabItem("Metadata"):
                        metadata_output = gr.Textbox(
                            label="Detection Metadata",
                            lines=6,
                            interactive=False,
                        )
                    with gr.TabItem("Raw Output"):
                        raw_output = gr.Textbox(
                            label="Raw Model Output",
                            lines=8,
                            interactive=False,
                            show_copy_button=True,
                        )
                    with gr.TabItem("JSON Output"):
                        json_output = gr.Code(label="Parsed JSON", language="json", lines=8)

        gr.Markdown("### Example Prompts")
        gr.Markdown("Click an example to load it into the prompt field.")
        gr.Examples(
            examples=get_example_prompts(),
            inputs=[prompt_input],
            label="Space Debris Prompts",
        )

        with gr.Accordion("About This Project", open=False):
            gr.Markdown(about_md)

        gr.HTML(footer_html)

        # The button click and pressing Enter in the prompt box run the same
        # handler with the same inputs and outputs; fresh lists are built for
        # each registration.
        for register in (run_btn.click, prompt_input.submit):
            register(
                fn=run_inference,
                inputs=[input_image, prompt_input],
                outputs=[output_image, metadata_output, raw_output, json_output, status_text],
            )

    return app
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces.
    app = build_app()
    # The port comes from the PORT environment variable, defaulting to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    app.launch(server_name="0.0.0.0", server_port=listen_port)