"""SpaceDebris Localizer - Gradio application.
Uses nvidia/LocateAnything-3B to locate space debris, satellite fragments,
and spacecraft components in space imagery.
"""
from __future__ import annotations
import json
import logging
import os
import gradio as gr
from PIL import Image
from src.config import APP_SUBTITLE, APP_TITLE
from src.inference import LocateAnythingWorker, run_localization
from src.prompts import get_example_prompts
from src.utils import ensure_rgb, format_json_output, format_metadata, validate_image
# Configure root logging at import time so messages emitted while the UI is
# being built and while requests are served share one timestamped format.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Process-wide model worker. Starts as None; get_worker() creates and caches
# the instance the first time it is called.
worker: LocateAnythingWorker | None = None
def get_worker() -> LocateAnythingWorker:
    """Return the shared model worker, creating and loading it on first use.

    The instance is cached in the module-level ``worker`` global, so later
    calls return the same object without loading again.

    Returns:
        The loaded ``LocateAnythingWorker`` instance.

    Raises:
        Whatever ``LocateAnythingWorker()`` or its ``load()`` raises. In that
        case nothing is cached and the next call attempts the load again.
    """
    global worker
    if worker is None:
        logger.info("Loading LocateAnything-3B model...")
        # Publish the instance only after load() has succeeded. Assigning the
        # global first would leave an unloaded worker cached if load() raised,
        # and every subsequent call would skip loading and hand it out.
        candidate = LocateAnythingWorker()
        candidate.load()
        worker = candidate
        logger.info("Model loaded successfully.")
    return worker
def run_inference(
    image: Image.Image | None,
    prompt: str,
) -> tuple[Image.Image | None, str, str, str, str]:
    """Gradio callback: validate the inputs, run localization, format results.

    Args:
        image: Image supplied by the upload component, or None.
        prompt: Free-text detection prompt; surrounding whitespace is stripped
            before it is passed to the model.

    Returns:
        (annotated_image, metadata, raw_output, json_output, status_message).
        On any failure the image is None, the three text outputs are empty
        strings, and status_message carries the error text.
    """
    # Shared leading part of every failure result: no image, three empty texts.
    blank = (None, "", "", "")

    ok, problem = validate_image(image)
    if not ok:
        return (*blank, f"Error: {problem}")

    if not (prompt and prompt.strip()):
        return (*blank, "Error: Please enter a detection prompt.")

    try:
        rgb_image = ensure_rgb(image)
        model_worker = get_worker()
        annotated, raw_output, parsed = run_localization(
            rgb_image,
            prompt.strip(),
            worker=model_worker,
        )

        metadata = format_metadata(parsed)
        json_text = json.dumps(
            format_json_output(parsed),
            indent=2,
            ensure_ascii=False,
        )

        # Status line: detection count, plus a warning count when the parser
        # recorded any errors.
        status_parts = [f"Done. Found {parsed.num_detections} object(s)."]
        if parsed.parse_errors:
            status_parts.append(f"({len(parsed.parse_errors)} warning(s))")

        return annotated, metadata, raw_output, json_text, " ".join(status_parts)
    except Exception as exc:
        # UI boundary: log the traceback and report the failure in the status
        # field instead of letting the exception propagate to Gradio.
        logger.exception("Inference failed")
        return (*blank, f"Inference error: {exc}")
def build_app() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and return it without launching.

    The page consists of a title header, a short usage note, a two-column row
    (inputs and status on the left, annotated image and tabbed text outputs on
    the right), clickable example prompts, a collapsed "About" section and a
    footer. Clicking the run button and submitting the prompt textbox both
    invoke ``run_inference`` with the same inputs and outputs.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    # Static text is assembled up front so the layout code below stays compact.
    page_css = (
        "\n"
        ".main-title { text-align: center; margin-bottom: 0; }\n"
        ".subtitle { text-align: center; color: #666; margin-top: 0; }\n"
        ".footer { text-align: center; color: #999; font-size: 0.85em; margin-top: 20px; }\n"
    )
    header_html = (
        "\n"
        f'<h1 class="main-title">{APP_TITLE}</h1>\n'
        f'<p class="subtitle">{APP_SUBTITLE}</p>\n'
    )
    intro_md = (
        "\n"
        "> **How it works:** Upload a space or satellite image and enter a natural-language\n"
        "> prompt describing what to locate. The model grounds your query in the image and\n"
        "> returns bounding box coordinates. Detection quality depends on image resolution,\n"
        "> object visibility, and model grounding capability.\n"
    )
    about_md = (
        "\n"
        "**SpaceDebris Localizer** is a hackathon prototype demonstrating how NVIDIA's\n"
        "**LocateAnything-3B** vision-language model can be applied to orbital debris\n"
        "localization and satellite component identification.\n"
        "### Capabilities\n"
        "- Open-set object detection from natural-language prompts\n"
        "- Bounding-box grounding for arbitrary visual concepts\n"
        "- Structured output with pixel-coordinate parsing\n"
        "### Limitations\n"
        "- The model was trained on general grounding data, not specifically orbital imagery\n"
        "- Detection quality depends heavily on image resolution and object clarity\n"
        "- Small debris fragments may not be reliably detected\n"
        "- This is a proof-of-concept, not a production debris tracking system\n"
        "### Model\n"
        "- [nvidia/LocateAnything-3B](https://huggingface.co/nvidia/LocateAnything-3B) on Hugging Face\n"
        "- 3B parameter vision-language model with Parallel Box Decoding\n"
        "- Coordinates are normalized to [0, 1000] and converted to pixel space\n"
    )
    footer_html = '<p class="footer">Powered by nvidia/LocateAnything-3B | SpaceDebris Localizer</p>'

    with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft(), css=page_css) as app:
        gr.HTML(header_html)
        gr.Markdown(intro_md)

        with gr.Row():
            # Left column: inputs, trigger button and status line.
            with gr.Column(scale=1):
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(
                    label="Detection Prompt",
                    placeholder="e.g. Locate all the instances that match the following description: space debris.",
                    lines=2,
                )
                run_button = gr.Button("Run Localization", variant="primary", size="lg")
                status_box = gr.Textbox(label="Status", interactive=False, lines=1)

            # Right column: annotated image plus one tab per textual output.
            with gr.Column(scale=1):
                output_image = gr.Image(type="pil", label="Annotated Image")
                with gr.Tabs():
                    with gr.TabItem("Metadata"):
                        metadata_output = gr.Textbox(
                            label="Detection Metadata",
                            lines=6,
                            interactive=False,
                        )
                    with gr.TabItem("Raw Output"):
                        raw_output = gr.Textbox(
                            label="Raw Model Output",
                            lines=8,
                            interactive=False,
                            show_copy_button=True,
                        )
                    with gr.TabItem("JSON Output"):
                        json_output = gr.Code(label="Parsed JSON", language="json", lines=8)

        gr.Markdown("### Example Prompts")
        gr.Markdown("Click an example to load it into the prompt field.")
        gr.Examples(
            examples=get_example_prompts(),
            inputs=[prompt_input],
            label="Space Debris Prompts",
        )

        with gr.Accordion("About This Project", open=False):
            gr.Markdown(about_md)

        gr.HTML(footer_html)

        # The button click and pressing Enter in the prompt box run the same
        # handler; fresh input/output lists are built for each registration.
        for register in (run_button.click, prompt_input.submit):
            register(
                fn=run_inference,
                inputs=[input_image, prompt_input],
                outputs=[output_image, metadata_output, raw_output, json_output, status_box],
            )

    return app
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces.
    app = build_app()
    # The port comes from the PORT environment variable, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", "7860"))
    app.launch(server_name="0.0.0.0", server_port=listen_port)
|