Spaces:

Sathvik0101
/

obj_localizer

Running

App Files Files Community

obj_localizer / app.py

3v324v23

fix: resolve all ruff lint errors

cf388f7 13 days ago

Raw

History Blame Contribute Delete

6.54 kB

	"""SpaceDebris Localizer - Gradio application.

	Uses nvidia/LocateAnything-3B to locate space debris, satellite fragments,
	and spacecraft components in space imagery.
	"""

	from __future__ import annotations

	import json
	import logging
	import os

	import gradio as gr
	from PIL import Image

	from src.config import APP_SUBTITLE, APP_TITLE
	from src.inference import LocateAnythingWorker, run_localization
	from src.prompts import get_example_prompts
	from src.utils import ensure_rgb, format_json_output, format_metadata, validate_image

	# Configure logging once at import time so model-loading and inference
	# messages are emitted with a timestamp and level.
	logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
	logger = logging.getLogger(__name__)

	# Process-wide model worker; stays None until get_worker() populates it.
	worker: LocateAnythingWorker | None = None


	def get_worker() -> LocateAnythingWorker:
	    """Return the shared model worker, loading it on first use.

	    The new instance is stored in the module-level ``worker`` only after
	    ``load()`` has returned. If loading raises, the global stays ``None`` so
	    the next call retries instead of handing out a worker that never loaded.

	    Returns:
	        The cached ``LocateAnythingWorker`` instance.

	    Raises:
	        Whatever ``LocateAnythingWorker()`` or its ``load()`` raises; the
	        exception propagates unchanged to the caller.
	    """
	    global worker
	    if worker is None:
	        logger.info("Loading LocateAnything-3B model...")
	        candidate = LocateAnythingWorker()
	        candidate.load()
	        # Publish only after a successful load.
	        worker = candidate
	        logger.info("Model loaded successfully.")
	    return worker


	def run_inference(
	    image: Image.Image | None,
	    prompt: str,
	) -> tuple[Image.Image | None, str, str, str, str]:
	    """Run localization for the Gradio interface.

	    The image is validated first, then the prompt; either failure returns
	    early with an error status. Any exception raised while preparing the
	    image, loading the model, running localization or formatting results is
	    logged with its traceback and reported in the status string rather than
	    propagated to Gradio.

	    Args:
	        image: Uploaded image, or ``None`` when nothing was uploaded.
	        prompt: Natural-language description of what to locate.

	    Returns:
	        ``(annotated_image, metadata, raw_output, json_output, status_message)``.
	        On any failure the image is ``None``, the three text outputs are
	        empty strings, and ``status_message`` describes the problem.
	    """

	    def failure(message: str) -> tuple[None, str, str, str, str]:
	        # Every error path clears the four result outputs and only sets status.
	        return None, "", "", "", message

	    is_valid, error_msg = validate_image(image)
	    if not is_valid:
	        return failure(f"Error: {error_msg}")

	    # Strip once; the same cleaned text is what gets sent to the model.
	    cleaned_prompt = prompt.strip() if prompt else ""
	    if not cleaned_prompt:
	        return failure("Error: Please enter a detection prompt.")

	    try:
	        image_rgb = ensure_rgb(image)
	        annotated, raw_output, parsed = run_localization(
	            image_rgb, cleaned_prompt, worker=get_worker()
	        )

	        metadata = format_metadata(parsed)
	        json_str = json.dumps(format_json_output(parsed), indent=2, ensure_ascii=False)

	        status = f"Done. Found {parsed.num_detections} object(s)."
	        if parsed.parse_errors:
	            status += f" ({len(parsed.parse_errors)} warning(s))"

	        return annotated, metadata, raw_output, json_str, status

	    except Exception as exc:
	        # UI boundary: keep the app responsive and surface the error text.
	        logger.exception("Inference failed")
	        return failure(f"Inference error: {exc}")


	def build_app() -> gr.Blocks:
	    """Build the Gradio Blocks interface.

	    Layout, top to bottom: a title/subtitle header, a short "how it works"
	    note, a two-column row (inputs and status on the left; annotated image
	    and tabbed text outputs on the right), a list of example prompts, a
	    collapsed "About" accordion, and a footer. Both the run button and
	    pressing Enter in the prompt box call ``run_inference``.

	    Returns:
	        The constructed ``gr.Blocks`` app; the caller is responsible for
	        launching it.
	    """
	    with gr.Blocks(
	        title=APP_TITLE,
	        theme=gr.themes.Soft(),
	        css="""
	.main-title { text-align: center; margin-bottom: 0; }
	.subtitle { text-align: center; color: #666; margin-top: 0; }
	.footer { text-align: center; color: #999; font-size: 0.85em; margin-top: 20px; }
	""",
	    ) as app:
	        gr.HTML(f"""
	<h1 class="main-title">{APP_TITLE}</h1>
	<p class="subtitle">{APP_SUBTITLE}</p>
	""")

	        gr.Markdown("""
	> How it works: Upload a space or satellite image and enter a natural-language
	> prompt describing what to locate. The model grounds your query in the image and
	> returns bounding box coordinates. Detection quality depends on image resolution,
	> object visibility, and model grounding capability.
	""")

	        with gr.Row():
	            # Left column: user inputs and the status line.
	            with gr.Column(scale=1):
	                input_image = gr.Image(type="pil", label="Upload Image")
	                prompt_input = gr.Textbox(
	                    label="Detection Prompt",
	                    placeholder="e.g. Locate all the instances that match the following description: space debris.",
	                    lines=2,
	                )
	                run_btn = gr.Button("Run Localization", variant="primary", size="lg")
	                status_text = gr.Textbox(label="Status", interactive=False, lines=1)

	            # Right column: annotated image plus one tab per text output.
	            with gr.Column(scale=1):
	                output_image = gr.Image(type="pil", label="Annotated Image")
	                with gr.Tabs():
	                    with gr.TabItem("Metadata"):
	                        metadata_output = gr.Textbox(label="Detection Metadata", lines=6, interactive=False)
	                    with gr.TabItem("Raw Output"):
	                        raw_output = gr.Textbox(label="Raw Model Output", lines=8, interactive=False, show_copy_button=True)
	                    with gr.TabItem("JSON Output"):
	                        json_output = gr.Code(label="Parsed JSON", language="json", lines=8)

	        gr.Markdown("### Example Prompts")
	        gr.Markdown("Click an example to load it into the prompt field.")
	        examples_list = get_example_prompts()
	        gr.Examples(
	            examples=examples_list,
	            inputs=[prompt_input],
	            label="Space Debris Prompts",
	        )

	        with gr.Accordion("About This Project", open=False):
	            gr.Markdown("""
	SpaceDebris Localizer is a hackathon prototype demonstrating how NVIDIA's
	LocateAnything-3B vision-language model can be applied to orbital debris
	localization and satellite component identification.

	### Capabilities
	- Open-set object detection from natural-language prompts
	- Bounding-box grounding for arbitrary visual concepts
	- Structured output with pixel-coordinate parsing

	### Limitations
	- The model was trained on general grounding data, not specifically orbital imagery
	- Detection quality depends heavily on image resolution and object clarity
	- Small debris fragments may not be reliably detected
	- This is a proof-of-concept, not a production debris tracking system

	### Model
	- [nvidia/LocateAnything-3B](https://huggingface.co/nvidia/LocateAnything-3B) on Hugging Face
	- 3B parameter vision-language model with Parallel Box Decoding
	- Coordinates are normalized to [0, 1000] and converted to pixel space
	""")

	        gr.HTML('<p class="footer">Powered by nvidia/LocateAnything-3B | SpaceDebris Localizer</p>')

	        # The button click and Enter-in-textbox share one handler and the
	        # same input/output wiring, so register both from a single place.
	        for trigger in (run_btn.click, prompt_input.submit):
	            trigger(
	                fn=run_inference,
	                inputs=[input_image, prompt_input],
	                outputs=[output_image, metadata_output, raw_output, json_output, status_text],
	            )

	    return app


	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
	    listen_port = int(os.getenv("PORT", "7860"))
	    app = build_app()
	    app.launch(server_name="0.0.0.0", server_port=listen_port)