Spaces:

Sathvik0101
/

obj_localizer

Running

App Files Files Community

obj_localizer / src /parsing.py

3v324v23

feat: initialize SpaceDebris Localizer project

23db765 13 days ago

Raw

History Blame Contribute Delete

5.52 kB

	"""Output parsing for LocateAnything-3B bounding box responses."""

	from __future__ import annotations

	import re
	from dataclasses import dataclass, field
	from typing import Any

	from src.config import COORD_MAX, DEFAULT_CONFIDENCE


	@dataclass
	class BBox:
	    """Axis-aligned bounding box described by two corners in pixel space.

	    ``(x1, y1)`` and ``(x2, y2)`` are the corner coordinates, ``confidence``
	    falls back to ``DEFAULT_CONFIDENCE`` and ``label`` to an empty string.
	    """

	    x1: float
	    y1: float
	    x2: float
	    y2: float
	    confidence: float = DEFAULT_CONFIDENCE
	    label: str = ""

	    @property
	    def width(self) -> float:
	        """Horizontal extent ``x2 - x1``, never reported below zero."""
	        span = self.x2 - self.x1
	        return max(0.0, span)

	    @property
	    def height(self) -> float:
	        """Vertical extent ``y2 - y1``, never reported below zero."""
	        span = self.y2 - self.y1
	        return max(0.0, span)

	    @property
	    def area(self) -> float:
	        """Product of ``width`` and ``height``."""
	        w, h = self.width, self.height
	        return w * h

	    @property
	    def center(self) -> tuple[float, float]:
	        """Midpoint of the two corners as ``(x, y)``."""
	        mid_x = (self.x1 + self.x2) / 2
	        mid_y = (self.y1 + self.y2) / 2
	        return (mid_x, mid_y)

	    def is_valid(self, img_w: int, img_h: int) -> bool:
	        """Tell whether the box is usable for an ``img_w`` x ``img_h`` image.

	        The top-left corner must be non-negative, the bottom-right corner may
	        exceed the image size by at most one pixel, and both the width and
	        the height must be strictly greater than one pixel.
	        """
	        if not (self.x1 >= 0 and self.y1 >= 0):
	            return False
	        # One pixel of slack is tolerated on the far edges.
	        if not (self.x2 <= img_w + 1 and self.y2 <= img_h + 1):
	            return False
	        return self.width > 1 and self.height > 1

	    def clamp(self, img_w: int, img_h: int) -> BBox:
	        """Build a new box whose corners are limited to ``[0, img_w]`` x ``[0, img_h]``.

	        ``confidence`` and ``label`` are carried over unchanged; the receiver
	        itself is not modified.
	        """
	        left = max(0, min(self.x1, img_w))
	        top = max(0, min(self.y1, img_h))
	        right = max(0, min(self.x2, img_w))
	        bottom = max(0, min(self.y2, img_h))
	        return BBox(
	            x1=left,
	            y1=top,
	            x2=right,
	            y2=bottom,
	            confidence=self.confidence,
	            label=self.label,
	        )

	    def to_dict(self) -> dict[str, Any]:
	        """Serialize the box to a plain dictionary.

	        Geometric values are rounded to two decimals; ``confidence`` and
	        ``label`` are emitted as stored.
	        """
	        rounded_fields = ("x1", "y1", "x2", "y2", "width", "height")
	        payload: dict[str, Any] = {
	            name: round(getattr(self, name), 2) for name in rounded_fields
	        }
	        payload["confidence"] = self.confidence
	        payload["label"] = self.label
	        return payload


	@dataclass
	class ParseResult:
	    """Outcome of parsing one raw model response.

	    Holds the boxes that were accepted, the untouched model text, and any
	    messages describing entries that could not be used.
	    """

	    boxes: list[BBox] = field(default_factory=list)
	    raw_output: str = ""
	    parse_errors: list[str] = field(default_factory=list)

	    @property
	    def num_detections(self) -> int:
	        """Count of boxes currently stored in ``boxes``."""
	        return len(self.boxes)

	    def to_dict(self) -> dict[str, Any]:
	        """Serialize the result, delegating each box to its own ``to_dict``."""
	        payload: dict[str, Any] = {}
	        payload["num_detections"] = self.num_detections
	        payload["boxes"] = [entry.to_dict() for entry in self.boxes]
	        payload["raw_output"] = self.raw_output
	        payload["parse_errors"] = self.parse_errors
	        return payload


	# Primary box format tried first by parse_boxes: four angle-bracketed
	# integers, e.g. "<box><12><34><560><780></box>".
	BOX_PATTERN_4 = re.compile(r"<box><(\d+)><(\d+)><(\d+)><(\d+)></box>")
	# Fallback box format: four comma-separated integers. Whitespace around each
	# number is optional (``\s*``); a bare ``\s`` would demand exactly one
	# whitespace character everywhere and reject "<box>1,2,3,4</box>" or
	# "<box>1, 2, 3, 4</box>".
	BOX_PATTERN_4_ALT = re.compile(
	    r"<box>\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*</box>"
	)
	# Point format used by parse_points: two angle-bracketed integers,
	# e.g. "<box><500><250></box>".
	BOX_PATTERN_2 = re.compile(r"<box><(\d+)><(\d+)></box>")


	def _norm_to_pixel(val: int, scale: int) -> float:
	    """Map a normalized model coordinate onto a pixel axis of length ``scale``.

	    The value is first divided by ``COORD_MAX`` (the upper end of the
	    normalized range, documented as 1000) and then multiplied by ``scale``.
	    """
	    fraction = val / COORD_MAX
	    return fraction * scale


	def _collect_boxes(
	    pattern: re.Pattern[str],
	    raw_output: str,
	    image_width: int,
	    image_height: int,
	    result: ParseResult,
	    seen: set[tuple[float, float, float, float]],
	    failure_label: str,
	) -> None:
	    """Append every new, valid box matched by ``pattern`` to ``result``.

	    Args:
	        pattern: Compiled regex exposing the four coordinates as groups 1-4.
	        raw_output: Raw model text to scan.
	        image_width: Pixel width used to scale x coordinates.
	        image_height: Pixel height used to scale y coordinates.
	        result: Accumulator whose ``boxes`` / ``parse_errors`` are extended.
	        seen: Rounded coordinate keys already processed; updated in place so
	            duplicates are skipped across successive calls.
	        failure_label: Noun used in the "Failed to parse ..." error message.
	    """
	    for match in pattern.finditer(raw_output):
	        # Only the integer conversion can raise here; keep the guarded region
	        # as small as possible.
	        try:
	            x1 = _norm_to_pixel(int(match.group(1)), image_width)
	            y1 = _norm_to_pixel(int(match.group(2)), image_height)
	            x2 = _norm_to_pixel(int(match.group(3)), image_width)
	            y2 = _norm_to_pixel(int(match.group(4)), image_height)
	        except (ValueError, IndexError) as exc:
	            result.parse_errors.append(f"Failed to parse {failure_label}: {exc}")
	            continue

	        # De-duplicate on coordinates rounded to a tenth of a pixel.
	        key = (round(x1, 1), round(y1, 1), round(x2, 1), round(y2, 1))
	        if key in seen:
	            continue
	        seen.add(key)

	        box = BBox(x1=x1, y1=y1, x2=x2, y2=y2)
	        if box.is_valid(image_width, image_height):
	            result.boxes.append(box)
	        else:
	            result.parse_errors.append(f"Out-of-bounds box discarded: {key}")


	def parse_boxes(
	    raw_output: str,
	    image_width: int,
	    image_height: int,
	) -> ParseResult:
	    """Parse model output into structured bounding boxes.

	    The model outputs coordinates normalized to [0, 1000]; they are converted
	    to pixel coordinates using the supplied image size.

	    The angle-bracket format (``BOX_PATTERN_4``) is tried first. The
	    comma-separated format (``BOX_PATTERN_4_ALT``) is only consulted when the
	    first pass produced no valid box. In both passes duplicate boxes are
	    skipped, and boxes rejected by ``BBox.is_valid`` are reported in
	    ``parse_errors`` rather than being dropped silently.

	    Args:
	        raw_output: Raw text generated by the model.
	        image_width: Width of the source image in pixels.
	        image_height: Height of the source image in pixels.

	    Returns:
	        A ``ParseResult`` holding the accepted boxes, the original text, and
	        a message for every entry that was discarded or failed to parse.
	    """
	    result = ParseResult(raw_output=raw_output)
	    seen: set[tuple[float, float, float, float]] = set()

	    _collect_boxes(
	        BOX_PATTERN_4, raw_output, image_width, image_height, result, seen, "box"
	    )

	    if not result.boxes:
	        _collect_boxes(
	            BOX_PATTERN_4_ALT,
	            raw_output,
	            image_width,
	            image_height,
	            result,
	            seen,
	            "alt box",
	        )

	    return result


	def parse_points(
	    raw_output: str,
	    image_width: int,
	    image_height: int,
	) -> list[dict[str, float]]:
	    """Extract two-coordinate point tags from model output as pixel positions.

	    Each ``BOX_PATTERN_2`` match is scaled to the image size and returned as
	    ``{"x": ..., "y": ...}`` rounded to two decimals, in order of appearance.
	    Matches whose coordinates cannot be converted are skipped.
	    """
	    found: list[dict[str, float]] = []
	    for hit in BOX_PATTERN_2.finditer(raw_output):
	        try:
	            px = _norm_to_pixel(int(hit.group(1)), image_width)
	            py = _norm_to_pixel(int(hit.group(2)), image_height)
	            point = {"x": round(px, 2), "y": round(py, 2)}
	        except (ValueError, IndexError):
	            # Best effort: an unparseable point is ignored, not fatal.
	            continue
	        found.append(point)
	    return found