"""Run a YOLO person model on one image read from stdin and print JSON.

The script reads raw image bytes from standard input, runs the model whose
weights file sits next to this script, and writes a JSON list of bounding-box
detections to standard output. When the weights are missing or the input
cannot be decoded it prints an empty JSON list and exits with status 0.
"""

import argparse
import json
import sys
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List

import numpy as np
from PIL import Image
from ultralytics import YOLO

# Weights file expected to live in the same directory as this script.
_MODEL_FILENAME = "yolo12l-person-seg.pt"

# Class names reported by the model that are renamed in the output.
_NAME_OVERRIDES = {"item": "person"}


def load_image(frame: Any, base_dir: Path) -> Image.Image:
    """Load *frame* as an RGB PIL image.

    Args:
        frame: Either raw encoded image bytes (``bytes``, ``bytearray`` or
            ``memoryview``) or a path-like value that is converted with
            ``str()``.
        base_dir: Fallback directory. A path that does not exist after being
            resolved against the current working directory is retried
            relative to this directory.

    Returns:
        A new image in ``"RGB"`` mode.

    Raises:
        Whatever ``PIL.Image.open`` raises for unreadable or missing input
        (e.g. ``FileNotFoundError``, ``PIL.UnidentifiedImageError``).
    """
    if isinstance(frame, (bytes, bytearray, memoryview)):
        source: Any = BytesIO(frame)
    else:
        path = Path(str(frame))
        if not path.is_absolute():
            path = (Path.cwd() / path).resolve()
        if not path.exists():
            candidate = (base_dir / str(frame)).resolve()
            if candidate.exists():
                path = candidate
        source = path

    # convert() returns an independent image, so the source image (and any
    # file handle Pillow opened for it) can be released right away.
    with Image.open(source) as img:
        return img.convert("RGB")


def load_model(*_args: Any, **_kwargs: Any):
    """Load the YOLO model from the weights file next to this script.

    Positional and keyword arguments are accepted and ignored.

    Returns:
        A ``YOLO`` instance, or ``None`` when the weights file does not exist.
    """
    model_path = Path(__file__).resolve().parent / _MODEL_FILENAME
    if not model_path.exists():
        return None
    return YOLO(str(model_path))


def run_model(model, frame: "np.ndarray") -> List[Dict[str, Any]]:
    """Run *model* on a single frame and return its box detections.

    Args:
        model: Callable model object; it is invoked with a PIL image and must
            return a sequence of result objects exposing ``names`` and
            ``boxes``. It is also expected to expose ``names`` itself as a
            fallback.
        frame: Image array accepted by ``PIL.Image.fromarray``.

    Returns:
        One dict per detected box of the first result, with the keys
        ``frame_idx`` (always 0), ``class``, ``bbox`` (the box's ``xyxy``
        values as floats), ``score`` and ``track_id`` (``"f0-d<index>"``).
        An empty list when the model yields no result or no boxes.
    """
    image = Image.fromarray(frame)
    results = model(image)
    if not results:
        return []

    result = results[0]
    boxes = result.boxes
    if boxes is None:
        # No box output available for this result; nothing to report.
        return []

    # Prefer the per-result name table, fall back to the model's; tolerate
    # both being absent so class ids are still emitted as strings.
    names = result.names or model.names or {}

    detections: List[Dict[str, Any]] = []
    for det_idx, box in enumerate(boxes):
        class_id = int(box.cls[0].item())
        class_name = names.get(class_id, str(class_id))
        class_name = _NAME_OVERRIDES.get(class_name, class_name)
        detections.append(
            {
                "frame_idx": 0,
                "class": class_name,
                "bbox": [float(x) for x in box.xyxy[0].tolist()],
                "score": float(box.conf[0].item()),
                "track_id": f"f0-d{det_idx}",
            }
        )
    return detections


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(description="Run YOLO12l person segmentation.")
    # NOTE: with action="store_true" and default=True the option is always
    # true whether or not it is passed; it is kept so existing invocations
    # that pass --stdin-raw continue to parse.
    parser.add_argument(
        "--stdin-raw",
        action="store_true",
        default=True,
        help="Read raw image bytes from stdin.",
    )
    return parser


def main() -> int:
    """Read an image from stdin, run the model, print detections as JSON.

    Returns:
        The process exit status (always 0). Failures to find the model or to
        decode the input are reported on stderr and produce ``[]`` on stdout.
    """
    # Parsed for its side effects (--help, rejecting unknown arguments); the
    # resulting namespace is not needed.
    build_parser().parse_args()
    base_dir = Path(__file__).resolve().parent

    model = load_model()
    if model is None:
        print(f"model weights not found: {base_dir / _MODEL_FILENAME}", file=sys.stderr)
        print("[]")
        return 0

    try:
        image = load_image(sys.stdin.buffer.read(), base_dir)
    except Exception as exc:  # top-level boundary: keep the best-effort contract
        print(f"could not load image from stdin: {exc}", file=sys.stderr)
        print("[]")
        return 0

    frame = np.array(image)
    print(json.dumps(run_model(model, frame)))
    return 0


if __name__ == "__main__":
    sys.exit(main())