Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import torch
import cv2
from PIL import Image
from eval.grounded_sam.grounded_sam2_florence2_autolabel_pipeline import FlorenceSAM
class ObjectDetector:
    """Detect and crop object instances from an image via FlorenceSAM grounding."""

    def __init__(self, device):
        """Initialize the underlying FlorenceSAM grounding/segmentation model.

        Args:
            device: torch device identifier (e.g. "cuda" or "cpu").
        """
        self.device = torch.device(device)
        self.detector = FlorenceSAM(device)

    def get_instances(self, gen_image, label, min_size=64):
        """Return cropped RGB instance images of `label` found in `gen_image`.

        Instances whose area is below `min_size**2` pixels, or whose shorter
        side is below `min_size // 4`, are discarded.

        Args:
            gen_image: input image passed through to the detector
                (presumably a PIL image — TODO confirm against FlorenceSAM).
            label: text prompt describing the object(s) to ground.
            min_size: pixel-size threshold for filtering small detections.

        Returns:
            list of PIL.Image.Image: filtered instance crops in RGB.
        """
        _, instance_result_dict = \
            self.detector.od_grounding_and_segmentation(
                image=gen_image, text_input=label,
            )
        instances = instance_result_dict["instance_images"]
        filtered_instances = []
        for img in instances:
            # numpy/OpenCV arrays are (height, width, channels); the original
            # code named these (width, height). The two checks below are
            # symmetric in the operands, so behavior is unchanged — only the
            # names are corrected.
            height, width = img.shape[:2]
            if width * height < min_size * min_size or min(width, height) < min_size // 4:
                continue
            # OpenCV arrays are BGR; convert before handing to PIL.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            filtered_instances.append(img)
        return filtered_instances

    def get_multiple_instances(self, gen_image, label, min_size=64):
        """Run grounding + segmentation and return the raw result dict.

        Unlike `get_instances`, no size filtering or color conversion is
        applied. `min_size` is currently unused; it is kept for interface
        parity with `get_instances`.
        """
        # self.detector.phrase_grounding_and_segmentation(
        _, instance_result_dict = \
            self.detector.od_grounding_and_segmentation(
                image=gen_image, text_input=label,
            )
        return instance_result_dict
if __name__ == "__main__":
    # Quick manual smoke test: detect instances of `label` in a sample image
    # and dump the square-padded crops to ./tmp for visual inspection.
    # online demo: https://dun.163.com/trial/face/compare
    from glob import glob
    from tqdm import tqdm
    from src.train.data.data_utils import split_grid, pad_to_square
    from eval.idip.dino import DINOScore

    detector = ObjectDetector("cuda")
    dino_model = DINOScore("cuda")

    gen_image = Image.open("assets/tests/20250320-151038.jpeg").convert("RGB")
    label = "two people"
    save_dir = "tmp"  # fix: was f"tmp" — an f-string with no placeholders
    os.makedirs(save_dir, exist_ok=True)

    # for i, img in enumerate(split_grid(gen_image)):
    for i, img in enumerate([gen_image]):
        # Keep at most 3 detections; scale the size threshold to the image width.
        found_ips = detector.get_instances(img, label, min_size=img.size[0] // 20)[:3]
        found_ips = [pad_to_square(x) for x in found_ips]
        for j, ip in enumerate(found_ips):
            # score = dino_model(real_image, ip)
            score = 1
            # `ip` was already padded to square above; the original padded a
            # second time at save, which is redundant (padding a square image
            # is a no-op — NOTE(review): confirm pad_to_square is idempotent).
            ip.save(f"{save_dir}/{label}_{i}_{j}_{score}.png")