키포인트 탐지 [[keypoint-detection]]
[[open-in-colab]]
키포인트 감지(Keypoint detection)는 이미지 내의 특정 포인트를 식별하고 위치를 탐지합니다. 이러한 키포인트는 랜드마크라고도 불리며 얼굴 특징이나 물체의 일부와 같은 의미 있는 특징을 나타냅니다. 키포인트 감지 모델들은 이미지를 입력으로 받아 아래와 같은 출력을 반환합니다.
- 키포인트들과 점수: 관심 포인트들과 해당 포인트에 대한 신뢰도 점수
- 디스크립터(Descriptors): 각 키포인트를 둘러싼 이미지 영역의 표현으로 텍스처, 그라데이션, 방향 및 기타 속성을 캡처합니다.
이번 가이드에서는 이미지에서 키포인트를 추출하는 방법을 다루어 보겠습니다.
이번 튜토리얼에서는 키포인트 감지의 기본이 되는 모델인 SuperPoint를 사용해 보겠습니다.
# Load the SuperPoint image processor and keypoint-detection model from the Hub.
from transformers import AutoImageProcessor, SuperPointForKeypointDetection
processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint")
model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint")
아래의 이미지로 모델을 테스트해 보겠습니다.
import torch
from PIL import Image
import requests
# NOTE(review): cv2 is imported but never used in this snippet — confirm a later
# section needs it; otherwise it can be dropped.
import cv2
# Download two sample images and open them as PIL images
# (the requests raw stream is handed straight to PIL).
url_image_1 = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"
image_1 = Image.open(requests.get(url_image_1, stream=True).raw)
url_image_2 = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png"
image_2 = Image.open(requests.get(url_image_2, stream=True).raw)
# Batch the two images together for a single forward pass.
images = [image_1, image_2]
이제 입력을 처리하고 추론을 할 수 있습니다.
# Preprocess the images, move the tensors onto the model's device and dtype,
# then run the forward pass.
model_inputs = processor(images, return_tensors="pt").to(model.device, model.dtype)
outputs = model(**model_inputs)
모델 출력에는 배치 내의 각 항목에 대한 상대적인 키포인트, 디스크립터, 마스크와 점수가 있습니다. 마스크는 이미지에서 키포인트가 있는 영역을 강조하는 역할을 합니다.
SuperPointKeypointDescriptionOutput(loss=None, keypoints=tensor([[[0.0437, 0.0167],
[0.0688, 0.0167],
[0.0172, 0.0188],
...,
[0.5984, 0.9812],
[0.6953, 0.9812]]]),
scores=tensor([[0.0056, 0.0053, 0.0079, ..., 0.0125, 0.0539, 0.0377],
[0.0206, 0.0058, 0.0065, ..., 0.0000, 0.0000, 0.0000]],
grad_fn=<CopySlices>), descriptors=tensor([[[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357],
[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357],
[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357],
...],
grad_fn=<CopySlices>), mask=tensor([[1, 1, 1, ..., 1, 1, 1],
[1, 1, 1, ..., 0, 0, 0]], dtype=torch.int32), hidden_states=None)
이미지에 실제 키포인트를 표시하기 위해서는 결과값을 후처리해야 합니다. 이를 위해 실제 이미지 크기를 결과값과 함께 `post_process_keypoint_detection`에 전달해야 합니다.
# PIL reports size as (width, height), but post-processing expects
# (height, width) — build the sizes accordingly.
image_sizes = [(img.height, img.width) for img in images]
outputs = processor.post_process_keypoint_detection(outputs, image_sizes)
위 코드를 통해 결과값은 딕셔너리를 갖는 리스트가 되고, 각 딕셔너리들은 후처리된 키포인트, 점수 및 디스크립터로 이루어져 있습니다.
[{'keypoints': tensor([[ 226, 57],
[ 356, 57],
[ 89, 64],
...,
[3604, 3391]], dtype=torch.int32),
'scores': tensor([0.0056, 0.0053, ...], grad_fn=<IndexBackward0>),
'descriptors': tensor([[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357],
[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357]],
grad_fn=<IndexBackward0>)},
{'keypoints': tensor([[ 46, 6],
[ 78, 6],
[422, 6],
[206, 404]], dtype=torch.int32),
'scores': tensor([0.0206, 0.0058, 0.0065, 0.0053, 0.0070,  ...], grad_fn=<IndexBackward0>),
'descriptors': tensor([[-0.0525, 0.0726, 0.0270, ..., 0.0389, -0.0189, -0.0211],
[-0.0525, 0.0726, 0.0270, ..., 0.0389, -0.0189, -0.0211]], grad_fn=<IndexBackward0>)}]
이제 위 딕셔너리를 사용하여 키포인트를 표시할 수 있습니다.
import matplotlib.pyplot as plt

# Overlay the detected keypoints on each image; marker size is proportional
# to the keypoint confidence score.
# Fixes over the original: the first keypoints/scores assignments were
# immediately overwritten, and `descriptors`, `image_width`/`image_height`,
# and `import torch` were unused.
for image, output in zip(images, outputs):
    # Detach from the autograd graph and convert to NumPy for matplotlib.
    keypoints = output["keypoints"].detach().numpy()
    scores = output["scores"].detach().numpy()

    plt.axis('off')
    plt.imshow(image)
    plt.scatter(
        keypoints[:, 0],  # x coordinates
        keypoints[:, 1],  # y coordinates
        s=scores * 100,   # scale marker area by confidence
        c='cyan',
        alpha=0.4
    )
    plt.show()
아래에서 결과를 확인할 수 있습니다.