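"""Smoke test: run the repo's SAM2 wrapper on one image with a dummy language embedding.

Loads assets/view.jpg, applies ImageNet-style preprocessing, repeats the image
into a batch of 5 frames, and calls `inject_language_embd` with an all-ones
embedding to check that the forward path runs end to end.
"""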
from projects.llava_sam2.models.sam2 import SAM2
import numpy as np
from PIL import Image
import torch
IMG_PATH = 'assets/view.jpg'
IMG_SIZE = 1024
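# Standard ImageNet normalization statistics.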
img_mean = (0.485, 0.456, 0.406)
img_std = (0.229, 0.224, 0.225)
def prepare():
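    # Enter a global autocast (bfloat16) context via __enter__(); it is never
    # exited, which is fine for a one-shot script. TF32 matmul is also enabled
    # for additional speed on Ampere and newer GPUs.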
    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

if __name__ == '__main__':
    prepare()
    model = SAM2()
    model.eval()
    model.to(torch.device('cuda'))
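
    # Load the image, resize it to the model's input resolution, and scale
    # pixel values to [0, 1].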
    img_pil = Image.open(IMG_PATH)
    img_np = np.array(img_pil.convert("RGB").resize((IMG_SIZE, IMG_SIZE)))
    if img_np.dtype == np.uint8:  # np.uint8 is expected for JPEG images
        img_np = img_np / 255.0
    else:
        raise NotImplementedError

    # HWC -> CHW, then normalize with the ImageNet mean/std. The division by
    # 255.0 above produced float64, so cast back to float32 for the model.
    img = torch.from_numpy(img_np).permute(2, 0, 1).float()
    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
    img -= img_mean
    img /= img_std

    # Fake a batch of 5 identical frames and move it to the GPU to match the
    # model and the language embedding below.
    images = img.unsqueeze(0).repeat(5, 1, 1, 1).cuda()

    # Dummy all-ones language embedding (batch 1, 1 token, 256 dims) standing
    # in for real text features; it exists only to exercise the injection path.
    language_embd = torch.ones((1, 1, 256), dtype=torch.float32, device=torch.device('cuda'))
    a = model.inject_language_embd(images, language_embd)
    print(1)  # reaching this line means the smoke test completed without error
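
# Usage sketch (the script name is hypothetical): from the repo root, with a
# CUDA-capable GPU and assets/view.jpg present, run:
#   python sam2_smoke_test.py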