init project
Browse files- app.py +12 -5
- modules/pe3r/models.py +3 -3
app.py
CHANGED
|
@@ -37,6 +37,8 @@ from modules.mobilesamv2.utils.transforms import ResizeLongestSide
|
|
| 37 |
from modules.pe3r.models import Models
|
| 38 |
import torchvision.transforms as tvf
|
| 39 |
|
|
|
|
|
|
|
| 40 |
silent = False
|
| 41 |
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 42 |
pe3r = Models('cpu') # load on CPU here; moved to the available device at call time
|
|
@@ -304,6 +306,10 @@ def get_cog_feats(images):
|
|
| 304 |
|
| 305 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 306 |
pe3r.sam2.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
# pe3r.siglip_processor.to(device)
|
| 308 |
# pe3r.siglip.to(device)
|
| 309 |
|
|
@@ -399,10 +405,10 @@ def get_cog_feats(images):
|
|
| 399 |
seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
|
| 400 |
seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0
|
| 401 |
|
| 402 |
-
inputs =
|
| 403 |
inputs = {key: value.to(device) for key, value in inputs.items()}
|
| 404 |
|
| 405 |
-
image_features =
|
| 406 |
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
|
| 407 |
image_features = image_features.detach().cpu()
|
| 408 |
|
|
@@ -519,13 +525,14 @@ def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_po
|
|
| 519 |
|
| 520 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 521 |
# pe3r.siglip_tokenizer.to(device)
|
| 522 |
-
|
|
|
|
| 523 |
|
| 524 |
texts = [text]
|
| 525 |
-
inputs =
|
| 526 |
inputs = {key: value.to(device) for key, value in inputs.items()}
|
| 527 |
with torch.no_grad():
|
| 528 |
-
text_feats =
|
| 529 |
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
|
| 530 |
scene.render_image(text_feats, threshold)
|
| 531 |
scene.ori_imgs = scene.rendered_imgs
|
|
|
|
| 37 |
from modules.pe3r.models import Models
|
| 38 |
import torchvision.transforms as tvf
|
| 39 |
|
| 40 |
+
from transformers import AutoTokenizer, AutoModel, AutoProcessor, SamModel
|
| 41 |
+
|
| 42 |
silent = False
|
| 43 |
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 44 |
pe3r = Models('cpu') # load on CPU here; moved to the available device at call time
|
|
|
|
| 306 |
|
| 307 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 308 |
pe3r.sam2.to(device)
|
| 309 |
+
|
| 310 |
+
siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 311 |
+
siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 312 |
+
|
| 313 |
# pe3r.siglip_processor.to(device)
|
| 314 |
# pe3r.siglip.to(device)
|
| 315 |
|
|
|
|
| 405 |
seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
|
| 406 |
seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0
|
| 407 |
|
| 408 |
+
inputs = siglip_processor(images=seg_imgs, return_tensors="pt")
|
| 409 |
inputs = {key: value.to(device) for key, value in inputs.items()}
|
| 410 |
|
| 411 |
+
image_features = siglip.get_image_features(**inputs)
|
| 412 |
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
|
| 413 |
image_features = image_features.detach().cpu()
|
| 414 |
|
|
|
|
| 525 |
|
| 526 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 527 |
# pe3r.siglip_tokenizer.to(device)
|
| 528 |
+
siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 529 |
+
siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 530 |
|
| 531 |
texts = [text]
|
| 532 |
+
inputs = siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
|
| 533 |
inputs = {key: value.to(device) for key, value in inputs.items()}
|
| 534 |
with torch.no_grad():
|
| 535 |
+
text_feats = siglip.get_text_features(**inputs)
|
| 536 |
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
|
| 537 |
scene.render_image(text_feats, threshold)
|
| 538 |
scene.ori_imgs = scene.rendered_imgs
|
modules/pe3r/models.py
CHANGED
|
@@ -47,6 +47,6 @@ class Models:
|
|
| 47 |
self.yolov8 = ObjectAwareModel(YOLO8_CKP)
|
| 48 |
|
| 49 |
# -- siglip --
|
| 50 |
-
self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 51 |
-
self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 52 |
-
self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
|
|
|
| 47 |
self.yolov8 = ObjectAwareModel(YOLO8_CKP)
|
| 48 |
|
| 49 |
# -- siglip --
|
| 50 |
+
# self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 51 |
+
# self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|
| 52 |
+
# self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
|