Update app.py
app.py
CHANGED
@@ -2,6 +2,7 @@ import argparse
 import os
 from pathlib import Path
 import tempfile
+import tarfile
 import sys
 import cv2
 import gradio as gr
@@ -23,12 +24,22 @@ from hamer.models import HAMER
 from hamer.utils import recursive_to
 from hamer.utils.renderer import Renderer, cam_crop_to_full
 
-
-
-
-
-
-
+def extract_tar() -> None:
+    if Path('mmdet_configs/configs').exists():
+        return
+    with tarfile.open('mmdet_configs/configs.tar') as f:
+        f.extractall('mmdet_configs')
+
+extract_tar()
+
+#from vitpose_model import DetModel
+
+#try:
+#    import detectron2
+#except:
+#    import os
+#    os.system('pip install --upgrade pip')
+#    os.system('pip install git+https://github.com/facebookresearch/detectron2.git')
 
 #try:
 #    from vitpose_model import ViTPoseModel
@@ -57,35 +68,48 @@ model.eval()
 
 
 # Load detector
-from detectron2.config import LazyConfig
+#from detectron2.config import LazyConfig
 
-from hamer.utils.utils_detectron2 import DefaultPredictor_Lazy
+#from hamer.utils.utils_detectron2 import DefaultPredictor_Lazy
 
-detectron2_cfg = LazyConfig.load(f"vendor/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_h_75ep.py")
-detectron2_cfg.train.init_checkpoint = "https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h/f328730692/model_final_f05665.pkl"
-for i in range(3):
-    detectron2_cfg.model.roi_heads.box_predictors[i].test_score_thresh = 0.25
-detector = DefaultPredictor_Lazy(detectron2_cfg)
+#detectron2_cfg = LazyConfig.load(f"vendor/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_h_75ep.py")
+#detectron2_cfg.train.init_checkpoint = "https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h/f328730692/model_final_f05665.pkl"
+#for i in range(3):
+#    detectron2_cfg.model.roi_heads.box_predictors[i].test_score_thresh = 0.25
+#detector = DefaultPredictor_Lazy(detectron2_cfg)
 
 # Setup the renderer
 renderer = Renderer(model_cfg, faces=model.mano.faces)
 
+# mmdet detector
+#det_model = DetModel()
+det_model = torch.hub.load('ultralytics/yolov5', 'yolov5x6')
+
 # keypoint detector
 cpm = ViTPoseModel(device)
 
 import numpy as np
 
-def infer(in_pil_img, in_threshold=0.8, out_pil_img=None):
+def infer(in_pil_img, in_threshold=0.4, out_pil_img=None):
+
+    print(in_threshold)
 
     open_cv_image = np.array(in_pil_img)
+    det_out = det_model(open_cv_image)
+    det_out = det_out.xyxy[0]
     # Convert RGB to BGR
     open_cv_image = open_cv_image[:, :, ::-1].copy()
     print("EEEEE", open_cv_image.shape)
-
-
-
-
-
+    print(det_out)
+    #det_out = detector(open_cv_image)
+    scores = det_out[:,4]
+    det_instances = det_out[:,5]
+    print(scores)
+    print(det_instances)
+    valid_idx = (det_instances==0) & (scores > in_threshold)
+    print(valid_idx)
+    pred_bboxes=det_out[valid_idx,:4].cpu().numpy()
+    pred_scores=scores[valid_idx].cpu().numpy()
 
 
     # Detect human keypoints for each person
@@ -121,6 +145,9 @@ def infer(in_pil_img, in_threshold=0.8, out_pil_img=None):
 
     boxes = np.stack(bboxes)
     right = np.stack(is_right)
+    print(boxes)
+    print(right)
+    print(open_cv_image)
 
 
     # Run HaMeR on all detected humans
@@ -136,11 +163,14 @@ def infer(in_pil_img, in_threshold=0.8, out_pil_img=None):
 
     for batch in dataloader:
         batch = recursive_to(batch, device)
+        print(batch['img'])
         with torch.no_grad():
             out = model(batch)
 
         multiplier = (2*batch['right']-1)
         pred_cam = out['pred_cam']
+        print(out['pred_vertices'])
+        print(pred_cam)
         pred_cam[:,1] = multiplier*pred_cam[:,1]
         box_center = batch["box_center"].float()
         box_size = batch["box_size"].float()
@@ -204,9 +234,15 @@ def infer(in_pil_img, in_threshold=0.8, out_pil_img=None):
 
 with gr.Blocks(title="HaMeR", css=".gradio-container") as demo:
 
-
-
-
+    #title="HaMeR"
+    #description="Gradio Demo for HaMeR."
+
+    #gr.HTML("""<h1>HaMeR</h1>""")
+    #gr.HTML("""<h3>Gradio Demo for HaMeR. You can select an </h3>""")
+
+    gr.HTML("""<div style="font-weight:bold; text-align:center; font-size: 30px;">HaMeR</div>""")
+    gr.HTML("""<div style="text-align:left; font-size: 20px;">Demo for HaMeR. You can drop an image at the top-left panel
+                (or select one of the examples) and you will get the 3D reconstructions of the detected hands on the right.
                 You can also download the .obj files for each hand reconstruction.</div>""")
 
     with gr.Row():
@@ -228,10 +264,9 @@ with gr.Blocks(title="HaMeR", css=".gradio-container") as demo:
                     ['/home/user/app/assets/test1.jpg'],
                     ['/home/user/app/assets/test2.jpg'],
                     ['/home/user/app/assets/test3.jpg'],
-                    ['/home/user/app/assets/test4.jpg'],
                     ['/home/user/app/assets/test5.jpg'],
                     ],
-                    inputs=
+                    inputs=input_image)
 
 
 #demo.queue()
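
The detection backend this commit switches to determines the shape handling in the new `infer` code: the `torch.hub` YOLOv5 models return a `Detections` object whose `.xyxy[0]` attribute is an N x 6 tensor with columns (x1, y1, x2, y2, confidence, class), and class 0 is "person" in COCO. Below is a minimal standalone sketch of the same filtering step, under the assumptions that the hub model downloads successfully and that 'test.jpg' (a hypothetical path) is a local RGB image; the 0.4 threshold matches the new default above.

import numpy as np
import torch
from PIL import Image

# Sketch of the detection step introduced in this commit ('test.jpg' is hypothetical).
det_model = torch.hub.load('ultralytics/yolov5', 'yolov5x6')

img = np.array(Image.open('test.jpg'))  # RGB array, like the Gradio PIL input
det_out = det_model(img).xyxy[0]        # N x 6 tensor: x1, y1, x2, y2, conf, cls

scores = det_out[:, 4]
classes = det_out[:, 5]
# Keep confident person detections (COCO class 0), as in valid_idx above.
valid_idx = (classes == 0) & (scores > 0.4)
pred_bboxes = det_out[valid_idx, :4].cpu().numpy()
pred_scores = scores[valid_idx].cpu().numpy()
print(pred_bboxes.shape, pred_scores)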
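One note on ordering: the YOLOv5 hub wrapper expects RGB channel order for raw NumPy arrays (OpenCV BGR frames are meant to be flipped with `[:, :, ::-1]` first), so calling the detector before the pipeline's RGB-to-BGR conversion, as the new code does, is the correct order.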